From 82a9d6bdd4bb6fac3a080f49c0ba2013a3e65f72 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 18 May 2024 16:12:05 -0700 Subject: [PATCH 01/98] backtracetest: add MODULE_DESCRIPTION() Fix the 'make W=1' warning: WARNING: modpost: missing MODULE_DESCRIPTION() in kernel/backtracetest.o Link: https://lkml.kernel.org/r/20240518-md-backtracetest-v1-1-fab9f942c139@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Andrew Morton --- kernel/backtracetest.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c index a4181234232b..2dfe66b9ed76 100644 --- a/kernel/backtracetest.c +++ b/kernel/backtracetest.c @@ -74,5 +74,6 @@ static void exitf(void) module_init(backtrace_regression_test); module_exit(exitf); +MODULE_DESCRIPTION("Simple stack backtrace regression test module"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arjan van de Ven "); From 9e3041fecdc8f78a5900c3aa51d3d756e73264d6 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Mon, 20 May 2024 10:40:23 +0800 Subject: [PATCH 02/98] ocfs2: add bounds checking to ocfs2_xattr_find_entry() Add a paranoia check to make sure it doesn't stray beyond valid memory region containing ocfs2 xattr entries when scanning for a match. It will prevent out-of-bound access in case of crafted images. Link: https://lkml.kernel.org/r/20240520024024.1976129-1-joseph.qi@linux.alibaba.com Signed-off-by: Ferry Meng Signed-off-by: Joseph Qi Reported-by: lei lu Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/xattr.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3b81213ed7b8..8aea94c90739 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1062,7 +1062,7 @@ ssize_t ocfs2_listxattr(struct dentry *dentry, return i_ret + b_ret; } -static int ocfs2_xattr_find_entry(int name_index, +static int ocfs2_xattr_find_entry(struct inode *inode, int name_index, const char *name, struct ocfs2_xattr_search *xs) { @@ -1076,6 +1076,10 @@ static int ocfs2_xattr_find_entry(int name_index, name_len = strlen(name); entry = xs->here; for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { + if ((void *)entry >= xs->end) { + ocfs2_error(inode->i_sb, "corrupted xattr entries"); + return -EFSCORRUPTED; + } cmp = name_index - ocfs2_xattr_get_type(entry); if (!cmp) cmp = name_len - entry->xe_name_len; @@ -1166,7 +1170,7 @@ static int ocfs2_xattr_ibody_get(struct inode *inode, xs->base = (void *)xs->header; xs->here = xs->header->xh_entries; - ret = ocfs2_xattr_find_entry(name_index, name, xs); + ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); if (ret) return ret; size = le64_to_cpu(xs->here->xe_value_size); @@ -2698,7 +2702,7 @@ static int ocfs2_xattr_ibody_find(struct inode *inode, /* Find the named attribute. 
*/ if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { - ret = ocfs2_xattr_find_entry(name_index, name, xs); + ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); if (ret && ret != -ENODATA) return ret; xs->not_found = ret; @@ -2833,7 +2837,7 @@ static int ocfs2_xattr_block_find(struct inode *inode, xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; xs->here = xs->header->xh_entries; - ret = ocfs2_xattr_find_entry(name_index, name, xs); + ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); } else ret = ocfs2_xattr_index_block_find(inode, blk_bh, name_index, From af77c4fc1871847b528d58b7fdafb4aa1f6a9262 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Mon, 20 May 2024 10:40:24 +0800 Subject: [PATCH 03/98] ocfs2: strict bound check before memcmp in ocfs2_xattr_find_entry() xattr in ocfs2 maybe 'non-indexed', which saved with additional space requested. It's better to check if the memory is out of bound before memcmp, although this possibility mainly comes from crafted poisonous images. Link: https://lkml.kernel.org/r/20240520024024.1976129-2-joseph.qi@linux.alibaba.com Signed-off-by: Ferry Meng Signed-off-by: Joseph Qi Reported-by: lei lu Reviewed-by: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Signed-off-by: Andrew Morton --- fs/ocfs2/xattr.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 8aea94c90739..35c0cc2a51af 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1068,7 +1068,7 @@ static int ocfs2_xattr_find_entry(struct inode *inode, int name_index, { struct ocfs2_xattr_entry *entry; size_t name_len; - int i, cmp = 1; + int i, name_offset, cmp = 1; if (name == NULL) return -EINVAL; @@ -1083,10 +1083,15 @@ static int ocfs2_xattr_find_entry(struct inode *inode, int name_index, cmp = name_index - ocfs2_xattr_get_type(entry); if (!cmp) cmp = name_len - entry->xe_name_len; - if (!cmp) - cmp = memcmp(name, (xs->base + - le16_to_cpu(entry->xe_name_offset)), - name_len); + if (!cmp) { + name_offset = le16_to_cpu(entry->xe_name_offset); + if ((xs->base + name_offset + name_len) > xs->end) { + ocfs2_error(inode->i_sb, + "corrupted xattr entries"); + return -EFSCORRUPTED; + } + cmp = memcmp(name, (xs->base + name_offset), name_len); + } if (cmp == 0) break; entry += 1; From 47e39c79336760031f98215c014c9800f5ed6481 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 23 May 2024 09:35:14 +0200 Subject: [PATCH 04/98] fork: use this_cpu_try_cmpxchg() in try_release_thread_stack_to_cache() Use this_cpu_try_cmpxchg() instead of this_cpu_cmpxchg (*ptr, old, new) == old in try_release_thread_stack_to_cache. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). No functional change intended. 
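As a minimal sketch of the conversion pattern (hypothetical names, not taken
from the patch itself): this_cpu_try_cmpxchg() takes a pointer to the expected
value, reports success as a bool and writes the observed value back through
that pointer on failure, so the explicit "== old" comparison disappears:

  /* before: compare the returned old value by hand */
  if (this_cpu_cmpxchg(slot, NULL, new) != NULL)
          continue;
  return true;

  /* after: success/failure is returned directly as a bool */
  struct obj *expected = NULL;

  if (this_cpu_try_cmpxchg(slot, &expected, new))
          return true;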
[ubizjak@gmail.com: simplify the for loop a bit] Link: https://lkml.kernel.org/r/20240523214442.21102-1-ubizjak@gmail.com Link: https://lkml.kernel.org/r/20240523073530.8128-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Cc: Oleg Nesterov Cc: Kees Cook Signed-off-by: Andrew Morton --- kernel/fork.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 99076dbe27d8..f1c16b3dd8ac 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -205,9 +205,10 @@ static bool try_release_thread_stack_to_cache(struct vm_struct *vm) unsigned int i; for (i = 0; i < NR_CACHED_STACKS; i++) { - if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL) - continue; - return true; + struct vm_struct *tmp = NULL; + + if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm)) + return true; } return false; } From cf28d7716e0c777f593948eea109dc273047dac7 Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Sun, 12 May 2024 13:42:11 +0800 Subject: [PATCH 05/98] include/linux/jhash.h: fix typos Drop one '-' to adhere to coding style. Replace 'arbitray' with 'arbitrary'. Link: https://lkml.kernel.org/r/20240512054211.24726-1-weihsinyeh168@gmail.com Signed-off-by: Wei-Hsin Yeh Signed-off-by: Andrew Morton --- include/linux/jhash.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/jhash.h b/include/linux/jhash.h index ab7f8c152b89..fa26a2dd3b52 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -31,7 +31,7 @@ /* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */ #define jhash_mask(n) (jhash_size(n)-1) -/* __jhash_mix -- mix 3 32-bit values reversibly. */ +/* __jhash_mix - mix 3 32-bit values reversibly. */ #define __jhash_mix(a, b, c) \ { \ a -= c; a ^= rol32(c, 4); c += b; \ @@ -60,7 +60,7 @@ /* jhash - hash an arbitrary key * @k: sequence of bytes as key * @length: the length of the key - * @initval: the previous hash, or an arbitray value + * @initval: the previous hash, or an arbitrary value * * The generic version, hashes an arbitrary sequence of bytes. * No alignment or length assumptions are made about the input key. @@ -110,7 +110,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) /* jhash2 - hash an array of u32's * @k: the key which must be an array of u32's * @length: the number of u32's in the key - * @initval: the previous hash, or an arbitray value + * @initval: the previous hash, or an arbitrary value * * Returns the hash value of the key. */ From ddd36b7ee19f3fd3faee5f97ce02204c85fda486 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:43 +0800 Subject: [PATCH 06/98] perf/core: fix several typos Patch series "treewide: Refactor heap related implementation", v6. This patch series focuses on several adjustments related to heap implementation. Firstly, a type-safe interface has been added to the min_heap, along with the introduction of several new functions to enhance its functionality. Additionally, the heap implementation for bcache and bcachefs has been replaced with the generic min_heap implementation from include/linux. Furthermore, several typos have been corrected. Previous discussion with Kent Overstreet: https://lkml.kernel.org/ioyfizrzq7w7mjrqcadtzsfgpuntowtjdw5pgn4qhvsdp4mqqg@nrlek5vmisbu This patch (of 16): Replace 'artifically' with 'artificially'. Replace 'irrespecive' with 'irrespective'. Replace 'futher' with 'further'. Replace 'sufficent' with 'sufficient'. 
Link: https://lkml.kernel.org/r/20240524152958.919343-1-visitorckw@gmail.com Link: https://lkml.kernel.org/r/20240524152958.919343-2-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Reviewed-by: Randy Dunlap Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- kernel/events/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 8f908f077935..effe9c15ec7d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -534,7 +534,7 @@ void perf_sample_event_took(u64 sample_len_ns) __this_cpu_write(running_sample_length, running_len); /* - * Note: this will be biased artifically low until we have + * Note: this will be biased artificially low until we have * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us * from having to maintain a count. */ @@ -596,10 +596,10 @@ static inline u64 perf_event_clock(struct perf_event *event) * * Event groups make things a little more complicated, but not terribly so. The * rules for a group are that if the group leader is OFF the entire group is - * OFF, irrespecive of what the group member states are. This results in + * OFF, irrespective of what the group member states are. This results in * __perf_effective_state(). * - * A futher ramification is that when a group leader flips between OFF and + * A further ramification is that when a group leader flips between OFF and * !OFF, we need to update all group member times. * * @@ -891,7 +891,7 @@ static int perf_cgroup_ensure_storage(struct perf_event *event, int cpu, heap_size, ret = 0; /* - * Allow storage to have sufficent space for an iterator for each + * Allow storage to have sufficient space for an iterator for each * possibly nested cgroup plus an iterator for events with no cgroup. */ for (heap_size = 1; css; css = css->parent) From b42995607e9d7fc15f8626a2fb863b55d2281782 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:44 +0800 Subject: [PATCH 07/98] bcache: fix typo Replace 'utiility' with 'utility'. Link: https://lkml.kernel.org/r/20240524152958.919343-3-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Reviewed-by: Randy Dunlap Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- drivers/md/bcache/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index ae380bc3992e..410d8cb49e50 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * random utiility code, for bcache but in theory not specific to bcache + * random utility code, for bcache but in theory not specific to bcache * * Copyright 2010, 2011 Kent Overstreet * Copyright 2012 Google, Inc. From fd60f7fe69609d6541e58853082ed63bd845aee0 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:45 +0800 Subject: [PATCH 08/98] bcachefs: fix typo Replace 'utiility' with 'utility'. 
Link: https://lkml.kernel.org/r/20240524152958.919343-4-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Reviewed-by: Randy Dunlap Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- fs/bcachefs/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index de331dec2a99..65d42206703c 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * random utiility code, for bcache but in theory not specific to bcache + * random utility code, for bcache but in theory not specific to bcache * * Copyright 2010, 2011 Kent Overstreet * Copyright 2012 Google, Inc. From 873ce25766019dd017c1a3a10c19ac3fc4bf24aa Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:46 +0800 Subject: [PATCH 09/98] lib min_heap: add type safe interface Implement a type-safe interface for min_heap using strong type pointers instead of void * in the data field. This change includes adding small macro wrappers around functions, enabling the use of __minheap_cast and __minheap_obj_size macros for type casting and obtaining element size. This implementation removes the necessity of passing element size in min_heap_callbacks. Additionally, introduce the MIN_HEAP_PREALLOCATED macro for preallocating some elements. Link: https://lkml.kernel.org/ioyfizrzq7w7mjrqcadtzsfgpuntowtjdw5pgn4qhvsdp4mqqg@nrlek5vmisbu Link: https://lkml.kernel.org/r/20240524152958.919343-5-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- drivers/md/dm-vdo/repair.c | 9 ++-- drivers/md/dm-vdo/slab-depot.c | 5 +-- include/linux/min_heap.h | 79 ++++++++++++++++++++++------------ kernel/events/core.c | 11 ++--- lib/test_min_heap.c | 13 +++--- 5 files changed, 70 insertions(+), 47 deletions(-) diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c index defc9359f10e..e8ad611fe7c1 100644 --- a/drivers/md/dm-vdo/repair.c +++ b/drivers/md/dm-vdo/repair.c @@ -51,6 +51,8 @@ struct recovery_point { bool increment_applied; }; +DEFINE_MIN_HEAP(struct numbered_block_mapping, replay_heap); + struct repair_completion { /* The completion header */ struct vdo_completion completion; @@ -97,7 +99,7 @@ struct repair_completion { * order, then original journal order. This permits efficient iteration over the journal * entries in order. */ - struct min_heap replay_heap; + struct replay_heap replay_heap; /* Fields tracking progress through the journal entries. 
*/ struct numbered_block_mapping *current_entry; struct numbered_block_mapping *current_unfetched_entry; @@ -163,14 +165,13 @@ static void swap_mappings(void *item1, void *item2) } static const struct min_heap_callbacks repair_min_heap = { - .elem_size = sizeof(struct numbered_block_mapping), .less = mapping_is_less_than, .swp = swap_mappings, }; static struct numbered_block_mapping *sort_next_heap_element(struct repair_completion *repair) { - struct min_heap *heap = &repair->replay_heap; + struct replay_heap *heap = &repair->replay_heap; struct numbered_block_mapping *last; if (heap->nr == 0) @@ -1117,7 +1118,7 @@ static void recover_block_map(struct vdo_completion *completion) * Organize the journal entries into a binary heap so we can iterate over them in sorted * order incrementally, avoiding an expensive sort call. */ - repair->replay_heap = (struct min_heap) { + repair->replay_heap = (struct replay_heap) { .data = repair->entries, .nr = repair->block_map_entry_count, .size = repair->block_map_entry_count, diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c index 46e4721e5b4f..ef9a6e53109c 100644 --- a/drivers/md/dm-vdo/slab-depot.c +++ b/drivers/md/dm-vdo/slab-depot.c @@ -3309,7 +3309,6 @@ static void swap_slab_statuses(void *item1, void *item2) } static const struct min_heap_callbacks slab_status_min_heap = { - .elem_size = sizeof(struct slab_status), .less = slab_status_is_less_than, .swp = swap_slab_statuses, }; @@ -3509,7 +3508,7 @@ static int get_slab_statuses(struct block_allocator *allocator, static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator *allocator) { struct slab_status current_slab_status; - struct min_heap heap; + DEFINE_MIN_HEAP(struct slab_status, heap) heap; int result; struct slab_status *slab_statuses; struct slab_depot *depot = allocator->depot; @@ -3521,7 +3520,7 @@ static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator return result; /* Sort the slabs by cleanliness, then by emptiness hint. */ - heap = (struct min_heap) { + heap = (struct heap) { .data = slab_statuses, .nr = allocator->slab_count, .size = allocator->slab_count, diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index d52daf45861b..92830f41642a 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -7,45 +7,53 @@ #include /** - * struct min_heap - Data structure to hold a min-heap. - * @data: Start of array holding the heap elements. + * Data structure to hold a min-heap. * @nr: Number of elements currently in the heap. * @size: Maximum number of elements that can be held in current storage. + * @data: Pointer to the start of array holding the heap elements. + * @preallocated: Start of the static preallocated array holding the heap elements. */ -struct min_heap { - void *data; - int nr; - int size; -}; +#define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \ +struct _name { \ + int nr; \ + int size; \ + _type *data; \ + _type preallocated[_nr]; \ +} + +#define DEFINE_MIN_HEAP(_type, _name) MIN_HEAP_PREALLOCATED(_type, _name, 0) + +typedef DEFINE_MIN_HEAP(char, min_heap_char) min_heap_char; + +#define __minheap_cast(_heap) (typeof((_heap)->data[0]) *) +#define __minheap_obj_size(_heap) sizeof((_heap)->data[0]) /** * struct min_heap_callbacks - Data/functions to customise the min_heap. - * @elem_size: The nr of each element in bytes. * @less: Partial order function for this heap. * @swp: Swap elements function. 
*/ struct min_heap_callbacks { - int elem_size; bool (*less)(const void *lhs, const void *rhs); void (*swp)(void *lhs, void *rhs); }; /* Sift the element at pos down the heap. */ static __always_inline -void min_heapify(struct min_heap *heap, int pos, +void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, const struct min_heap_callbacks *func) { void *left, *right; void *data = heap->data; - void *root = data + pos * func->elem_size; + void *root = data + pos * elem_size; int i = pos, j; /* Find the sift-down path all the way to the leaves. */ for (;;) { if (i * 2 + 2 >= heap->nr) break; - left = data + (i * 2 + 1) * func->elem_size; - right = data + (i * 2 + 2) * func->elem_size; + left = data + (i * 2 + 1) * elem_size; + right = data + (i * 2 + 2) * elem_size; i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2; } @@ -54,31 +62,37 @@ void min_heapify(struct min_heap *heap, int pos, i = i * 2 + 1; /* Backtrack to the correct location. */ - while (i != pos && func->less(root, data + i * func->elem_size)) + while (i != pos && func->less(root, data + i * elem_size)) i = (i - 1) / 2; /* Shift the element into its correct place. */ j = i; while (i != pos) { i = (i - 1) / 2; - func->swp(data + i * func->elem_size, data + j * func->elem_size); + func->swp(data + i * elem_size, data + j * elem_size); } } +#define min_heapify(_heap, _pos, _func) \ + __min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func) + /* Floyd's approach to heapification that is O(nr). */ static __always_inline -void min_heapify_all(struct min_heap *heap, +void __min_heapify_all(min_heap_char *heap, size_t elem_size, const struct min_heap_callbacks *func) { int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - min_heapify(heap, i, func); + __min_heapify(heap, i, elem_size, func); } +#define min_heapify_all(_heap, _func) \ + __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func) + /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline -void min_heap_pop(struct min_heap *heap, +void __min_heap_pop(min_heap_char *heap, size_t elem_size, const struct min_heap_callbacks *func) { void *data = heap->data; @@ -88,27 +102,33 @@ void min_heap_pop(struct min_heap *heap, /* Place last element at the root (position 0) and then sift down. */ heap->nr--; - memcpy(data, data + (heap->nr * func->elem_size), func->elem_size); - min_heapify(heap, 0, func); + memcpy(data, data + (heap->nr * elem_size), elem_size); + __min_heapify(heap, 0, elem_size, func); } +#define min_heap_pop(_heap, _func) \ + __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func) + /* * Remove the minimum element and then push the given element. The * implementation performs 1 sift (O(log2(nr))) and is therefore more * efficient than a pop followed by a push that does 2. */ static __always_inline -void min_heap_pop_push(struct min_heap *heap, - const void *element, +void __min_heap_pop_push(min_heap_char *heap, + const void *element, size_t elem_size, const struct min_heap_callbacks *func) { - memcpy(heap->data, element, func->elem_size); - min_heapify(heap, 0, func); + memcpy(heap->data, element, elem_size); + __min_heapify(heap, 0, elem_size, func); } +#define min_heap_pop_push(_heap, _element, _func) \ + __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func) + /* Push an element on to the heap, O(log2(nr)). 
*/ static __always_inline -void min_heap_push(struct min_heap *heap, const void *element, +void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, const struct min_heap_callbacks *func) { void *data = heap->data; @@ -120,17 +140,20 @@ void min_heap_push(struct min_heap *heap, const void *element, /* Place at the end of data. */ pos = heap->nr; - memcpy(data + (pos * func->elem_size), element, func->elem_size); + memcpy(data + (pos * elem_size), element, elem_size); heap->nr++; /* Sift child at pos up. */ for (; pos > 0; pos = (pos - 1) / 2) { - child = data + (pos * func->elem_size); - parent = data + ((pos - 1) / 2) * func->elem_size; + child = data + (pos * elem_size); + parent = data + ((pos - 1) / 2) * elem_size; if (func->less(parent, child)) break; func->swp(parent, child); } } +#define min_heap_push(_heap, _element, _func) \ + __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func) + #endif /* _LINUX_MIN_HEAP_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index effe9c15ec7d..5ae4e429b3fb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3701,13 +3701,14 @@ static void swap_ptr(void *l, void *r) swap(*lp, *rp); } +DEFINE_MIN_HEAP(struct perf_event *, perf_event_min_heap); + static const struct min_heap_callbacks perf_min_heap = { - .elem_size = sizeof(struct perf_event *), .less = perf_less_group_idx, .swp = swap_ptr, }; -static void __heap_add(struct min_heap *heap, struct perf_event *event) +static void __heap_add(struct perf_event_min_heap *heap, struct perf_event *event) { struct perf_event **itrs = heap->data; @@ -3741,7 +3742,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx = NULL; /* Space for per CPU and/or any CPU event iterators. */ struct perf_event *itrs[2]; - struct min_heap event_heap; + struct perf_event_min_heap event_heap; struct perf_event **evt; int ret; @@ -3750,7 +3751,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, if (!ctx->task) { cpuctx = this_cpu_ptr(&perf_cpu_context); - event_heap = (struct min_heap){ + event_heap = (struct perf_event_min_heap){ .data = cpuctx->heap, .nr = 0, .size = cpuctx->heap_size, @@ -3763,7 +3764,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, css = &cpuctx->cgrp->css; #endif } else { - event_heap = (struct min_heap){ + event_heap = (struct perf_event_min_heap){ .data = itrs, .nr = 0, .size = ARRAY_SIZE(itrs), diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c index 7b01b4387cfb..52efab9fb2f1 100644 --- a/lib/test_min_heap.c +++ b/lib/test_min_heap.c @@ -11,6 +11,8 @@ #include #include +DEFINE_MIN_HEAP(int, min_heap_test); + static __init bool less_than(const void *lhs, const void *rhs) { return *(int *)lhs < *(int *)rhs; @@ -30,7 +32,7 @@ static __init void swap_ints(void *lhs, void *rhs) } static __init int pop_verify_heap(bool min_heap, - struct min_heap *heap, + struct min_heap_test *heap, const struct min_heap_callbacks *funcs) { int *values = heap->data; @@ -63,13 +65,12 @@ static __init int test_heapify_all(bool min_heap) { int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0, -3, -1, -2, -4, 0x8000000, 0x7FFFFFF }; - struct min_heap heap = { + struct min_heap_test heap = { .data = values, .nr = ARRAY_SIZE(values), .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .elem_size = sizeof(int), .less = min_heap ? 
less_than : greater_than, .swp = swap_ints, }; @@ -96,13 +97,12 @@ static __init int test_heap_push(bool min_heap) const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; int values[ARRAY_SIZE(data)]; - struct min_heap heap = { + struct min_heap_test heap = { .data = values, .nr = 0, .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .elem_size = sizeof(int), .less = min_heap ? less_than : greater_than, .swp = swap_ints, }; @@ -129,13 +129,12 @@ static __init int test_heap_pop_push(bool min_heap) const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0, -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF }; int values[ARRAY_SIZE(data)]; - struct min_heap heap = { + struct min_heap_test heap = { .data = values, .nr = 0, .size = ARRAY_SIZE(values), }; struct min_heap_callbacks funcs = { - .elem_size = sizeof(int), .less = min_heap ? less_than : greater_than, .swp = swap_ints, }; From e146683ccff9af2d26b70a9ea4c87f5a86eff0c4 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:47 +0800 Subject: [PATCH 10/98] lib min_heap: add min_heap_init() Add min_heap_init() for initializing heap with data, nr, and size. Link: https://lkml.kernel.org/r/20240524152958.919343-6-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 92830f41642a..b384a4ea2afa 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -38,6 +38,21 @@ struct min_heap_callbacks { void (*swp)(void *lhs, void *rhs); }; +/* Initialize a min-heap. */ +static __always_inline +void __min_heap_init(min_heap_char *heap, void *data, int size) +{ + heap->nr = 0; + heap->size = size; + if (data) + heap->data = data; + else + heap->data = heap->preallocated; +} + +#define min_heap_init(_heap, _data, _size) \ + __min_heap_init((min_heap_char *)_heap, _data, _size) + /* Sift the element at pos down the heap. */ static __always_inline void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, From 0562d54ddc3dc195f338bd8e816dd8ff154f44ad Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:48 +0800 Subject: [PATCH 11/98] lib min_heap: add min_heap_peek() Add min_heap_peek() to retrieve a pointer to the smallest element. The pointer is cast to the appropriate type of heap elements. 
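A brief usage sketch (only the min_heap API is real; 'values' and the other
names are made up for illustration):

  /* at file scope */
  DEFINE_MIN_HEAP(int, min_heap_int);

  /* in a function, with 'values' being an int[] backing buffer */
  struct min_heap_int heap;
  int *smallest;

  min_heap_init(&heap, values, ARRAY_SIZE(values));
  /* ... push elements ... */
  smallest = min_heap_peek(&heap);  /* typed pointer, or NULL when empty */
  if (smallest)
          pr_info("min = %d\n", *smallest);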
Link: https://lkml.kernel.org/r/20240524152958.919343-7-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index b384a4ea2afa..d9c4ae7ad0cc 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -53,6 +53,16 @@ void __min_heap_init(min_heap_char *heap, void *data, int size) #define min_heap_init(_heap, _data, _size) \ __min_heap_init((min_heap_char *)_heap, _data, _size) +/* Get the minimum element from the heap. */ +static __always_inline +void *__min_heap_peek(struct min_heap_char *heap) +{ + return heap->nr ? heap->data : NULL; +} + +#define min_heap_peek(_heap) \ + (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) + /* Sift the element at pos down the heap. */ static __always_inline void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, From b9d720e65a72c9754faf689e0859a5632853f4bc Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:49 +0800 Subject: [PATCH 12/98] lib min_heap: add min_heap_full() Add min_heap_full() which returns a boolean value indicating whether the heap has reached its maximum capacity. Link: https://lkml.kernel.org/r/20240524152958.919343-8-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index d9c4ae7ad0cc..f41898c05f5a 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -63,6 +63,16 @@ void *__min_heap_peek(struct min_heap_char *heap) #define min_heap_peek(_heap) \ (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) +/* Check if the heap is full. */ +static __always_inline +bool __min_heap_full(min_heap_char *heap) +{ + return heap->nr == heap->size; +} + +#define min_heap_full(_heap) \ + __min_heap_full((min_heap_char *)_heap) + /* Sift the element at pos down the heap. */ static __always_inline void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, From 267607e87599509a6a39a5f7dd3959365e58af27 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:50 +0800 Subject: [PATCH 13/98] lib min_heap: add args for min_heap_callbacks Add a third parameter 'args' for the 'less' and 'swp' functions in the 'struct min_heap_callbacks'. This additional parameter allows these comparison and swap functions to handle extra arguments when necessary. 
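A short sketch of one way to use the extra argument (hypothetical helpers;
only the min_heap API itself is from this series): a comparison callback can
rank indices by consulting a shared table handed in through 'args' instead of
relying on global state:

  static bool key_less(const void *lhs, const void *rhs, void *args)
  {
          const int *keys = args;  /* shared lookup table */

          return keys[*(const int *)lhs] < keys[*(const int *)rhs];
  }

  static void swap_idx(void *lhs, void *rhs, void __always_unused *args)
  {
          swap(*(int *)lhs, *(int *)rhs);
  }

  static const struct min_heap_callbacks cbs = {
          .less = key_less,
          .swp = swap_idx,
  };

  min_heapify_all(&heap, &cbs, keys);  /* 'keys' reaches both callbacks */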
Link: https://lkml.kernel.org/r/20240524152958.919343-9-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- drivers/md/dm-vdo/repair.c | 10 +++---- drivers/md/dm-vdo/slab-depot.c | 9 +++--- include/linux/min_heap.h | 51 +++++++++++++++++----------------- kernel/events/core.c | 10 +++---- lib/test_min_heap.c | 26 ++++++++--------- 5 files changed, 54 insertions(+), 52 deletions(-) diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c index e8ad611fe7c1..eae990859db4 100644 --- a/drivers/md/dm-vdo/repair.c +++ b/drivers/md/dm-vdo/repair.c @@ -137,7 +137,7 @@ struct repair_completion { * to sort by slot while still ensuring we replay all entries with the same slot in the exact order * as they appeared in the journal. */ -static bool mapping_is_less_than(const void *item1, const void *item2) +static bool mapping_is_less_than(const void *item1, const void *item2, void __always_unused *args) { const struct numbered_block_mapping *mapping1 = (const struct numbered_block_mapping *) item1; @@ -156,7 +156,7 @@ static bool mapping_is_less_than(const void *item1, const void *item2) return 0; } -static void swap_mappings(void *item1, void *item2) +static void swap_mappings(void *item1, void *item2, void __always_unused *args) { struct numbered_block_mapping *mapping1 = item1; struct numbered_block_mapping *mapping2 = item2; @@ -182,8 +182,8 @@ static struct numbered_block_mapping *sort_next_heap_element(struct repair_compl * restore the heap invariant, and return a pointer to the popped element. */ last = &repair->entries[--heap->nr]; - swap_mappings(heap->data, last); - min_heapify(heap, 0, &repair_min_heap); + swap_mappings(heap->data, last, NULL); + min_heapify(heap, 0, &repair_min_heap, NULL); return last; } @@ -1123,7 +1123,7 @@ static void recover_block_map(struct vdo_completion *completion) .nr = repair->block_map_entry_count, .size = repair->block_map_entry_count, }; - min_heapify_all(&repair->replay_heap, &repair_min_heap); + min_heapify_all(&repair->replay_heap, &repair_min_heap, NULL); vdo_log_info("Replaying %zu recovery entries into block map", repair->block_map_entry_count); diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c index ef9a6e53109c..274f9ccd072f 100644 --- a/drivers/md/dm-vdo/slab-depot.c +++ b/drivers/md/dm-vdo/slab-depot.c @@ -3288,7 +3288,8 @@ int vdo_release_block_reference(struct block_allocator *allocator, * Thus, the ordering is reversed from the usual sense since min_heap returns smaller elements * before larger ones. 
*/ -static bool slab_status_is_less_than(const void *item1, const void *item2) +static bool slab_status_is_less_than(const void *item1, const void *item2, + void __always_unused *args) { const struct slab_status *info1 = item1; const struct slab_status *info2 = item2; @@ -3300,7 +3301,7 @@ static bool slab_status_is_less_than(const void *item1, const void *item2) return info1->slab_number < info2->slab_number; } -static void swap_slab_statuses(void *item1, void *item2) +static void swap_slab_statuses(void *item1, void *item2, void __always_unused *args) { struct slab_status *info1 = item1; struct slab_status *info2 = item2; @@ -3525,7 +3526,7 @@ static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator .nr = allocator->slab_count, .size = allocator->slab_count, }; - min_heapify_all(&heap, &slab_status_min_heap); + min_heapify_all(&heap, &slab_status_min_heap, NULL); while (heap.nr > 0) { bool high_priority; @@ -3533,7 +3534,7 @@ static int __must_check vdo_prepare_slabs_for_allocation(struct block_allocator struct slab_journal *journal; current_slab_status = slab_statuses[0]; - min_heap_pop(&heap, &slab_status_min_heap); + min_heap_pop(&heap, &slab_status_min_heap, NULL); slab = depot->slabs[current_slab_status.slab_number]; if ((depot->load_type == VDO_SLAB_DEPOT_REBUILD_LOAD) || diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index f41898c05f5a..4acd0f4b3faf 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -34,8 +34,8 @@ typedef DEFINE_MIN_HEAP(char, min_heap_char) min_heap_char; * @swp: Swap elements function. */ struct min_heap_callbacks { - bool (*less)(const void *lhs, const void *rhs); - void (*swp)(void *lhs, void *rhs); + bool (*less)(const void *lhs, const void *rhs, void *args); + void (*swp)(void *lhs, void *rhs, void *args); }; /* Initialize a min-heap. */ @@ -76,7 +76,7 @@ bool __min_heap_full(min_heap_char *heap) /* Sift the element at pos down the heap. */ static __always_inline void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, - const struct min_heap_callbacks *func) + const struct min_heap_callbacks *func, void *args) { void *left, *right; void *data = heap->data; @@ -89,7 +89,7 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, break; left = data + (i * 2 + 1) * elem_size; right = data + (i * 2 + 2) * elem_size; - i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2; + i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2; } /* Special case for the last leaf with no sibling. */ @@ -97,38 +97,38 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, i = i * 2 + 1; /* Backtrack to the correct location. */ - while (i != pos && func->less(root, data + i * elem_size)) + while (i != pos && func->less(root, data + i * elem_size, args)) i = (i - 1) / 2; /* Shift the element into its correct place. */ j = i; while (i != pos) { i = (i - 1) / 2; - func->swp(data + i * elem_size, data + j * elem_size); + func->swp(data + i * elem_size, data + j * elem_size, args); } } -#define min_heapify(_heap, _pos, _func) \ - __min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func) +#define min_heapify(_heap, _pos, _func, _args) \ + __min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) /* Floyd's approach to heapification that is O(nr). 
*/ static __always_inline void __min_heapify_all(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func) + const struct min_heap_callbacks *func, void *args) { int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - __min_heapify(heap, i, elem_size, func); + __min_heapify(heap, i, elem_size, func, args); } -#define min_heapify_all(_heap, _func) \ - __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func) +#define min_heapify_all(_heap, _func, _args) \ + __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline void __min_heap_pop(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func) + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -138,11 +138,11 @@ void __min_heap_pop(min_heap_char *heap, size_t elem_size, /* Place last element at the root (position 0) and then sift down. */ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); - __min_heapify(heap, 0, elem_size, func); + __min_heapify(heap, 0, elem_size, func, args); } -#define min_heap_pop(_heap, _func) \ - __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func) +#define min_heap_pop(_heap, _func, _args) \ + __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* * Remove the minimum element and then push the given element. The @@ -152,19 +152,20 @@ void __min_heap_pop(min_heap_char *heap, size_t elem_size, static __always_inline void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func) + const struct min_heap_callbacks *func, + void *args) { memcpy(heap->data, element, elem_size); - __min_heapify(heap, 0, elem_size, func); + __min_heapify(heap, 0, elem_size, func, args); } -#define min_heap_pop_push(_heap, _element, _func) \ - __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func) +#define min_heap_pop_push(_heap, _element, _func, _args) \ + __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) /* Push an element on to the heap, O(log2(nr)). 
*/ static __always_inline void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func) + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; void *child, *parent; @@ -182,13 +183,13 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, for (; pos > 0; pos = (pos - 1) / 2) { child = data + (pos * elem_size); parent = data + ((pos - 1) / 2) * elem_size; - if (func->less(parent, child)) + if (func->less(parent, child, args)) break; - func->swp(parent, child); + func->swp(parent, child, args); } } -#define min_heap_push(_heap, _element, _func) \ - __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func) +#define min_heap_push(_heap, _element, _func, _args) \ + __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) #endif /* _LINUX_MIN_HEAP_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 5ae4e429b3fb..27cafe661740 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3686,7 +3686,7 @@ void __perf_event_task_sched_out(struct task_struct *task, perf_cgroup_switch(next); } -static bool perf_less_group_idx(const void *l, const void *r) +static bool perf_less_group_idx(const void *l, const void *r, void __always_unused *args) { const struct perf_event *le = *(const struct perf_event **)l; const struct perf_event *re = *(const struct perf_event **)r; @@ -3694,7 +3694,7 @@ static bool perf_less_group_idx(const void *l, const void *r) return le->group_index < re->group_index; } -static void swap_ptr(void *l, void *r) +static void swap_ptr(void *l, void *r, void __always_unused *args) { void **lp = l, **rp = r; @@ -3786,7 +3786,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, perf_assert_pmu_disabled((*evt)->pmu_ctx->pmu); } - min_heapify_all(&event_heap, &perf_min_heap); + min_heapify_all(&event_heap, &perf_min_heap, NULL); while (event_heap.nr) { ret = func(*evt, data); @@ -3795,9 +3795,9 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, *evt = perf_event_groups_next(*evt, pmu); if (*evt) - min_heapify(&event_heap, 0, &perf_min_heap); + min_heapify(&event_heap, 0, &perf_min_heap, NULL); else - min_heap_pop(&event_heap, &perf_min_heap); + min_heap_pop(&event_heap, &perf_min_heap, NULL); } return 0; diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c index 52efab9fb2f1..f59638cf5dfa 100644 --- a/lib/test_min_heap.c +++ b/lib/test_min_heap.c @@ -13,17 +13,17 @@ DEFINE_MIN_HEAP(int, min_heap_test); -static __init bool less_than(const void *lhs, const void *rhs) +static __init bool less_than(const void *lhs, const void *rhs, void __always_unused *args) { return *(int *)lhs < *(int *)rhs; } -static __init bool greater_than(const void *lhs, const void *rhs) +static __init bool greater_than(const void *lhs, const void *rhs, void __always_unused *args) { return *(int *)lhs > *(int *)rhs; } -static __init void swap_ints(void *lhs, void *rhs) +static __init void swap_ints(void *lhs, void *rhs, void __always_unused *args) { int temp = *(int *)lhs; @@ -40,7 +40,7 @@ static __init int pop_verify_heap(bool min_heap, int last; last = values[0]; - min_heap_pop(heap, funcs); + min_heap_pop(heap, funcs, NULL); while (heap->nr > 0) { if (min_heap) { if (last > values[0]) { @@ -56,7 +56,7 @@ static __init int pop_verify_heap(bool min_heap, } } last = values[0]; - min_heap_pop(heap, funcs); + min_heap_pop(heap, funcs, NULL); } return err; } @@ -77,7 +77,7 @@ 
static __init int test_heapify_all(bool min_heap) int i, err; /* Test with known set of values. */ - min_heapify_all(&heap, &funcs); + min_heapify_all(&heap, &funcs, NULL); err = pop_verify_heap(min_heap, &heap, &funcs); @@ -86,7 +86,7 @@ static __init int test_heapify_all(bool min_heap) for (i = 0; i < heap.nr; i++) values[i] = get_random_u32(); - min_heapify_all(&heap, &funcs); + min_heapify_all(&heap, &funcs, NULL); err += pop_verify_heap(min_heap, &heap, &funcs); return err; @@ -110,14 +110,14 @@ static __init int test_heap_push(bool min_heap) /* Test with known set of values copied from data. */ for (i = 0; i < ARRAY_SIZE(data); i++) - min_heap_push(&heap, &data[i], &funcs); + min_heap_push(&heap, &data[i], &funcs, NULL); err = pop_verify_heap(min_heap, &heap, &funcs); /* Test with randomly generated values. */ while (heap.nr < heap.size) { temp = get_random_u32(); - min_heap_push(&heap, &temp, &funcs); + min_heap_push(&heap, &temp, &funcs, NULL); } err += pop_verify_heap(min_heap, &heap, &funcs); @@ -143,22 +143,22 @@ static __init int test_heap_pop_push(bool min_heap) /* Fill values with data to pop and replace. */ temp = min_heap ? 0x80000000 : 0x7FFFFFFF; for (i = 0; i < ARRAY_SIZE(data); i++) - min_heap_push(&heap, &temp, &funcs); + min_heap_push(&heap, &temp, &funcs, NULL); /* Test with known set of values copied from data. */ for (i = 0; i < ARRAY_SIZE(data); i++) - min_heap_pop_push(&heap, &data[i], &funcs); + min_heap_pop_push(&heap, &data[i], &funcs, NULL); err = pop_verify_heap(min_heap, &heap, &funcs); heap.nr = 0; for (i = 0; i < ARRAY_SIZE(data); i++) - min_heap_push(&heap, &temp, &funcs); + min_heap_push(&heap, &temp, &funcs, NULL); /* Test with randomly generated values. */ for (i = 0; i < ARRAY_SIZE(data); i++) { temp = get_random_u32(); - min_heap_pop_push(&heap, &temp, &funcs); + min_heap_pop_push(&heap, &temp, &funcs, NULL); } err += pop_verify_heap(min_heap, &heap, &funcs); From eaa0bc7119247942200b9209d40ffd86ace347d4 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:51 +0800 Subject: [PATCH 14/98] lib min_heap: add min_heap_sift_up() Add min_heap_sift_up() to sift up the element at index 'idx' in the heap. Link: https://lkml.kernel.org/r/20240524152958.919343-10-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 4acd0f4b3faf..ddd0186afb77 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -111,6 +111,26 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, #define min_heapify(_heap, _pos, _func, _args) \ __min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +/* Sift up ith element from the heap, O(log2(nr)). 
*/ +static __always_inline +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + void *data = heap->data; + size_t parent; + + while (idx) { + parent = (idx - 1) / 2; + if (func->less(data + parent * elem_size, data + idx * elem_size, args)) + break; + func->swp(data + parent * elem_size, data + idx * elem_size, args); + idx = parent; + } +} + +#define min_heap_sift_up(_heap, _idx, _func, _args) \ + __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) + /* Floyd's approach to heapification that is O(nr). */ static __always_inline void __min_heapify_all(min_heap_char *heap, size_t elem_size, From 420f171031207a13c83560b2ebb32ccc3c8ed6df Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:52 +0800 Subject: [PATCH 15/98] lib min_heap: add min_heap_del() Add min_heap_del() to delete the element at index 'idx' in the heap. Link: https://lkml.kernel.org/r/20240524152958.919343-11-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index ddd0186afb77..98e838195e7e 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -212,4 +212,28 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, #define min_heap_push(_heap, _element, _func, _args) \ __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) +/* Remove ith element from the heap, O(log2(nr)). */ +static __always_inline +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + void *data = heap->data; + + if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) + return false; + + /* Place last element at the root (position 0) and then sift down. */ + heap->nr--; + if (idx == heap->nr) + return true; + func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args); + __min_heap_sift_up(heap, elem_size, idx, func, args); + __min_heapify(heap, idx, elem_size, func, args); + + return true; +} + +#define min_heap_del(_heap, _idx, _func, _args) \ + __min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) + #endif /* _LINUX_MIN_HEAP_H */ From 2eb637c649a3d89c5483dc851ce98d56717a36d2 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:53 +0800 Subject: [PATCH 16/98] lib min_heap: update min_heap_push() and min_heap_pop() to return bool values Modify the min_heap_push() and min_heap_pop() to return a boolean value. They now return false when the operation fails and true when it succeeds. 
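A minimal sketch of how a caller can now check for failure ('heap', 'val' and
'funcs' are placeholder names):

  if (!min_heap_push(&heap, &val, &funcs, NULL))
          pr_warn("min_heap: push on a full heap, dropping element\n");

  if (!min_heap_pop(&heap, &funcs, NULL))
          return -ENOENT;  /* heap was already empty */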
Link: https://lkml.kernel.org/r/20240524152958.919343-12-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 98e838195e7e..3410a5f907ad 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -147,18 +147,20 @@ void __min_heapify_all(min_heap_char *heap, size_t elem_size, /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline -void __min_heap_pop(min_heap_char *heap, size_t elem_size, +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, const struct min_heap_callbacks *func, void *args) { void *data = heap->data; if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) - return; + return false; /* Place last element at the root (position 0) and then sift down. */ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); __min_heapify(heap, 0, elem_size, func, args); + + return true; } #define min_heap_pop(_heap, _func, _args) \ @@ -184,7 +186,7 @@ void __min_heap_pop_push(min_heap_char *heap, /* Push an element on to the heap, O(log2(nr)). */ static __always_inline -void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -192,7 +194,7 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, int pos; if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) - return; + return false; /* Place at the end of data. */ pos = heap->nr; @@ -207,6 +209,8 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, break; func->swp(parent, child, args); } + + return true; } #define min_heap_push(_heap, _element, _func, _args) \ From bfe3127180418f1b569999954cb653b9926f75ae Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:54 +0800 Subject: [PATCH 17/98] lib min_heap: rename min_heapify() to min_heap_sift_down() After adding min_heap_sift_up(), the naming convention has been adjusted to maintain consistency with the min_heap_sift_up(). Consequently, min_heapify() has been renamed to min_heap_sift_down(). 
Link: https://lkml.kernel.org/CAP-5=fVcBAxt8Mw72=NCJPRJfjDaJcqk4rjbadgouAEAHz_q1A@mail.gmail.com Link: https://lkml.kernel.org/r/20240524152958.919343-13-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- drivers/md/dm-vdo/repair.c | 2 +- include/linux/min_heap.h | 14 +++++++------- kernel/events/core.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-vdo/repair.c b/drivers/md/dm-vdo/repair.c index eae990859db4..ff09e4a14333 100644 --- a/drivers/md/dm-vdo/repair.c +++ b/drivers/md/dm-vdo/repair.c @@ -183,7 +183,7 @@ static struct numbered_block_mapping *sort_next_heap_element(struct repair_compl */ last = &repair->entries[--heap->nr]; swap_mappings(heap->data, last, NULL); - min_heapify(heap, 0, &repair_min_heap, NULL); + min_heap_sift_down(heap, 0, &repair_min_heap, NULL); return last; } diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 3410a5f907ad..0baee5787247 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -75,7 +75,7 @@ bool __min_heap_full(min_heap_char *heap) /* Sift the element at pos down the heap. */ static __always_inline -void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, +void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, const struct min_heap_callbacks *func, void *args) { void *left, *right; @@ -108,8 +108,8 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, } } -#define min_heapify(_heap, _pos, _func, _args) \ - __min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heap_sift_down(_heap, _pos, _func, _args) \ + __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) /* Sift up ith element from the heap, O(log2(nr)). */ static __always_inline @@ -139,7 +139,7 @@ void __min_heapify_all(min_heap_char *heap, size_t elem_size, int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - __min_heapify(heap, i, elem_size, func, args); + __min_heap_sift_down(heap, i, elem_size, func, args); } #define min_heapify_all(_heap, _func, _args) \ @@ -158,7 +158,7 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size, /* Place last element at the root (position 0) and then sift down. 
*/ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); - __min_heapify(heap, 0, elem_size, func, args); + __min_heap_sift_down(heap, 0, elem_size, func, args); return true; } @@ -178,7 +178,7 @@ void __min_heap_pop_push(min_heap_char *heap, void *args) { memcpy(heap->data, element, elem_size); - __min_heapify(heap, 0, elem_size, func, args); + __min_heap_sift_down(heap, 0, elem_size, func, args); } #define min_heap_pop_push(_heap, _element, _func, _args) \ @@ -232,7 +232,7 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, return true; func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args); __min_heap_sift_up(heap, elem_size, idx, func, args); - __min_heapify(heap, idx, elem_size, func, args); + __min_heap_sift_down(heap, idx, elem_size, func, args); return true; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 27cafe661740..6da0ab01cfd3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3795,7 +3795,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, *evt = perf_event_groups_next(*evt, pmu); if (*evt) - min_heapify(&event_heap, 0, &perf_min_heap, NULL); + min_heap_sift_down(&event_heap, 0, &perf_min_heap, NULL); else min_heap_pop(&event_heap, &perf_min_heap, NULL); } From e596930fc78b97b71df80df0fb1cbf2efb9fc6e4 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:55 +0800 Subject: [PATCH 18/98] lib min_heap: update min_heap_push() to use min_heap_sift_up() Update min_heap_push() to use min_heap_sift_up() rather than its origin inline version. Link: https://lkml.kernel.org/r/20240524152958.919343-14-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 0baee5787247..43a7b9dcf15e 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -190,7 +190,6 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, const struct min_heap_callbacks *func, void *args) { void *data = heap->data; - void *child, *parent; int pos; if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) @@ -202,13 +201,7 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, heap->nr++; /* Sift child at pos up. */ - for (; pos > 0; pos = (pos - 1) / 2) { - child = data + (pos * elem_size); - parent = data + ((pos - 1) / 2) * elem_size; - if (func->less(parent, child, args)) - break; - func->swp(parent, child, args); - } + __min_heap_sift_up(heap, elem_size, pos, func, args); return true; } From 7099f74dc31058ee9ac01da5590321173c2b771a Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:56 +0800 Subject: [PATCH 19/98] lib/test_min_heap: add test for heap_del() Add test cases for the min_heap_del() to ensure its functionality is thoroughly tested. 
Link: https://lkml.kernel.org/r/20240524152958.919343-15-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- lib/test_min_heap.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c index f59638cf5dfa..9e1feb9b679c 100644 --- a/lib/test_min_heap.c +++ b/lib/test_min_heap.c @@ -165,6 +165,40 @@ static __init int test_heap_pop_push(bool min_heap) return err; } +static __init int test_heap_del(bool min_heap) +{ + int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0, + -3, -1, -2, -4, 0x8000000, 0x7FFFFFF }; + struct min_heap_test heap; + + min_heap_init(&heap, values, ARRAY_SIZE(values)); + heap.nr = ARRAY_SIZE(values); + struct min_heap_callbacks funcs = { + .less = min_heap ? less_than : greater_than, + .swp = swap_ints, + }; + int i, err; + + /* Test with known set of values. */ + min_heapify_all(&heap, &funcs, NULL); + for (i = 0; i < ARRAY_SIZE(values) / 2; i++) + min_heap_del(&heap, get_random_u32() % heap.nr, &funcs, NULL); + err = pop_verify_heap(min_heap, &heap, &funcs); + + + /* Test with randomly generated values. */ + heap.nr = ARRAY_SIZE(values); + for (i = 0; i < heap.nr; i++) + values[i] = get_random_u32(); + min_heapify_all(&heap, &funcs, NULL); + + for (i = 0; i < ARRAY_SIZE(values) / 2; i++) + min_heap_del(&heap, get_random_u32() % heap.nr, &funcs, NULL); + err += pop_verify_heap(min_heap, &heap, &funcs); + + return err; +} + static int __init test_min_heap_init(void) { int err = 0; @@ -175,6 +209,8 @@ static int __init test_min_heap_init(void) err += test_heap_push(false); err += test_heap_pop_push(true); err += test_heap_pop_push(false); + err += test_heap_del(true); + err += test_heap_del(false); if (err) { pr_err("test failed with %d errors\n", err); return -EINVAL; From 866898efbb25bb44fd42848318e46db9e785973a Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:57 +0800 Subject: [PATCH 20/98] bcache: remove heap-related macros and switch to generic min_heap Drop the heap-related macros from bcache and replacing them with the generic min_heap implementation from include/linux. By doing so, code readability is improved by using functions instead of macros. Moreover, the min_heap implementation in include/linux adopts a bottom-up variation compared to the textbook version currently used in bcache. This bottom-up variation allows for approximately 50% reduction in the number of comparison operations during heap siftdown, without changing the number of swaps, thus making it more efficient. 
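To make the comparison-count claim concrete, below is a simplified sketch of the bottom-up siftdown idea on a plain array of ints. It is illustrative only, not the kernel implementation, and the function name is invented: the textbook variant spends two comparisons per level (pick the smaller child, then compare it against the sifted element), whereas the bottom-up variant descends along the smaller child with a single comparison per level and afterwards sifts the saved element back up the few steps it needs:

	static void sift_down_bottom_up(int *h, int nr, int pos)
	{
		int val = h[pos];
		int i = pos, child;

		/* walk down along the smaller child: one comparison per level */
		while ((child = 2 * i + 1) < nr) {
			if (child + 1 < nr && h[child + 1] < h[child])
				child++;
			h[i] = h[child];
			i = child;
		}

		/* sift the saved element back up from the leaf it reached */
		while (i > pos && val < h[(i - 1) / 2]) {
			h[i] = h[(i - 1) / 2];
			i = (i - 1) / 2;
		}
		h[i] = val;
	}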
Link: https://lkml.kernel.org/ioyfizrzq7w7mjrqcadtzsfgpuntowtjdw5pgn4qhvsdp4mqqg@nrlek5vmisbu Link: https://lkml.kernel.org/r/20240524152958.919343-16-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Acked-by: Coly Li Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- drivers/md/bcache/alloc.c | 64 +++++++++++++----- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/bset.c | 124 ++++++++++++++++++++++------------ drivers/md/bcache/bset.h | 40 +++++------ drivers/md/bcache/btree.c | 69 +++++++++++-------- drivers/md/bcache/extents.c | 53 +++++++++------ drivers/md/bcache/movinggc.c | 41 ++++++++--- drivers/md/bcache/super.c | 3 +- drivers/md/bcache/sysfs.c | 4 +- drivers/md/bcache/util.h | 67 +----------------- drivers/md/bcache/writeback.c | 13 ++-- 11 files changed, 263 insertions(+), 217 deletions(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 48ce750bf70a..da50f6661bae 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -164,40 +164,68 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b) * prio is worth 1/8th of what INITIAL_PRIO is worth. */ -#define bucket_prio(b) \ -({ \ - unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \ - \ - (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \ -}) +static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b) +{ + unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; -#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r)) -#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r)) + return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); +} + +static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args) +{ + struct bucket **lhs = (struct bucket **)l; + struct bucket **rhs = (struct bucket **)r; + struct cache *ca = args; + + return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs); +} + +static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args) +{ + struct bucket **lhs = (struct bucket **)l; + struct bucket **rhs = (struct bucket **)r; + struct cache *ca = args; + + return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs); +} + +static inline void new_bucket_swap(void *l, void *r, void __always_unused *args) +{ + struct bucket **lhs = l, **rhs = r; + + swap(*lhs, *rhs); +} static void invalidate_buckets_lru(struct cache *ca) { struct bucket *b; - ssize_t i; + const struct min_heap_callbacks bucket_max_cmp_callback = { + .less = new_bucket_max_cmp, + .swp = new_bucket_swap, + }; + const struct min_heap_callbacks bucket_min_cmp_callback = { + .less = new_bucket_min_cmp, + .swp = new_bucket_swap, + }; - ca->heap.used = 0; + ca->heap.nr = 0; for_each_bucket(b, ca) { if (!bch_can_invalidate_bucket(ca, b)) continue; - if (!heap_full(&ca->heap)) - heap_add(&ca->heap, b, bucket_max_cmp); - else if (bucket_max_cmp(b, heap_peek(&ca->heap))) { + if (!min_heap_full(&ca->heap)) + min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca); + else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) { ca->heap.data[0] = b; - heap_sift(&ca->heap, 0, bucket_max_cmp); + min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca); } } - for (i = ca->heap.used / 2 - 1; i >= 0; --i) - 
heap_sift(&ca->heap, i, bucket_min_cmp); + min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca); while (!fifo_full(&ca->free_inc)) { - if (!heap_pop(&ca->heap, b, bucket_min_cmp)) { + if (!ca->heap.nr) { /* * We don't want to be calling invalidate_buckets() * multiple times when it can't do anything @@ -206,6 +234,8 @@ static void invalidate_buckets_lru(struct cache *ca) wake_up_gc(ca->set); return; } + b = min_heap_peek(&ca->heap)[0]; + min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca); bch_invalidate_one_bucket(ca, b); } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 1d33e40d26ea..785b0d9008fa 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -458,7 +458,7 @@ struct cache { /* Allocation stuff: */ struct bucket *buckets; - DECLARE_HEAP(struct bucket *, heap); + DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap; /* * If nonzero, we know we aren't going to find any buckets to invalidate diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 463eb13bd0b2..bd97d8626887 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -54,9 +54,11 @@ void bch_dump_bucket(struct btree_keys *b) int __bch_count_data(struct btree_keys *b) { unsigned int ret = 0; - struct btree_iter_stack iter; + struct btree_iter iter; struct bkey *k; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + if (b->ops->is_extents) for_each_key(b, k, &iter) ret += KEY_SIZE(k); @@ -67,9 +69,11 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) { va_list args; struct bkey *k, *p = NULL; - struct btree_iter_stack iter; + struct btree_iter iter; const char *err; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + for_each_key(b, k, &iter) { if (b->ops->is_extents) { err = "Keys out of order"; @@ -110,9 +114,9 @@ bug: static void bch_btree_iter_next_check(struct btree_iter *iter) { - struct bkey *k = iter->data->k, *next = bkey_next(k); + struct bkey *k = iter->heap.data->k, *next = bkey_next(k); - if (next < iter->data->end && + if (next < iter->heap.data->end && bkey_cmp(k, iter->b->ops->is_extents ? 
&START_KEY(next) : next) > 0) { bch_dump_bucket(iter->b); @@ -879,12 +883,14 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, unsigned int status = BTREE_INSERT_STATUS_NO_INSERT; struct bset *i = bset_tree_last(b)->data; struct bkey *m, *prev = NULL; - struct btree_iter_stack iter; + struct btree_iter iter; struct bkey preceding_key_on_stack = ZERO_KEY; struct bkey *preceding_key_p = &preceding_key_on_stack; BUG_ON(b->ops->is_extents && !KEY_SIZE(k)); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + /* * If k has preceding key, preceding_key_p will be set to address * of k's preceding key; otherwise preceding_key_p will be set @@ -895,9 +901,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, else preceding_key(k, &preceding_key_p); - m = bch_btree_iter_stack_init(b, &iter, preceding_key_p); + m = bch_btree_iter_init(b, &iter, preceding_key_p); - if (b->ops->insert_fixup(b, k, &iter.iter, replace_key)) + if (b->ops->insert_fixup(b, k, &iter, replace_key)) return status; status = BTREE_INSERT_STATUS_INSERT; @@ -1077,79 +1083,102 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, /* Btree iterator */ -typedef bool (btree_iter_cmp_fn)(struct btree_iter_set, - struct btree_iter_set); +typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *); -static inline bool btree_iter_cmp(struct btree_iter_set l, - struct btree_iter_set r) +static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args) { - return bkey_cmp(l.k, r.k) > 0; + const struct btree_iter_set *_l = l; + const struct btree_iter_set *_r = r; + + return bkey_cmp(_l->k, _r->k) <= 0; +} + +static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args) +{ + struct btree_iter_set *_iter1 = iter1; + struct btree_iter_set *_iter2 = iter2; + + swap(*_iter1, *_iter2); } static inline bool btree_iter_end(struct btree_iter *iter) { - return !iter->used; + return !iter->heap.nr; } void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end) { + const struct min_heap_callbacks callbacks = { + .less = new_btree_iter_cmp, + .swp = new_btree_iter_swap, + }; + if (k != end) - BUG_ON(!heap_add(iter, - ((struct btree_iter_set) { k, end }), - btree_iter_cmp)); + BUG_ON(!min_heap_push(&iter->heap, + &((struct btree_iter_set) { k, end }), + &callbacks, + NULL)); } -static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b, - struct btree_iter_stack *iter, - struct bkey *search, - struct bset_tree *start) +static struct bkey *__bch_btree_iter_init(struct btree_keys *b, + struct btree_iter *iter, + struct bkey *search, + struct bset_tree *start) { struct bkey *ret = NULL; - iter->iter.size = ARRAY_SIZE(iter->stack_data); - iter->iter.used = 0; + iter->heap.size = ARRAY_SIZE(iter->heap.preallocated); + iter->heap.nr = 0; #ifdef CONFIG_BCACHE_DEBUG - iter->iter.b = b; + iter->b = b; #endif for (; start <= bset_tree_last(b); start++) { ret = bch_bset_search(b, start, search); - bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data)); + bch_btree_iter_push(iter, ret, bset_bkey_last(start->data)); } return ret; } -struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, - struct btree_iter_stack *iter, +struct bkey *bch_btree_iter_init(struct btree_keys *b, + struct btree_iter *iter, struct bkey *search) { - return __bch_btree_iter_stack_init(b, iter, search, b->set); + return __bch_btree_iter_init(b, iter, search, b->set); } static inline struct bkey 
*__bch_btree_iter_next(struct btree_iter *iter, - btree_iter_cmp_fn *cmp) + new_btree_iter_cmp_fn *cmp) { struct btree_iter_set b __maybe_unused; struct bkey *ret = NULL; + const struct min_heap_callbacks callbacks = { + .less = cmp, + .swp = new_btree_iter_swap, + }; if (!btree_iter_end(iter)) { bch_btree_iter_next_check(iter); - ret = iter->data->k; - iter->data->k = bkey_next(iter->data->k); + ret = iter->heap.data->k; + iter->heap.data->k = bkey_next(iter->heap.data->k); - if (iter->data->k > iter->data->end) { + if (iter->heap.data->k > iter->heap.data->end) { WARN_ONCE(1, "bset was corrupt!\n"); - iter->data->k = iter->data->end; + iter->heap.data->k = iter->heap.data->end; } - if (iter->data->k == iter->data->end) - heap_pop(iter, b, cmp); + if (iter->heap.data->k == iter->heap.data->end) { + if (iter->heap.nr) { + b = min_heap_peek(&iter->heap)[0]; + min_heap_pop(&iter->heap, &callbacks, NULL); + } + } else - heap_sift(iter, 0, cmp); + min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); } return ret; @@ -1157,7 +1186,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, struct bkey *bch_btree_iter_next(struct btree_iter *iter) { - return __bch_btree_iter_next(iter, btree_iter_cmp); + return __bch_btree_iter_next(iter, new_btree_iter_cmp); } @@ -1195,16 +1224,18 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out, struct btree_iter *iter, bool fixup, bool remove_stale) { - int i; struct bkey *k, *last = NULL; BKEY_PADDED(k) tmp; bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale ? bch_ptr_bad : bch_ptr_invalid; + const struct min_heap_callbacks callbacks = { + .less = b->ops->sort_cmp, + .swp = new_btree_iter_swap, + }; /* Heapify the iterator, using our comparison function */ - for (i = iter->used / 2 - 1; i >= 0; --i) - heap_sift(iter, i, b->ops->sort_cmp); + min_heapify_all(&iter->heap, &callbacks, NULL); while (!btree_iter_end(iter)) { if (b->ops->sort_fixup && fixup) @@ -1293,10 +1324,11 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, struct bset_sort_state *state) { size_t order = b->page_order, keys = 0; - struct btree_iter_stack iter; + struct btree_iter iter; int oldsize = bch_count_data(b); - __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + __bch_btree_iter_init(b, &iter, NULL, &b->set[start]); if (start) { unsigned int i; @@ -1307,7 +1339,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, order = get_order(__set_bytes(b->set->data, keys)); } - __btree_sort(b, &iter.iter, start, order, false, state); + __btree_sort(b, &iter, start, order, false, state); EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize); } @@ -1323,11 +1355,13 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new, struct bset_sort_state *state) { uint64_t start_time = local_clock(); - struct btree_iter_stack iter; + struct btree_iter iter; - bch_btree_iter_stack_init(b, &iter, NULL); + min_heap_init(&iter.heap, NULL, MAX_BSETS); - btree_mergesort(b, new->set->data, &iter.iter, false, true); + bch_btree_iter_init(b, &iter, NULL); + + btree_mergesort(b, new->set->data, &iter, false, true); bch_time_stats_update(&state->time, start_time); diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 011f6062c4c0..f79441acd4c1 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -187,8 +187,9 @@ struct bset_tree { }; struct btree_keys_ops { - bool (*sort_cmp)(struct btree_iter_set l, - struct 
btree_iter_set r); + bool (*sort_cmp)(const void *l, + const void *r, + void *args); struct bkey *(*sort_fixup)(struct btree_iter *iter, struct bkey *tmp); bool (*insert_fixup)(struct btree_keys *b, @@ -312,23 +313,17 @@ enum { BTREE_INSERT_STATUS_FRONT_MERGE, }; +struct btree_iter_set { + struct bkey *k, *end; +}; + /* Btree key iteration */ struct btree_iter { - size_t size, used; #ifdef CONFIG_BCACHE_DEBUG struct btree_keys *b; #endif - struct btree_iter_set { - struct bkey *k, *end; - } data[]; -}; - -/* Fixed-size btree_iter that can be allocated on the stack */ - -struct btree_iter_stack { - struct btree_iter iter; - struct btree_iter_set stack_data[MAX_BSETS]; + MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap; }; typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k); @@ -340,9 +335,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end); -struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, - struct btree_iter_stack *iter, - struct bkey *search); +struct bkey *bch_btree_iter_init(struct btree_keys *b, + struct btree_iter *iter, + struct bkey *search); struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, const struct bkey *search); @@ -357,14 +352,13 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b, return search ? __bch_bset_search(b, t, search) : t->data->start; } -#define for_each_key_filter(b, k, stack_iter, filter) \ - for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ - ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \ - filter));) +#define for_each_key_filter(b, k, iter, filter) \ + for (bch_btree_iter_init((b), (iter), NULL); \ + ((k) = bch_btree_iter_next_filter((iter), (b), filter));) -#define for_each_key(b, k, stack_iter) \ - for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ - ((k) = bch_btree_iter_next(&((stack_iter)->iter)));) +#define for_each_key(b, k, iter) \ + for (bch_btree_iter_init((b), (iter), NULL); \ + ((k) = bch_btree_iter_next(iter));) /* Sorting */ diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 4e6ccf2c8a0b..ed40d8600656 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -149,19 +149,19 @@ void bch_btree_node_read_done(struct btree *b) { const char *err = "bad btree header"; struct bset *i = btree_bset_first(b); - struct btree_iter *iter; + struct btree_iter iter; /* * c->fill_iter can allocate an iterator with more memory space * than static MAX_BSETS. * See the comment arount cache_set->fill_iter. 
*/ - iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); - iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; - iter->used = 0; + iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO); + iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; + iter.heap.nr = 0; #ifdef CONFIG_BCACHE_DEBUG - iter->b = &b->keys; + iter.b = &b->keys; #endif if (!i->seq) @@ -199,7 +199,7 @@ void bch_btree_node_read_done(struct btree *b) if (i != b->keys.set[0].data && !i->keys) goto err; - bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); + bch_btree_iter_push(&iter, i->start, bset_bkey_last(i)); b->written += set_blocks(i, block_bytes(b->c->cache)); } @@ -211,7 +211,7 @@ void bch_btree_node_read_done(struct btree *b) if (i->seq == b->keys.set[0].data->seq) goto err; - bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort); + bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort); i = b->keys.set[0].data; err = "short btree key"; @@ -223,7 +223,7 @@ void bch_btree_node_read_done(struct btree *b) bch_bset_init_next(&b->keys, write_block(b), bset_magic(&b->c->cache->sb)); out: - mempool_free(iter, &b->c->fill_iter); + mempool_free(iter.heap.data, &b->c->fill_iter); return; err: set_btree_node_io_error(b); @@ -1309,9 +1309,11 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) uint8_t stale = 0; unsigned int keys = 0, good_keys = 0; struct bkey *k; - struct btree_iter_stack iter; + struct btree_iter iter; struct bset_tree *t; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + gc->nodes++; for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) { @@ -1570,9 +1572,11 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, static unsigned int btree_gc_count_keys(struct btree *b) { struct bkey *k; - struct btree_iter_stack iter; + struct btree_iter iter; unsigned int ret = 0; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) ret += bkey_u64s(k); @@ -1611,18 +1615,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, int ret = 0; bool should_rewrite; struct bkey *k; - struct btree_iter_stack iter; + struct btree_iter iter; struct gc_merge_info r[GC_MERGE_NODES]; struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1; - bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done); for (i = r; i < r + ARRAY_SIZE(r); i++) i->b = ERR_PTR(-EINTR); while (1) { - k = bch_btree_iter_next_filter(&iter.iter, &b->keys, - bch_ptr_bad); + k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); if (k) { r->b = bch_btree_node_get(b->c, op, k, b->level - 1, true, b); @@ -1917,7 +1921,9 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) { int ret = 0; struct bkey *k, *p = NULL; - struct btree_iter_stack iter; + struct btree_iter iter; + + min_heap_init(&iter.heap, NULL, MAX_BSETS); for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(b->c, b->level, k); @@ -1925,10 +1931,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) bch_initial_mark_key(b->c, b->level + 1, &b->key); if (b->level) { - bch_btree_iter_stack_init(&b->keys, &iter, NULL); + bch_btree_iter_init(&b->keys, &iter, NULL); do { - k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); if (k) { btree_node_prefetch(b, k); @@ -1956,7 +1962,7 @@ static int 
bch_btree_check_thread(void *arg) struct btree_check_info *info = arg; struct btree_check_state *check_state = info->state; struct cache_set *c = check_state->c; - struct btree_iter_stack iter; + struct btree_iter iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; @@ -1964,9 +1970,11 @@ static int bch_btree_check_thread(void *arg) cur_idx = prev_idx = 0; ret = 0; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + /* root node keys are checked before thread created */ - bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); - k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); + bch_btree_iter_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); p = k; @@ -1984,7 +1992,7 @@ static int bch_btree_check_thread(void *arg) skip_nr = cur_idx - prev_idx; while (skip_nr) { - k = bch_btree_iter_next_filter(&iter.iter, + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); if (k) @@ -2057,9 +2065,11 @@ int bch_btree_check(struct cache_set *c) int ret = 0; int i; struct bkey *k = NULL; - struct btree_iter_stack iter; + struct btree_iter iter; struct btree_check_state check_state; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + /* check and mark root node keys */ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(c, c->root->level, k); @@ -2553,11 +2563,12 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, if (b->level) { struct bkey *k; - struct btree_iter_stack iter; + struct btree_iter iter; - bch_btree_iter_stack_init(&b->keys, &iter, from); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + bch_btree_iter_init(&b->keys, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { ret = bcache_btree(map_nodes_recurse, k, b, op, from, fn, flags); @@ -2586,12 +2597,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, { int ret = MAP_CONTINUE; struct bkey *k; - struct btree_iter_stack iter; + struct btree_iter iter; - bch_btree_iter_stack_init(&b->keys, &iter, from); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + bch_btree_iter_init(&b->keys, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys, - bch_ptr_bad))) { + while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { ret = !b->level ? fn(op, b, k) : bcache_btree(map_keys_recurse, k, diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index d626ffcbecb9..a7221e5dbe81 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -33,15 +33,16 @@ static void sort_key_next(struct btree_iter *iter, i->k = bkey_next(i->k); if (i->k == i->end) - *i = iter->data[--iter->used]; + *i = iter->heap.data[--iter->heap.nr]; } -static bool bch_key_sort_cmp(struct btree_iter_set l, - struct btree_iter_set r) +static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args) { - int64_t c = bkey_cmp(l.k, r.k); + struct btree_iter_set *_l = (struct btree_iter_set *)l; + struct btree_iter_set *_r = (struct btree_iter_set *)r; + int64_t c = bkey_cmp(_l->k, _r->k); - return c ? c > 0 : l.k < r.k; + return !(c ? 
c > 0 : _l->k < _r->k); } static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) @@ -238,7 +239,7 @@ static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk, } const struct btree_keys_ops bch_btree_keys_ops = { - .sort_cmp = bch_key_sort_cmp, + .sort_cmp = new_bch_key_sort_cmp, .insert_fixup = bch_btree_ptr_insert_fixup, .key_invalid = bch_btree_ptr_invalid, .key_bad = bch_btree_ptr_bad, @@ -255,22 +256,36 @@ const struct btree_keys_ops bch_btree_keys_ops = { * Necessary for btree_sort_fixup() - if there are multiple keys that compare * equal in different sets, we have to process them newest to oldest. */ -static bool bch_extent_sort_cmp(struct btree_iter_set l, - struct btree_iter_set r) -{ - int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); - return c ? c > 0 : l.k < r.k; +static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args) +{ + struct btree_iter_set *_l = (struct btree_iter_set *)l; + struct btree_iter_set *_r = (struct btree_iter_set *)r; + int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k)); + + return !(c ? c > 0 : _l->k < _r->k); +} + +static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args) +{ + struct btree_iter_set *_iter1 = iter1; + struct btree_iter_set *_iter2 = iter2; + + swap(*_iter1, *_iter2); } static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, struct bkey *tmp) { - while (iter->used > 1) { - struct btree_iter_set *top = iter->data, *i = top + 1; + const struct min_heap_callbacks callbacks = { + .less = new_bch_extent_sort_cmp, + .swp = new_btree_iter_swap, + }; + while (iter->heap.nr > 1) { + struct btree_iter_set *top = iter->heap.data, *i = top + 1; - if (iter->used > 2 && - bch_extent_sort_cmp(i[0], i[1])) + if (iter->heap.nr > 2 && + !new_bch_extent_sort_cmp(&i[0], &i[1], NULL)) i++; if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) @@ -278,7 +293,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, if (!KEY_SIZE(i->k)) { sort_key_next(iter, i); - heap_sift(iter, i - top, bch_extent_sort_cmp); + min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); continue; } @@ -288,7 +303,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, else bch_cut_front(top->k, i->k); - heap_sift(iter, i - top, bch_extent_sort_cmp); + min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); } else { /* can't happen because of comparison func */ BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); @@ -298,7 +313,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, bch_cut_back(&START_KEY(i->k), tmp); bch_cut_front(i->k, top->k); - heap_sift(iter, 0, bch_extent_sort_cmp); + min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); return tmp; } else { @@ -618,7 +633,7 @@ static bool bch_extent_merge(struct btree_keys *bk, } const struct btree_keys_ops bch_extent_keys_ops = { - .sort_cmp = bch_extent_sort_cmp, + .sort_cmp = new_bch_extent_sort_cmp, .sort_fixup = bch_extent_sort_fixup, .insert_fixup = bch_extent_insert_fixup, .key_invalid = bch_extent_invalid, diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index ebd500bdf0b2..7f482729c56d 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -182,16 +182,27 @@ err: if (!IS_ERR_OR_NULL(w->private)) closure_sync(&cl); } -static bool bucket_cmp(struct bucket *l, struct bucket *r) +static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args) { - return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); 
+ struct bucket **_l = (struct bucket **)l; + struct bucket **_r = (struct bucket **)r; + + return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r); +} + +static void new_bucket_swap(void *l, void *r, void __always_unused *args) +{ + struct bucket **_l = l; + struct bucket **_r = r; + + swap(*_l, *_r); } static unsigned int bucket_heap_top(struct cache *ca) { struct bucket *b; - return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0; + return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0; } void bch_moving_gc(struct cache_set *c) @@ -199,6 +210,10 @@ void bch_moving_gc(struct cache_set *c) struct cache *ca = c->cache; struct bucket *b; unsigned long sectors_to_move, reserve_sectors; + const struct min_heap_callbacks callbacks = { + .less = new_bucket_cmp, + .swp = new_bucket_swap, + }; if (!c->copy_gc_enabled) return; @@ -209,7 +224,7 @@ void bch_moving_gc(struct cache_set *c) reserve_sectors = ca->sb.bucket_size * fifo_used(&ca->free[RESERVE_MOVINGGC]); - ca->heap.used = 0; + ca->heap.nr = 0; for_each_bucket(b, ca) { if (GC_MARK(b) == GC_MARK_METADATA || @@ -218,25 +233,31 @@ void bch_moving_gc(struct cache_set *c) atomic_read(&b->pin)) continue; - if (!heap_full(&ca->heap)) { + if (!min_heap_full(&ca->heap)) { sectors_to_move += GC_SECTORS_USED(b); - heap_add(&ca->heap, b, bucket_cmp); - } else if (bucket_cmp(b, heap_peek(&ca->heap))) { + min_heap_push(&ca->heap, &b, &callbacks, NULL); + } else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) { sectors_to_move -= bucket_heap_top(ca); sectors_to_move += GC_SECTORS_USED(b); ca->heap.data[0] = b; - heap_sift(&ca->heap, 0, bucket_cmp); + min_heap_sift_down(&ca->heap, 0, &callbacks, NULL); } } while (sectors_to_move > reserve_sectors) { - heap_pop(&ca->heap, b, bucket_cmp); + if (ca->heap.nr) { + b = min_heap_peek(&ca->heap)[0]; + min_heap_pop(&ca->heap, &callbacks, NULL); + } sectors_to_move -= GC_SECTORS_USED(b); } - while (heap_pop(&ca->heap, b, bucket_cmp)) + while (ca->heap.nr) { + b = min_heap_peek(&ca->heap)[0]; + min_heap_pop(&ca->heap, &callbacks, NULL); SET_GC_MOVE(b, 1); + } mutex_unlock(&c->bucket_lock); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 4d11fc664cb0..1240d1b09e8c 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1914,8 +1914,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) INIT_LIST_HEAD(&c->btree_cache_freed); INIT_LIST_HEAD(&c->data_buckets); - iter_size = sizeof(struct btree_iter) + - ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * + iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * sizeof(struct btree_iter_set); c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 826b14cae4e5..e8f696cb58c0 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -660,7 +660,9 @@ static unsigned int bch_root_usage(struct cache_set *c) unsigned int bytes = 0; struct bkey *k; struct btree *b; - struct btree_iter_stack iter; + struct btree_iter iter; + + min_heap_init(&iter.heap, NULL, MAX_BSETS); goto lock_root; diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index f61ab1bada6c..539454d8e2d0 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -30,16 +31,10 @@ struct closure; #endif -#define DECLARE_HEAP(type, name) \ - struct { \ - size_t size, used; \ - type *data; \ - } name - #define init_heap(heap, 
_size, gfp) \ ({ \ size_t _bytes; \ - (heap)->used = 0; \ + (heap)->nr = 0; \ (heap)->size = (_size); \ _bytes = (heap)->size * sizeof(*(heap)->data); \ (heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \ @@ -52,64 +47,6 @@ do { \ (heap)->data = NULL; \ } while (0) -#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j]) - -#define heap_sift(h, i, cmp) \ -do { \ - size_t _r, _j = i; \ - \ - for (; _j * 2 + 1 < (h)->used; _j = _r) { \ - _r = _j * 2 + 1; \ - if (_r + 1 < (h)->used && \ - cmp((h)->data[_r], (h)->data[_r + 1])) \ - _r++; \ - \ - if (cmp((h)->data[_r], (h)->data[_j])) \ - break; \ - heap_swap(h, _r, _j); \ - } \ -} while (0) - -#define heap_sift_down(h, i, cmp) \ -do { \ - while (i) { \ - size_t p = (i - 1) / 2; \ - if (cmp((h)->data[i], (h)->data[p])) \ - break; \ - heap_swap(h, i, p); \ - i = p; \ - } \ -} while (0) - -#define heap_add(h, d, cmp) \ -({ \ - bool _r = !heap_full(h); \ - if (_r) { \ - size_t _i = (h)->used++; \ - (h)->data[_i] = d; \ - \ - heap_sift_down(h, _i, cmp); \ - heap_sift(h, _i, cmp); \ - } \ - _r; \ -}) - -#define heap_pop(h, d, cmp) \ -({ \ - bool _r = (h)->used; \ - if (_r) { \ - (d) = (h)->data[0]; \ - (h)->used--; \ - heap_swap(h, 0, (h)->used); \ - heap_sift(h, 0, cmp); \ - } \ - _r; \ -}) - -#define heap_peek(h) ((h)->used ? (h)->data[0] : NULL) - -#define heap_full(h) ((h)->used == (h)->size) - #define DECLARE_FIFO(type, name) \ struct { \ size_t front, back, size, mask; \ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 792e070ccf38..c1d28e365910 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -908,15 +908,16 @@ static int bch_dirty_init_thread(void *arg) struct dirty_init_thrd_info *info = arg; struct bch_dirty_init_state *state = info->state; struct cache_set *c = state->c; - struct btree_iter_stack iter; + struct btree_iter iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; k = p = NULL; prev_idx = 0; - bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); - k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); + min_heap_init(&iter.heap, NULL, MAX_BSETS); + bch_btree_iter_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); p = k; @@ -930,7 +931,7 @@ static int bch_dirty_init_thread(void *arg) skip_nr = cur_idx - prev_idx; while (skip_nr) { - k = bch_btree_iter_next_filter(&iter.iter, + k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); if (k) @@ -979,11 +980,13 @@ void bch_sectors_dirty_init(struct bcache_device *d) int i; struct btree *b = NULL; struct bkey *k = NULL; - struct btree_iter_stack iter; + struct btree_iter iter; struct sectors_dirty_init op; struct cache_set *c = d->c; struct bch_dirty_init_state state; + min_heap_init(&iter.heap, NULL, MAX_BSETS); + retry_lock: b = c->root; rw_lock(0, b, b->level); From 1fcce6b8a768b7fc0bc788abd79658cbf9b21536 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:58 +0800 Subject: [PATCH 21/98] bcachefs: remove heap-related macros and switch to generic min_heap Drop the heap-related macros from bcachefs and replacing them with the generic min_heap implementation from include/linux. By doing so, code readability is improved by using functions instead of macros. Moreover, the min_heap implementation in include/linux adopts a bottom-up variation compared to the textbook version currently used in bcachefs. 
This bottom-up variation allows for approximately 50% reduction in the number of comparison operations during heap siftdown, without changing the number of swaps, thus making it more efficient. [visitorckw@gmail.com: fix missing assignment of minimum element] Link: https://lkml.kernel.org/r/20240602174828.1955320-1-visitorckw@gmail.com Link: https://lkml.kernel.org/ioyfizrzq7w7mjrqcadtzsfgpuntowtjdw5pgn4qhvsdp4mqqg@nrlek5vmisbu Link: https://lkml.kernel.org/r/20240524152958.919343-17-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Acked-by: Kent Overstreet Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- fs/bcachefs/clock.c | 47 +++++++++++---- fs/bcachefs/clock_types.h | 2 +- fs/bcachefs/ec.c | 78 ++++++++++++++++--------- fs/bcachefs/ec_types.h | 2 +- fs/bcachefs/util.h | 118 +------------------------------------- 5 files changed, 91 insertions(+), 156 deletions(-) diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index 363644451106..18fab9c44b1b 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -6,16 +6,29 @@ #include #include -static inline long io_timer_cmp(io_timer_heap *h, - struct io_timer *l, - struct io_timer *r) +static inline bool io_timer_cmp(const void *l, const void *r, void __always_unused *args) { - return l->expire - r->expire; + struct io_timer **_l = (struct io_timer **)l; + struct io_timer **_r = (struct io_timer **)r; + + return (*_l)->expire < (*_r)->expire; +} + +static inline void io_timer_swp(void *l, void *r, void __always_unused *args) +{ + struct io_timer **_l = (struct io_timer **)l; + struct io_timer **_r = (struct io_timer **)r; + + swap(*_l, *_r); } void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) { size_t i; + const struct min_heap_callbacks callbacks = { + .less = io_timer_cmp, + .swp = io_timer_swp, + }; spin_lock(&clock->timer_lock); @@ -26,11 +39,11 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) return; } - for (i = 0; i < clock->timers.used; i++) + for (i = 0; i < clock->timers.nr; i++) if (clock->timers.data[i] == timer) goto out; - BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL)); + BUG_ON(!min_heap_push(&clock->timers, &timer, &callbacks, NULL)); out: spin_unlock(&clock->timer_lock); } @@ -38,12 +51,16 @@ out: void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) { size_t i; + const struct min_heap_callbacks callbacks = { + .less = io_timer_cmp, + .swp = io_timer_swp, + }; spin_lock(&clock->timer_lock); - for (i = 0; i < clock->timers.used; i++) + for (i = 0; i < clock->timers.nr; i++) if (clock->timers.data[i] == timer) { - heap_del(&clock->timers, i, io_timer_cmp, NULL); + min_heap_del(&clock->timers, i, &callbacks, NULL); break; } @@ -131,12 +148,18 @@ static struct io_timer *get_expired_timer(struct io_clock *clock, unsigned long now) { struct io_timer *ret = NULL; + const struct min_heap_callbacks callbacks = { + .less = io_timer_cmp, + .swp = io_timer_swp, + }; spin_lock(&clock->timer_lock); - if (clock->timers.used && - time_after_eq(now, clock->timers.data[0]->expire)) - heap_pop(&clock->timers, ret, io_timer_cmp, NULL); + if (clock->timers.nr && + time_after_eq(now, clock->timers.data[0]->expire)) { + ret = *min_heap_peek(&clock->timers); + min_heap_pop(&clock->timers, 
&callbacks, NULL); + } spin_unlock(&clock->timer_lock); @@ -161,7 +184,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) spin_lock(&clock->timer_lock); now = atomic64_read(&clock->now); - for (i = 0; i < clock->timers.used; i++) + for (i = 0; i < clock->timers.nr; i++) prt_printf(out, "%ps:\t%li\n", clock->timers.data[i]->fn, clock->timers.data[i]->expire - now); diff --git a/fs/bcachefs/clock_types.h b/fs/bcachefs/clock_types.h index 5fae0012d808..f2c8a25b7079 100644 --- a/fs/bcachefs/clock_types.h +++ b/fs/bcachefs/clock_types.h @@ -23,7 +23,7 @@ struct io_timer { /* Amount to buffer up on a percpu counter */ #define IO_CLOCK_PCPU_SECTORS 128 -typedef HEAP(struct io_timer *) io_timer_heap; +typedef DEFINE_MIN_HEAP(struct io_timer *, io_timer_heap) io_timer_heap; struct io_clock { atomic64_t now; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 83e279d41829..452d6b142784 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -910,8 +910,8 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) mutex_lock(&c->ec_stripes_heap_lock); if (n.size > h->size) { - memcpy(n.data, h->data, h->used * sizeof(h->data[0])); - n.used = h->used; + memcpy(n.data, h->data, h->nr * sizeof(h->data[0])); + n.nr = h->nr; swap(*h, n); } mutex_unlock(&c->ec_stripes_heap_lock); @@ -1002,7 +1002,7 @@ static u64 stripe_idx_to_delete(struct bch_fs *c) lockdep_assert_held(&c->ec_stripes_heap_lock); - if (h->used && + if (h->nr && h->data[0].blocks_nonempty == 0 && !bch2_stripe_is_open(c, h->data[0].idx)) return h->data[0].idx; @@ -1010,14 +1010,6 @@ static u64 stripe_idx_to_delete(struct bch_fs *c) return 0; } -static inline int ec_stripes_heap_cmp(ec_stripes_heap *h, - struct ec_stripe_heap_entry l, - struct ec_stripe_heap_entry r) -{ - return ((l.blocks_nonempty > r.blocks_nonempty) - - (l.blocks_nonempty < r.blocks_nonempty)); -} - static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, size_t i) { @@ -1026,39 +1018,71 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i; } +static inline bool ec_stripes_heap_cmp(const void *l, const void *r, void __always_unused *args) +{ + struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l; + struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r; + + return ((_l->blocks_nonempty > _r->blocks_nonempty) < + (_l->blocks_nonempty < _r->blocks_nonempty)); +} + +static inline void ec_stripes_heap_swap(void *l, void *r, void *h) +{ + struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l; + struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r; + ec_stripes_heap *_h = (ec_stripes_heap *)h; + size_t i = _l - _h->data; + size_t j = _r - _h->data; + + swap(*_l, *_r); + + ec_stripes_heap_set_backpointer(_h, i); + ec_stripes_heap_set_backpointer(_h, j); +} + static void heap_verify_backpointer(struct bch_fs *c, size_t idx) { ec_stripes_heap *h = &c->ec_stripes_heap; struct stripe *m = genradix_ptr(&c->stripes, idx); - BUG_ON(m->heap_idx >= h->used); + BUG_ON(m->heap_idx >= h->nr); BUG_ON(h->data[m->heap_idx].idx != idx); } void bch2_stripes_heap_del(struct bch_fs *c, struct stripe *m, size_t idx) { + const struct min_heap_callbacks callbacks = { + .less = ec_stripes_heap_cmp, + .swp = ec_stripes_heap_swap, + }; + mutex_lock(&c->ec_stripes_heap_lock); heap_verify_backpointer(c, idx); - heap_del(&c->ec_stripes_heap, m->heap_idx, - ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); + 
min_heap_del(&c->ec_stripes_heap, m->heap_idx, &callbacks, &c->ec_stripes_heap); mutex_unlock(&c->ec_stripes_heap_lock); } void bch2_stripes_heap_insert(struct bch_fs *c, struct stripe *m, size_t idx) { - mutex_lock(&c->ec_stripes_heap_lock); - BUG_ON(heap_full(&c->ec_stripes_heap)); + const struct min_heap_callbacks callbacks = { + .less = ec_stripes_heap_cmp, + .swp = ec_stripes_heap_swap, + }; - heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) { + mutex_lock(&c->ec_stripes_heap_lock); + BUG_ON(min_heap_full(&c->ec_stripes_heap)); + + genradix_ptr(&c->stripes, idx)->heap_idx = c->ec_stripes_heap.nr; + min_heap_push(&c->ec_stripes_heap, &((struct ec_stripe_heap_entry) { .idx = idx, .blocks_nonempty = m->blocks_nonempty, }), - ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); + &callbacks, + &c->ec_stripes_heap); heap_verify_backpointer(c, idx); mutex_unlock(&c->ec_stripes_heap_lock); @@ -1067,6 +1091,10 @@ void bch2_stripes_heap_insert(struct bch_fs *c, void bch2_stripes_heap_update(struct bch_fs *c, struct stripe *m, size_t idx) { + const struct min_heap_callbacks callbacks = { + .less = ec_stripes_heap_cmp, + .swp = ec_stripes_heap_swap, + }; ec_stripes_heap *h = &c->ec_stripes_heap; bool do_deletes; size_t i; @@ -1077,10 +1105,8 @@ void bch2_stripes_heap_update(struct bch_fs *c, h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty; i = m->heap_idx; - heap_sift_up(h, i, ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); - heap_sift_down(h, i, ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); + min_heap_sift_up(h, i, &callbacks, &c->ec_stripes_heap); + min_heap_sift_down(h, i, &callbacks, &c->ec_stripes_heap); heap_verify_backpointer(c, idx); @@ -1873,7 +1899,7 @@ static s64 get_existing_stripe(struct bch_fs *c, return -1; mutex_lock(&c->ec_stripes_heap_lock); - for (heap_idx = 0; heap_idx < h->used; heap_idx++) { + for (heap_idx = 0; heap_idx < h->nr; heap_idx++) { /* No blocks worth reusing, stripe will just be deleted: */ if (!h->data[heap_idx].blocks_nonempty) continue; @@ -2204,7 +2230,7 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) size_t i; mutex_lock(&c->ec_stripes_heap_lock); - for (i = 0; i < min_t(size_t, h->used, 50); i++) { + for (i = 0; i < min_t(size_t, h->nr, 50); i++) { m = genradix_ptr(&c->stripes, h->data[i].idx); prt_printf(out, "%zu %u/%u+%u", h->data[i].idx, diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h index 976426da3a12..1df03dccfc72 100644 --- a/fs/bcachefs/ec_types.h +++ b/fs/bcachefs/ec_types.h @@ -36,6 +36,6 @@ struct ec_stripe_heap_entry { unsigned blocks_nonempty; }; -typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap; +typedef DEFINE_MIN_HEAP(struct ec_stripe_heap_entry, ec_stripes_heap) ec_stripes_heap; #endif /* _BCACHEFS_EC_TYPES_H */ diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 5d2c470a49ac..d0d99400f986 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -54,17 +55,9 @@ static inline size_t buf_pages(void *p, size_t len) PAGE_SIZE); } -#define HEAP(type) \ -struct { \ - size_t size, used; \ - type *data; \ -} - -#define DECLARE_HEAP(type, name) HEAP(type) name - #define init_heap(heap, _size, gfp) \ ({ \ - (heap)->used = 0; \ + (heap)->nr = 0; \ (heap)->size = (_size); \ (heap)->data = kvmalloc((heap)->size * sizeof((heap)->data[0]),\ (gfp)); \ @@ -76,113 +69,6 @@ do { \ (heap)->data = NULL; \ } while (0) -#define heap_set_backpointer(h, i, _fn) \ -do { \ - void 
(*fn)(typeof(h), size_t) = _fn; \ - if (fn) \ - fn(h, i); \ -} while (0) - -#define heap_swap(h, i, j, set_backpointer) \ -do { \ - swap((h)->data[i], (h)->data[j]); \ - heap_set_backpointer(h, i, set_backpointer); \ - heap_set_backpointer(h, j, set_backpointer); \ -} while (0) - -#define heap_peek(h) \ -({ \ - EBUG_ON(!(h)->used); \ - (h)->data[0]; \ -}) - -#define heap_full(h) ((h)->used == (h)->size) - -#define heap_sift_down(h, i, cmp, set_backpointer) \ -do { \ - size_t _c, _j = i; \ - \ - for (; _j * 2 + 1 < (h)->used; _j = _c) { \ - _c = _j * 2 + 1; \ - if (_c + 1 < (h)->used && \ - cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \ - _c++; \ - \ - if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \ - break; \ - heap_swap(h, _c, _j, set_backpointer); \ - } \ -} while (0) - -#define heap_sift_up(h, i, cmp, set_backpointer) \ -do { \ - while (i) { \ - size_t p = (i - 1) / 2; \ - if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \ - break; \ - heap_swap(h, i, p, set_backpointer); \ - i = p; \ - } \ -} while (0) - -#define __heap_add(h, d, cmp, set_backpointer) \ -({ \ - size_t _i = (h)->used++; \ - (h)->data[_i] = d; \ - heap_set_backpointer(h, _i, set_backpointer); \ - \ - heap_sift_up(h, _i, cmp, set_backpointer); \ - _i; \ -}) - -#define heap_add(h, d, cmp, set_backpointer) \ -({ \ - bool _r = !heap_full(h); \ - if (_r) \ - __heap_add(h, d, cmp, set_backpointer); \ - _r; \ -}) - -#define heap_add_or_replace(h, new, cmp, set_backpointer) \ -do { \ - if (!heap_add(h, new, cmp, set_backpointer) && \ - cmp(h, new, heap_peek(h)) >= 0) { \ - (h)->data[0] = new; \ - heap_set_backpointer(h, 0, set_backpointer); \ - heap_sift_down(h, 0, cmp, set_backpointer); \ - } \ -} while (0) - -#define heap_del(h, i, cmp, set_backpointer) \ -do { \ - size_t _i = (i); \ - \ - BUG_ON(_i >= (h)->used); \ - (h)->used--; \ - if ((_i) < (h)->used) { \ - heap_swap(h, _i, (h)->used, set_backpointer); \ - heap_sift_up(h, _i, cmp, set_backpointer); \ - heap_sift_down(h, _i, cmp, set_backpointer); \ - } \ -} while (0) - -#define heap_pop(h, d, cmp, set_backpointer) \ -({ \ - bool _r = (h)->used; \ - if (_r) { \ - (d) = (h)->data[0]; \ - heap_del(h, 0, cmp, set_backpointer); \ - } \ - _r; \ -}) - -#define heap_resort(heap, cmp, set_backpointer) \ -do { \ - ssize_t _i; \ - for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \ - heap_sift_down(heap, _i, cmp, set_backpointer); \ -} while (0) - #define ANYSINT_MAX(t) \ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) From b41838fe11ce1d2d994210c2c79582499c53e0ca Mon Sep 17 00:00:00 2001 From: Xiong Nandi Date: Fri, 24 May 2024 12:25:59 +0800 Subject: [PATCH 22/98] scripts/decode_stacktrace.sh: wrap nm with UTIL_PREFIX and UTIL_SUFFIX Patch series "scripts/decode_stacktrace.sh: better support to ARM32". This patch (of 2): Since System.map is generated by cross-compile nm tool, we should use it here too. Otherwise host nm may not recognize ARM Thumb-2 instruction address well. 
Link: https://lkml.kernel.org/r/20240524042600.14738-1-xndchn@gmail.com Link: https://lkml.kernel.org/r/20240524042600.14738-2-xndchn@gmail.com Signed-off-by: Xiong Nandi Reviewed-by: Elliot Berman Cc: Bjorn Andersson Cc: Carlos Llamas Signed-off-by: Andrew Morton --- scripts/decode_stacktrace.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index fa5be6f57b00..2bc3a54ffba5 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -30,6 +30,7 @@ fi READELF=${UTIL_PREFIX}readelf${UTIL_SUFFIX} ADDR2LINE=${UTIL_PREFIX}addr2line${UTIL_SUFFIX} +NM=${UTIL_PREFIX}nm${UTIL_SUFFIX} if [[ $1 == "-r" ]] ; then vmlinux="" @@ -158,7 +159,7 @@ parse_symbol() { if [[ $aarray_support == true && "${cache[$module,$name]+isset}" == "isset" ]]; then local base_addr=${cache[$module,$name]} else - local base_addr=$(nm "$objfile" 2>/dev/null | awk '$3 == "'$name'" && ($2 == "t" || $2 == "T") {print $1; exit}') + local base_addr=$(${NM} "$objfile" 2>/dev/null | awk '$3 == "'$name'" && ($2 == "t" || $2 == "T") {print $1; exit}') if [[ $base_addr == "" ]] ; then # address not found return From 78efbfb5b7675b88d987fda108df0a2f3e07e722 Mon Sep 17 00:00:00 2001 From: Xiong Nandi Date: Fri, 24 May 2024 12:26:00 +0800 Subject: [PATCH 23/98] scripts/decode_stacktrace.sh: better support to ARM32 module stack trace Sometimes there are special characters around module names in stack traces, such as ARM32 with BACKTRACE_VERBOSE in "(%pS)" format, such as: [<806e4845>] (dump_stack_lvl) from [<7f806013>] (hello_init+0x13/0x1000 [test]) In this case, $module will be "[test])", the trace can be decoded by stripping the right parenthesis first: (dump_stack_lvl) from hello_init (/foo/test.c:10) test. Link: https://lkml.kernel.org/r/20240524042600.14738-3-xndchn@gmail.com Signed-off-by: Xiong Nandi Suggested-by: Elliot Berman Cc: Bjorn Andersson Cc: Carlos Llamas Signed-off-by: Andrew Morton --- scripts/decode_stacktrace.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index 2bc3a54ffba5..a0f50a5b4f7c 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -283,6 +283,9 @@ handle_line() { if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then module=${words[$last]} + # some traces format is "(%pS)", which like "(foo+0x0/0x1 [bar])" + # so $module may like "[bar])". Strip the right parenthesis firstly + module=${module%\)} module=${module#\[} module=${module%\]} modbuildid=${module#* } From 5b1a6373aa61f2902ea4922ec4a168703f720b2c Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:43 -0700 Subject: [PATCH 24/98] MAINTAINERS: add linux/nodemask_types.h to BITMAP API Patch series "Cleanup cpumask.h inclusion in core headers". Many core headers include linux/cpumask.h for nothing, and some others include it just for types. We already have nodemask_types.h, and this series adds cpumask_types.h to optimize core headers inclusion paths. Interestingly, it doesn't improve on build time for me, but the headers cleanup work should keep going. This patch (of 6): Commit bea32141764b ("nodemask: Split out include/linux/nodemask_types.h") added the nodemask_types.h but didn't cover it with corresponding record in the MAINTAINERS file. 
Link: https://lkml.kernel.org/r/20240528005648.182376-1-yury.norov@gmail.com Link: https://lkml.kernel.org/r/20240528005648.182376-2-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. Wysocki # for thermal Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Cc: Yury Norov Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2ca8f35dfe03..2be0733bf0a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3732,6 +3732,7 @@ F: include/linux/bits.h F: include/linux/cpumask.h F: include/linux/find.h F: include/linux/nodemask.h +F: include/linux/nodemask_types.h F: include/vdso/bits.h F: lib/bitmap-str.c F: lib/bitmap.c From 7c45d8282660cf4ce011d96bf918df50b7481b3c Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:44 -0700 Subject: [PATCH 25/98] sched: avoid using ilog2() in sched.h indirectly via cpumask.h path includes the ilog2.h header to calculate ilog2(TASK_REPORT_MAX). The following patches drops sched.h dependency on cpumask.h, and to have a successful build, the header has to be included explicitly. sched.h is a frequently included header, and it's better to keep the dependency list as small as possible. So, instead of including ilog2.h for a single BUILD_BUG_ON() check, the same check may be implemented by taking exponent of the other part of equation. Link: https://lkml.kernel.org/r/20240528005648.182376-3-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..98abb07de149 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1604,7 +1604,7 @@ static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; - BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); + BUILD_BUG_ON(TASK_REPORT_MAX * 2 != 1 << (sizeof(state_char) - 1)); return state_char[state]; } From eb4faa36d674eed60a522a72a3a40384f4b285d4 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:45 -0700 Subject: [PATCH 26/98] cpumask: split out include/linux/cpumask_types.h Many core headers, like sched.h, include cpumask.h mostly for struct cpumask and cpumask_var_t. Those are frequently used headers and shouldn't pull more than the bare minimum. Link: https://lkml.kernel.org/r/20240528005648.182376-4-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + include/linux/cpumask.h | 56 +---------------------------- include/linux/cpumask_types.h | 66 +++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 55 deletions(-) create mode 100644 include/linux/cpumask_types.h diff --git a/MAINTAINERS b/MAINTAINERS index 2be0733bf0a2..2840c7b071b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3730,6 +3730,7 @@ F: include/linux/bitmap-str.h F: include/linux/bitmap.h F: include/linux/bits.h F: include/linux/cpumask.h +F: include/linux/cpumask_types.h F: include/linux/find.h F: include/linux/nodemask.h F: include/linux/nodemask_types.h diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 23686bed441d..76dca7b86189 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -9,25 +9,13 @@ */ #include #include -#include #include +#include #include #include #include #include -/* Don't assign or return these: may not be this big! */ -typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; - -/** - * cpumask_bits - get the bits in a cpumask - * @maskp: the struct cpumask * - * - * You should only assume nr_cpu_ids bits of this mask are valid. This is - * a macro so it's const-correct. - */ -#define cpumask_bits(maskp) ((maskp)->bits) - /** * cpumask_pr_args - printf args to output a cpumask * @maskp: cpumask to be printed @@ -922,48 +910,7 @@ static inline unsigned int cpumask_size(void) return bitmap_size(large_cpumask_bits); } -/* - * cpumask_var_t: struct cpumask for stack usage. - * - * Oh, the wicked games we play! In order to make kernel coding a - * little more difficult, we typedef cpumask_var_t to an array or a - * pointer: doing &mask on an array is a noop, so it still works. - * - * i.e. - * cpumask_var_t tmpmask; - * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) - * return -ENOMEM; - * - * ... use 'tmpmask' like a normal struct cpumask * ... - * - * free_cpumask_var(tmpmask); - * - * - * However, one notable exception is there. alloc_cpumask_var() allocates - * only nr_cpumask_bits bits (in the other hand, real cpumask_t always has - * NR_CPUS bits). Therefore you don't have to dereference cpumask_var_t. - * - * cpumask_var_t tmpmask; - * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) - * return -ENOMEM; - * - * var = *tmpmask; - * - * This code makes NR_CPUS length memcopy and brings to a memory corruption. - * cpumask_copy() provide safe copy functionality. - * - * Note that there is another evil here: If you define a cpumask_var_t - * as a percpu variable then the way to obtain the address of the cpumask - * structure differently influences what this_cpu_* operation needs to be - * used. Please use this_cpu_cpumask_var_t in those cases. The direct use - * of this_cpu_ptr() or this_cpu_read() will lead to failures when the - * other type of cpumask_var_t implementation is configured. - * - * Please also note that __cpumask_var_read_mostly can be used to declare - * a cpumask_var_t variable itself (not its content) as read mostly. 
- */ #ifdef CONFIG_CPUMASK_OFFSTACK -typedef struct cpumask *cpumask_var_t; #define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) #define __cpumask_var_read_mostly __read_mostly @@ -1010,7 +957,6 @@ static inline bool cpumask_available(cpumask_var_t mask) } #else -typedef struct cpumask cpumask_var_t[1]; #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) #define __cpumask_var_read_mostly diff --git a/include/linux/cpumask_types.h b/include/linux/cpumask_types.h new file mode 100644 index 000000000000..461ed1b6bcdb --- /dev/null +++ b/include/linux/cpumask_types.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_CPUMASK_TYPES_H +#define __LINUX_CPUMASK_TYPES_H + +#include +#include + +/* Don't assign or return these: may not be this big! */ +typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; + +/** + * cpumask_bits - get the bits in a cpumask + * @maskp: the struct cpumask * + * + * You should only assume nr_cpu_ids bits of this mask are valid. This is + * a macro so it's const-correct. + */ +#define cpumask_bits(maskp) ((maskp)->bits) + +/* + * cpumask_var_t: struct cpumask for stack usage. + * + * Oh, the wicked games we play! In order to make kernel coding a + * little more difficult, we typedef cpumask_var_t to an array or a + * pointer: doing &mask on an array is a noop, so it still works. + * + * i.e. + * cpumask_var_t tmpmask; + * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) + * return -ENOMEM; + * + * ... use 'tmpmask' like a normal struct cpumask * ... + * + * free_cpumask_var(tmpmask); + * + * + * However, one notable exception is there. alloc_cpumask_var() allocates + * only nr_cpumask_bits bits (in the other hand, real cpumask_t always has + * NR_CPUS bits). Therefore you don't have to dereference cpumask_var_t. + * + * cpumask_var_t tmpmask; + * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) + * return -ENOMEM; + * + * var = *tmpmask; + * + * This code makes NR_CPUS length memcopy and brings to a memory corruption. + * cpumask_copy() provide safe copy functionality. + * + * Note that there is another evil here: If you define a cpumask_var_t + * as a percpu variable then the way to obtain the address of the cpumask + * structure differently influences what this_cpu_* operation needs to be + * used. Please use this_cpu_cpumask_var_t in those cases. The direct use + * of this_cpu_ptr() or this_cpu_read() will lead to failures when the + * other type of cpumask_var_t implementation is configured. + * + * Please also note that __cpumask_var_read_mostly can be used to declare + * a cpumask_var_t variable itself (not its content) as read mostly. + */ +#ifdef CONFIG_CPUMASK_OFFSTACK +typedef struct cpumask *cpumask_var_t; +#else +typedef struct cpumask cpumask_var_t[1]; +#endif /* CONFIG_CPUMASK_OFFSTACK */ + +#endif /* __LINUX_CPUMASK_TYPES_H */ From 361c1f04f3b43b00997b2845c7a9e1d0a9b4c38a Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:46 -0700 Subject: [PATCH 27/98] sched: drop sched.h dependency on cpumask sched.h needs cpumask.h mostly for types declaration. Now that we have cpumask_types.h, which is a significantly smaller header, we can rely on it. The only exception is UP stub for set_cpus_allowed_ptr(). The function needs to test bit #0 in a @new_mask, which can be trivially opencoded. 
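As a reading aid for the open-coded UP stub above: cpumask_bits() is provided by the new cpumask_types.h and simply exposes the underlying unsigned long array, and CPU 0 is the least significant bit of word 0. A minimal sketch of the equivalence (the helper name below is invented for illustration and is not part of the patch):

    #include <linux/cpumask_types.h>

    /* Illustration only: equivalent to cpumask_test_cpu(0, mask) on UP,
     * without needing anything from cpumask.h. */
    static inline bool cpu0_allowed(const struct cpumask *mask)
    {
            return *cpumask_bits(mask) & 1;
    }

That single bit test is all the UP version of set_cpus_allowed_ptr() needs, so sched.h can drop the heavier header.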
Link: https://lkml.kernel.org/r/20240528005648.182376-5-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/sched.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 98abb07de149..f2f907ef1389 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -1778,7 +1778,8 @@ static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpuma } static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { - if (!cpumask_test_cpu(0, new_mask)) + /* Opencoded cpumask_test_cpu(0, new_mask) to avoid dependency on cpumask.h */ + if ((*cpumask_bits(new_mask) & 1) == 0) return -EINVAL; return 0; } From 7f36688f126ba4a4ec510fa81466b1dacdec97ee Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:47 -0700 Subject: [PATCH 28/98] cpumask: cleanup core headers inclusion Many core headers include cpumask.h for nothing. Drop it. Link: https://lkml.kernel.org/r/20240528005648.182376-6-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/cgroup.h | 1 - include/linux/cpu.h | 1 - include/linux/cpu_cooling.h | 1 - include/linux/kernel_stat.h | 1 - include/linux/node.h | 1 - include/linux/percpu.h | 1 - include/linux/profile.h | 1 - include/linux/rcupdate.h | 1 - include/linux/seq_file.h | 1 - include/linux/tracepoint.h | 1 - 10 files changed, 10 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2150ca60394b..c60ba0ab1462 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -10,7 +10,6 @@ */ #include -#include #include #include #include diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 861c3bfc5f17..ea6ac8f98e4a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -16,7 +16,6 @@ #include #include -#include #include #include diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index a3bdc8a98f2c..2c774fb3c091 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -15,7 +15,6 @@ #include #include -#include struct cpufreq_policy; diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 9c042c6384bb..b97ce2df376f 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/node.h b/include/linux/node.h index dfc004e4bee7..9a881c2208b3 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -16,7 +16,6 @@ #define _LINUX_NODE_H_ #include -#include #include /** diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 03053de557cf..4b2047b78b67 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/profile.h b/include/linux/profile.h index 04ae5ebcb637..2fb487f61d12 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -4,7 +4,6 @@ #include #include -#include #include #include diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dfd2399f2cde..fb8ab4618d63 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 8bd4fda6e027..2fb266ea69fa 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 689b6d71590e..6be396bb4297 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include From e1b6705bcfb2797ea182e313d5ec4f57fa8571f2 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:48 -0700 Subject: [PATCH 29/98] cpumask: make core headers including cpumask_types.h where possible Now that cpumask types are split out to a separate smaller header, many frequently included core headers may switch to using it. 
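As a rough illustration of where each header is now appropriate (struct foo below is hypothetical, not taken from the tree): a header that only embeds cpumask storage in a type can include the lightweight header, while code that actually manipulates masks keeps including cpumask.h.

    /* some_header.h: only needs the type definitions */
    #include <linux/cpumask_types.h>

    struct foo {                            /* hypothetical */
            cpumask_var_t allowed;          /* storage only, no API calls */
    };

    /* some_file.c: calls the cpumask API, so it needs the full header */
    #include <linux/cpumask.h>

    static void foo_set_cpu(struct foo *f, unsigned int cpu)
    {
            cpumask_set_cpu(cpu, f->allowed);       /* API from cpumask.h */
    }

This keeps frequently included core headers from transitively pulling in the whole bitmap API.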
Link: https://lkml.kernel.org/r/20240528005648.182376-7-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/cacheinfo.h | 2 +- include/linux/clockchips.h | 2 +- include/linux/cpu_rmap.h | 2 +- include/linux/interrupt.h | 2 +- include/linux/irqchip/irq-partition-percpu.h | 2 +- include/linux/msi.h | 2 +- include/linux/pm_domain.h | 2 +- include/linux/stop_machine.h | 2 +- include/linux/torture.h | 2 +- include/linux/workqueue.h | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2cb15fe4fe12..286db104e054 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,7 +3,7 @@ #define _LINUX_CACHEINFO_H #include -#include +#include #include struct device_node; diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 9aac31d856f3..b0df28ddd394 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -12,7 +12,7 @@ #ifdef CONFIG_GENERIC_CLOCKEVENTS # include -# include +# include # include # include diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index cae324d10965..20b5729903d7 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -7,7 +7,7 @@ * Copyright 2011 Solarflare Communications Inc. */ -#include +#include #include #include #include diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5c9bdd3ffccc..136a55455529 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -5,13 +5,13 @@ #include #include -#include #include #include #include #include #include #include +#include #include #include diff --git a/include/linux/irqchip/irq-partition-percpu.h b/include/linux/irqchip/irq-partition-percpu.h index 2f6ae7551748..b35ee22c278f 100644 --- a/include/linux/irqchip/irq-partition-percpu.h +++ b/include/linux/irqchip/irq-partition-percpu.h @@ -8,7 +8,7 @@ #define __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H #include -#include +#include #include struct partition_affinity { diff --git a/include/linux/msi.h b/include/linux/msi.h index dc27cf3903d5..26588da88bdd 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f24546a3d3db..71e4f0fb8867 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include /* diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index ea7a74ea7389..3132262a404d 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -3,7 +3,7 @@ #define _LINUX_STOP_MACHINE #include -#include +#include #include #include diff --git a/include/linux/torture.h b/include/linux/torture.h index 1541454da03e..c2e979f82f8d 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fb3993894536..52496f07fba5 100644 --- 
a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include From 85fb11a87921a017e5551835eaaf58f9da173686 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Tue, 28 May 2024 04:30:08 +0800 Subject: [PATCH 30/98] lib/sort: remove unused pr_fmt macro Patch series "lib/sort: Optimizations and cleanups". This patch series optimizes the handling of the last 2 or 3 elements in lib/sort and adds a testcase in lib/test_sort to maintain 100% code coverage reflecting this change. Additionally, it corrects outdated descriptions regarding glibc qsort() and removes the unused pr_fmt macro. This patch (of 4): The pr_fmt macro is defined but not used in lib/sort.c. Since there are no pr_* functions printing any messages, the pr_fmt macro is redundant and can be safely removed. Link: https://lkml.kernel.org/r/20240527203011.1644280-1-visitorckw@gmail.com Link: https://lkml.kernel.org/r/20240527203011.1644280-2-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Signed-off-by: Andrew Morton --- lib/sort.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/sort.c b/lib/sort.c index a0509088f82a..651b73205f6d 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -10,8 +10,6 @@ * quicksort's O(n^2) worst case. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include #include #include From f49ac9571b8f1de4dfcce941a936a4d19dd7bb8a Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Tue, 28 May 2024 04:30:09 +0800 Subject: [PATCH 31/98] lib/sort: fix outdated comment regarding glibc qsort() The existing comment in lib/sort refers to glibc qsort() using quicksort. However, glibc qsort() no longer uses quicksort; it now uses mergesort and falls back to heapsort if memory allocation for mergesort fails. This makes the comment outdated and incorrect. Update the comment to refer to quicksort in general rather than glibc's implementation to provide accurate information about the comparisons and trade-offs without implying an outdated implementation. Link: https://lkml.kernel.org/r/20240527203011.1644280-3-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Signed-off-by: Andrew Morton --- lib/sort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/sort.c b/lib/sort.c index 651b73205f6d..b918ae15302d 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -5,7 +5,7 @@ * This performs n*log2(n) + 0.37*n + o(n) comparisons on average, * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case. * - * Glibc qsort() manages n*log2(n) - 1.26*n for random inputs (1.63*n + * Quicksort manages n*log2(n) - 1.26*n for random inputs (1.63*n * better) at the expense of stack usage and much larger code to avoid * quicksort's O(n^2) worst case. */ From 41ed7804350839608308fed0225894fdab8b71fd Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Tue, 28 May 2024 04:30:10 +0800 Subject: [PATCH 32/98] lib/sort: optimize heapsort for handling final 2 or 3 elements After building the heap, the code continuously pops two elements from the heap until only 2 or 3 elements remain, at which point it switches back to a regular heapsort with one element popped at a time. However, to handle the final 2 or 3 elements, an additional else-if statement in the while loop was introduced, potentially increasing branch misses. 
Moreover, when there are only 2 or 3 elements left, continuing with regular heapify operations is unnecessary as these cases are simple enough to be handled with a single comparison and 1 or 2 swaps outside the while loop. Eliminating the additional else-if statement and directly managing cases involving 2 or 3 elements outside the loop reduces unnecessary conditional branches resulting from the numerous loops and conditionals in heapify. This optimization maintains consistent numbers of comparisons and swaps for arrays with even lengths while reducing swaps and comparisons for arrays with odd lengths from 2.5 swaps and 1 comparison to 1.5 swaps and 1 comparison. Link: https://lkml.kernel.org/r/20240527203011.1644280-4-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Signed-off-by: Andrew Morton --- lib/sort.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/sort.c b/lib/sort.c index b918ae15302d..048b7a6ef967 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -250,10 +250,7 @@ void sort_r(void *base, size_t num, size_t size, a = size << shift; n -= size; do_swap(base + a, base + n, size, swap_func, priv); - } else if (n > size) { /* Sorting: Extract root */ - n -= size; - do_swap(base, base + n, size, swap_func, priv); - } else { /* Sort complete */ + } else { /* Sort complete */ break; } @@ -283,6 +280,11 @@ void sort_r(void *base, size_t num, size_t size, do_swap(base + b, base + c, size, swap_func, priv); } } + + n -= size; + do_swap(base, base + n, size, swap_func, priv); + if (n == size * 2 && do_cmp(base, base + size, cmp_func, priv) > 0) + do_swap(base, base + size, size, swap_func, priv); } EXPORT_SYMBOL(sort_r); From 54ce43da25816e6134ffc777b02f9a720d07a8db Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Tue, 28 May 2024 04:30:11 +0800 Subject: [PATCH 33/98] lib/test_sort: add a testcase to ensure code coverage The addition of an if statement in lib/sort to handle the final unsorted 2 or 3 elements is not covered by existing test cases, leading to incomplete test coverage. To ensure comprehensive testing and maintain 100% code coverage, add a new testcase for scenarios where the if statement is triggered. Since the if statement is only triggered when the array length is odd and the first element is greater than the second element, a testcase is created using an array length of TEST_LEN - 1 and a suitable random seed to maintain full code coverage. 
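The new test case exercises exactly this tail path (odd length, with the first two remaining elements out of order at the final step). For readers of the sort_r() change above, a standalone sketch of that tail handling on a plain int array (simplified; the real code works on byte offsets via do_swap()/do_cmp() and arbitrary element sizes):

    #include <stddef.h>

    /* Illustration only: finish sorting when n == 2 or n == 3 elements
     * remain and arr[0] is still the largest of them (heap property). */
    static void finish_tail(int *arr, size_t n)
    {
            int tmp = arr[0];

            arr[0] = arr[n - 1];                    /* put the max in its final slot */
            arr[n - 1] = tmp;
            if (n == 3 && arr[0] > arr[1]) {        /* at most one more swap */
                    tmp = arr[0];
                    arr[0] = arr[1];
                    arr[1] = tmp;
            }
    }

For even input lengths nothing changes; the odd-length saving quoted in the previous patch (1.5 swaps and 1 comparison instead of 2.5 swaps and 1 comparison) comes from this final step.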
Link: https://lkml.kernel.org/r/20240527203011.1644280-5-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Signed-off-by: Andrew Morton --- lib/test_sort.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/test_sort.c b/lib/test_sort.c index be02e3a098cf..da4495125097 100644 --- a/lib/test_sort.c +++ b/lib/test_sort.c @@ -29,7 +29,19 @@ static void test_sort(struct kunit *test) sort(a, TEST_LEN, sizeof(*a), cmpint, NULL); - for (i = 0; i < TEST_LEN-1; i++) + for (i = 0; i < TEST_LEN - 1; i++) + KUNIT_ASSERT_LE(test, a[i], a[i + 1]); + + r = 48; + + for (i = 0; i < TEST_LEN - 1; i++) { + r = (r * 725861) % 6599; + a[i] = r; + } + + sort(a, TEST_LEN - 1, sizeof(*a), cmpint, NULL); + + for (i = 0; i < TEST_LEN - 2; i++) KUNIT_ASSERT_LE(test, a[i], a[i + 1]); } From 727759d748ed34cc8d3e1d215fbc1766010dee3d Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Mon, 27 May 2024 13:08:35 -0700 Subject: [PATCH 34/98] selftests/mqueue: fix 5 warnings about signed/unsigned mismatches When building with clang, via: make LLVM=1 -C tools/testing/selftest ...clang warns about several cases of using a signed integer for the priority argument to mq_receive(3), which expects an unsigned int. Fix this by declaring the type as unsigned int in all cases. Link: https://lkml.kernel.org/r/20240527200835.143682-1-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Muhammad Usama Anjum Reviewed-by: Ryan Roberts Cc: David Hildenbrand Cc: Muhammad Usama Anjum Cc: SeongJae Park Cc: Shuah Khan Cc: Valentin Obst Signed-off-by: Andrew Morton --- tools/testing/selftests/mqueue/mq_perf_tests.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index 5c16159d0bcd..fb898850867c 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -323,7 +323,8 @@ void *fake_cont_thread(void *arg) void *cont_thread(void *arg) { char buff[MSG_SIZE]; - int i, priority; + int i; + unsigned int priority; for (i = 0; i < num_cpus_to_pin; i++) if (cpu_threads[i] == pthread_self()) @@ -425,7 +426,8 @@ struct test test2[] = { void *perf_test_thread(void *arg) { char buff[MSG_SIZE]; - int prio_out, prio_in; + int prio_out; + unsigned int prio_in; int i; clockid_t clock; pthread_t *t; From 51d821654be4286b005ad2b7dc8b973d5008a2ec Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 28 May 2024 22:42:57 +0200 Subject: [PATCH 35/98] percpu_counter: add a cmpxchg-based _add_batch variant Interrupt disable/enable trips are quite expensive on x86-64 compared to a mere cmpxchg (note: no lock prefix!) and percpu counters are used quite often. With this change I get a bump of 1% ops/s for negative path lookups, plugged into will-it-scale: void testcase(unsigned long long *iterations, unsigned long nr) { while (1) { int fd = open("/tmp/nonexistent", O_RDONLY); assert(fd == -1); (*iterations)++; } } The win would be higher if it was not for other slowdowns, but one has to start somewhere. 
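The fast path follows the usual read/compute/try-cmpxchg retry shape. A rough userspace analogue is sketched below for illustration only: it uses the GCC/Clang __atomic builtins (so, unlike the kernel's this_cpu_try_cmpxchg(), it would still emit a lock-prefixed cmpxchg) and leaves out the batch check and slow path entirely:

    /* Sketch of the retry pattern, not the kernel implementation. */
    static void counter_add(long *counter, long amount)
    {
            long old = __atomic_load_n(counter, __ATOMIC_RELAXED);

            /* On failure, 'old' is refreshed with the current value, so the
             * sum is recomputed before the next attempt. */
            while (!__atomic_compare_exchange_n(counter, &old, old + amount,
                                                true, __ATOMIC_RELAXED,
                                                __ATOMIC_RELAXED))
                    ;
    }

In the kernel version below, the loop only has to be safe against interrupts on the local CPU, which is why a plain (non lock-prefixed) cmpxchg suffices; once the batch threshold is crossed it falls back to the existing irq-disabled slow path.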
Link: https://lkml.kernel.org/r/20240528204257.434817-1-mjguzik@gmail.com Signed-off-by: Mateusz Guzik Acked-by: Vlastimil Babka Acked-by: Dennis Zhou Cc: Hugh Dickins Cc: Tejun Heo Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/percpu_counter.c | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 44dd133594d4..51bc5246986d 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -73,17 +73,50 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) EXPORT_SYMBOL(percpu_counter_set); /* - * local_irq_save() is needed to make the function irq safe: - * - The slow path would be ok as protected by an irq-safe spinlock. - * - this_cpu_add would be ok as it is irq-safe by definition. - * But: - * The decision slow path/fast path and the actual update must be atomic, too. + * Add to a counter while respecting batch size. + * + * There are 2 implementations, both dealing with the following problem: + * + * The decision slow path/fast path and the actual update must be atomic. * Otherwise a call in process context could check the current values and * decide that the fast path can be used. If now an interrupt occurs before * the this_cpu_add(), and the interrupt updates this_cpu(*fbc->counters), * then the this_cpu_add() that is executed after the interrupt has completed * can produce values larger than "batch" or even overflows. */ +#ifdef CONFIG_HAVE_CMPXCHG_LOCAL +/* + * Safety against interrupts is achieved in 2 ways: + * 1. the fast path uses local cmpxchg (note: no lock prefix) + * 2. the slow path operates with interrupts disabled + */ +void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) +{ + s64 count; + unsigned long flags; + + count = this_cpu_read(*fbc->counters); + do { + if (unlikely(abs(count + amount) >= batch)) { + raw_spin_lock_irqsave(&fbc->lock, flags); + /* + * Note: by now we might have migrated to another CPU + * or the value might have changed. + */ + count = __this_cpu_read(*fbc->counters); + fbc->count += count + amount; + __this_cpu_sub(*fbc->counters, count); + raw_spin_unlock_irqrestore(&fbc->lock, flags); + return; + } + } while (!this_cpu_try_cmpxchg(*fbc->counters, &count, count + amount)); +} +#else +/* + * local_irq_save() is used to make the function irq safe: + * - The slow path would be ok as protected by an irq-safe spinlock. + * - this_cpu_add would be ok as it is irq-safe by definition. + */ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; @@ -101,6 +134,7 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) } local_irq_restore(flags); } +#endif EXPORT_SYMBOL(percpu_counter_add_batch); /* From 87beb66918f83700c8cfe8309b64b80dc48c6ef5 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 27 May 2024 08:02:00 +0800 Subject: [PATCH 36/98] selftests: introduce additional eventfd test coverage Add several new test cases which assert corner cases on the eventfd mechanism, for example, the supplied buffer is less than 8 bytes, attempting to write a value that is too large, etc. ./eventfd_test # Starting 9 tests from 1 test cases. # RUN global.eventfd_check_flag_rdwr ... # OK global.eventfd_check_flag_rdwr ok 1 global.eventfd_check_flag_rdwr # RUN global.eventfd_check_flag_cloexec ... # OK global.eventfd_check_flag_cloexec ok 2 global.eventfd_check_flag_cloexec # RUN global.eventfd_check_flag_nonblock ... 
# OK global.eventfd_check_flag_nonblock ok 3 global.eventfd_check_flag_nonblock # RUN global.eventfd_chek_flag_cloexec_and_nonblock ... # OK global.eventfd_chek_flag_cloexec_and_nonblock ok 4 global.eventfd_chek_flag_cloexec_and_nonblock # RUN global.eventfd_check_flag_semaphore ... # OK global.eventfd_check_flag_semaphore ok 5 global.eventfd_check_flag_semaphore # RUN global.eventfd_check_write ... # OK global.eventfd_check_write ok 6 global.eventfd_check_write # RUN global.eventfd_check_read ... # OK global.eventfd_check_read ok 7 global.eventfd_check_read # RUN global.eventfd_check_read_with_nonsemaphore ... # OK global.eventfd_check_read_with_nonsemaphore ok 8 global.eventfd_check_read_with_nonsemaphore # RUN global.eventfd_check_read_with_semaphore ... # OK global.eventfd_check_read_with_semaphore ok 9 global.eventfd_check_read_with_semaphore # PASSED: 9 / 9 tests passed. # Totals: pass:9 fail:0 xfail:0 xpass:0 skip:0 error:0 Link: https://lkml.kernel.org/r/20240527000200.5615-1-wen.yang@linux.dev Signed-off-by: Wen Yang Cc: Shuah Khan Cc: Christian Brauner Cc: Andrei Vagin Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Dave Young Cc: Tim Bird Signed-off-by: Andrew Morton --- .../filesystems/eventfd/eventfd_test.c | 136 +++++++++++++++++- 1 file changed, 131 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c index f142a137526c..85acb4e3ef00 100644 --- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c +++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c @@ -13,6 +13,8 @@ #include #include "../../kselftest_harness.h" +#define EVENTFD_TEST_ITERATIONS 100000UL + struct error { int code; char msg[512]; @@ -40,7 +42,7 @@ static inline int sys_eventfd2(unsigned int count, int flags) return syscall(__NR_eventfd2, count, flags); } -TEST(eventfd01) +TEST(eventfd_check_flag_rdwr) { int fd, flags; @@ -54,7 +56,7 @@ TEST(eventfd01) close(fd); } -TEST(eventfd02) +TEST(eventfd_check_flag_cloexec) { int fd, flags; @@ -68,7 +70,7 @@ TEST(eventfd02) close(fd); } -TEST(eventfd03) +TEST(eventfd_check_flag_nonblock) { int fd, flags; @@ -83,7 +85,7 @@ TEST(eventfd03) close(fd); } -TEST(eventfd04) +TEST(eventfd_chek_flag_cloexec_and_nonblock) { int fd, flags; @@ -161,7 +163,7 @@ static int verify_fdinfo(int fd, struct error *err, const char *prefix, return 0; } -TEST(eventfd05) +TEST(eventfd_check_flag_semaphore) { struct error err = {0}; int fd, ret; @@ -183,4 +185,128 @@ TEST(eventfd05) close(fd); } +/* + * A write(2) fails with the error EINVAL if the size of the supplied buffer + * is less than 8 bytes, or if an attempt is made to write the value + * 0xffffffffffffffff. + */ +TEST(eventfd_check_write) +{ + uint64_t value = 1; + ssize_t size; + int fd; + + fd = sys_eventfd2(0, 0); + ASSERT_GE(fd, 0); + + size = write(fd, &value, sizeof(int)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EINVAL); + + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + + value = (uint64_t)-1; + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EINVAL); + + close(fd); +} + +/* + * A read(2) fails with the error EINVAL if the size of the supplied buffer is + * less than 8 bytes. 
+ */ +TEST(eventfd_check_read) +{ + uint64_t value; + ssize_t size; + int fd; + + fd = sys_eventfd2(1, 0); + ASSERT_GE(fd, 0); + + size = read(fd, &value, sizeof(int)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EINVAL); + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + EXPECT_EQ(value, 1); + + close(fd); +} + + +/* + * If EFD_SEMAPHORE was not specified and the eventfd counter has a nonzero + * value, then a read(2) returns 8 bytes containing that value, and the + * counter's value is reset to zero. + * If the eventfd counter is zero at the time of the call to read(2), then the + * call fails with the error EAGAIN if the file descriptor has been made nonblocking. + */ +TEST(eventfd_check_read_with_nonsemaphore) +{ + uint64_t value; + ssize_t size; + int fd; + int i; + + fd = sys_eventfd2(0, EFD_NONBLOCK); + ASSERT_GE(fd, 0); + + value = 1; + for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) { + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + } + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(uint64_t)); + EXPECT_EQ(value, EVENTFD_TEST_ITERATIONS); + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EAGAIN); + + close(fd); +} + +/* + * If EFD_SEMAPHORE was specified and the eventfd counter has a nonzero value, + * then a read(2) returns 8 bytes containing the value 1, and the counter's + * value is decremented by 1. + * If the eventfd counter is zero at the time of the call to read(2), then the + * call fails with the error EAGAIN if the file descriptor has been made nonblocking. + */ +TEST(eventfd_check_read_with_semaphore) +{ + uint64_t value; + ssize_t size; + int fd; + int i; + + fd = sys_eventfd2(0, EFD_SEMAPHORE|EFD_NONBLOCK); + ASSERT_GE(fd, 0); + + value = 1; + for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) { + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + } + + for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) { + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + EXPECT_EQ(value, 1); + } + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EAGAIN); + + close(fd); +} + TEST_HARNESS_MAIN From 7abcb84f953df037d40fad66f2109db318dd155b Mon Sep 17 00:00:00 2001 From: I Hsin Cheng Date: Sun, 26 May 2024 22:01:39 +0800 Subject: [PATCH 37/98] lib/plist.c: enforce memory ordering in plist_check_list There exists an iteration over a plist in plist_check_list(), and memory dependency exists between variables "prev", "next" and "prev->next". As plist is used in the scheduling subsystem, we should guarantee the memory ordering between multiple processors. Using macro "WRITE_ONCE()" can help us to ensure the memory ordering as it was stated in "Documentation/memory-barriers.txt". 
Link: https://lkml.kernel.org/r/20240526140139.17220-1-richard120310@gmail.com Signed-off-by: I Hsin Cheng Signed-off-by: Andrew Morton --- lib/plist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/plist.c b/lib/plist.c index 0d86ed7a76ac..2e51829d3db9 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -47,8 +47,8 @@ static void plist_check_list(struct list_head *top) plist_check_prev_next(top, prev, next); while (next != top) { - prev = next; - next = prev->next; + WRITE_ONCE(prev, next); + WRITE_ONCE(next, prev->next); plist_check_prev_next(top, prev, next); } } From 6d74e1e371d43a7b378d1cb8cd1bb1007f1a8580 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 26 May 2024 07:02:06 +0800 Subject: [PATCH 38/98] tools/lib/list_sort: remove redundant code for cond_resched handling Since cond_resched() is not called in userspace, remove the redundant code in userspace's list_sort() implementation. This change eliminates the unused 'count' variable and the associated logic for invoking cmp() periodically, which was intended to trigger cond_resched() in kernel space. The removed code includes: - Declaration and increment of the 'count' variable. - Conditional invocation of cmp() based on 'count'. This cleanup simplifies merge_final(), avoids unnecessary overhead, and has no impact on the functionality of list_sort() in userspace. Link: https://lkml.kernel.org/r/20240525230206.1077536-1-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Ching-Chun (Jim) Huang Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- tools/lib/list_sort.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c index 10c067e3a8d2..69affa251fa7 100644 --- a/tools/lib/list_sort.c +++ b/tools/lib/list_sort.c @@ -52,7 +52,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, struct list_head *a, struct list_head *b) { struct list_head *tail = head; - u8 count = 0; for (;;) { /* if equal, take 'a' -- important for sort stability */ @@ -78,15 +77,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, /* Finish linking remainder of list b on to tail */ tail->next = b; do { - /* - * If the merge is highly unbalanced (e.g. the input is - * already sorted), this loop may run many iterations. - * Continue callbacks to the client even though no - * element comparison is needed, so the client's cmp() - * routine can invoke cond_resched() periodically. - */ - if (unlikely(!++count)) - cmp(priv, b, b); b->prev = tail; tail = b; b = b->next; From 21516c56ffe280d547af656fa9ba0ae62779ec98 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 31 May 2024 08:14:56 -0700 Subject: [PATCH 39/98] lib/ts: add missing MODULE_DESCRIPTION() macros make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/ts_kmp.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/ts_bm.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/ts_fsm.o Add the missing invocations of the MODULE_DESCRIPTION() macro. 
Link: https://lkml.kernel.org/r/20240531-lib-ts-v1-1-03d7f3546c49@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Andrew Morton --- lib/ts_bm.c | 1 + lib/ts_fsm.c | 1 + lib/ts_kmp.c | 1 + 3 files changed, 3 insertions(+) diff --git a/lib/ts_bm.c b/lib/ts_bm.c index e5f30f9177df..eed5967238c5 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -216,6 +216,7 @@ static void __exit exit_bm(void) textsearch_unregister(&bm_ops); } +MODULE_DESCRIPTION("Boyer-Moore text search implementation"); MODULE_LICENSE("GPL"); module_init(init_bm); diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c index 64fd9015ad80..053615f4fcd7 100644 --- a/lib/ts_fsm.c +++ b/lib/ts_fsm.c @@ -331,6 +331,7 @@ static void __exit exit_fsm(void) textsearch_unregister(&fsm_ops); } +MODULE_DESCRIPTION("naive finite state machine text search"); MODULE_LICENSE("GPL"); module_init(init_fsm); diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c index c77a3d537f24..5520dc28255a 100644 --- a/lib/ts_kmp.c +++ b/lib/ts_kmp.c @@ -147,6 +147,7 @@ static void __exit exit_kmp(void) textsearch_unregister(&kmp_ops); } +MODULE_DESCRIPTION("Knuth-Morris-Pratt text search implementation"); MODULE_LICENSE("GPL"); module_init(init_kmp); From f4b62423383e7a26eb1a6ce0fc52c472ed955d6f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 31 May 2024 12:04:54 +0300 Subject: [PATCH 40/98] kernel/panic: return early from print_tainted() when not tainted Reduce indent to make follow-up changes slightly easier on the eyes. Link: https://lkml.kernel.org/r/01d6c03de1c9d1b52b59c652a3704a0a9886ed63.1717146197.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Reviewed-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- kernel/panic.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 8bff183d6180..3edad0c6091d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -507,22 +507,23 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { const char *print_tainted(void) { static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")]; + char *s; + int i; BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT); - if (tainted_mask) { - char *s; - int i; - - s = buf + sprintf(buf, "Tainted: "); - for (i = 0; i < TAINT_FLAGS_COUNT; i++) { - const struct taint_flag *t = &taint_flags[i]; - *s++ = test_bit(i, &tainted_mask) ? - t->c_true : t->c_false; - } - *s = 0; - } else + if (!tainted_mask) { snprintf(buf, sizeof(buf), "Not tainted"); + return buf; + } + + s = buf + sprintf(buf, "Tainted: "); + for (i = 0; i < TAINT_FLAGS_COUNT; i++) { + const struct taint_flag *t = &taint_flags[i]; + *s++ = test_bit(i, &tainted_mask) ? + t->c_true : t->c_false; + } + *s = 0; return buf; } From aff1db0e4eb579a06290d7054871383aa0607015 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 31 May 2024 12:04:55 +0300 Subject: [PATCH 41/98] kernel/panic: convert print_tainted() to use struct seq_buf internally Convert print_tainted() to use struct seq_buf internally in order to be more aware of the buffer constraints as well as make it easier to extend in follow-up work. 
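The conversion relies on the seq_buf helpers used in the diff below; a minimal sketch of the pattern (buffer size and the characters written here are purely illustrative):

    /* Illustration only, not the kernel code. */
    static const char *seq_buf_example(void)
    {
            static char buf[32];
            struct seq_buf s;

            seq_buf_init(&s, buf, sizeof(buf));     /* bind to a fixed buffer */
            seq_buf_puts(&s, "Tainted: ");          /* writes are bounds-checked */
            seq_buf_putc(&s, 'P');
            return seq_buf_str(&s);                 /* NUL-terminate and return buf */
    }

Compared with the open-coded sprintf() and pointer arithmetic it replaces, an oversized write is clamped to the buffer instead of running past it, which is what "more aware of the buffer constraints" refers to.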
Link: https://lkml.kernel.org/r/cb6006fa7c0f82a6b6885e8eea2920fcdc4fc9d0.1717146197.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Reviewed-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- kernel/panic.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 3edad0c6091d..f7c0fddfd89a 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -496,6 +497,25 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { [ TAINT_TEST ] = { 'N', ' ', true }, }; +static void print_tainted_seq(struct seq_buf *s) +{ + int i; + + if (!tainted_mask) { + seq_buf_puts(s, "Not tainted"); + return; + } + + seq_buf_printf(s, "Tainted: "); + for (i = 0; i < TAINT_FLAGS_COUNT; i++) { + const struct taint_flag *t = &taint_flags[i]; + bool is_set = test_bit(i, &tainted_mask); + char c = is_set ? t->c_true : t->c_false; + + seq_buf_putc(s, c); + } +} + /** * print_tainted - return a string to represent the kernel taint state. * @@ -507,25 +527,15 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { const char *print_tainted(void) { static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")]; - char *s; - int i; + struct seq_buf s; BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT); - if (!tainted_mask) { - snprintf(buf, sizeof(buf), "Not tainted"); - return buf; - } + seq_buf_init(&s, buf, sizeof(buf)); - s = buf + sprintf(buf, "Tainted: "); - for (i = 0; i < TAINT_FLAGS_COUNT; i++) { - const struct taint_flag *t = &taint_flags[i]; - *s++ = test_bit(i, &tainted_mask) ? - t->c_true : t->c_false; - } - *s = 0; + print_tainted_seq(&s); - return buf; + return seq_buf_str(&s); } int test_taint(unsigned flag) From f36fc96c15bd860776ffc66e53bdeeb791b6f442 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 31 May 2024 12:04:56 +0300 Subject: [PATCH 42/98] kernel/panic: initialize taint_flags[] using a macro Make it easier to extend struct taint_flags in follow-up. Link: https://lkml.kernel.org/r/8a2498285d37953cfad9dce939ed3abef61051bd.1717146197.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Reviewed-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- kernel/panic.c | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index f7c0fddfd89a..21975497bfa4 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -471,32 +471,40 @@ void panic(const char *fmt, ...) EXPORT_SYMBOL(panic); +#define TAINT_FLAG(taint, _c_true, _c_false, _module) \ + [ TAINT_##taint ] = { \ + .c_true = _c_true, .c_false = _c_false, \ + .module = _module, \ + } + /* * TAINT_FORCED_RMMOD could be a per-module flag but the module * is being removed anyway. 
*/ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { - [ TAINT_PROPRIETARY_MODULE ] = { 'P', 'G', true }, - [ TAINT_FORCED_MODULE ] = { 'F', ' ', true }, - [ TAINT_CPU_OUT_OF_SPEC ] = { 'S', ' ', false }, - [ TAINT_FORCED_RMMOD ] = { 'R', ' ', false }, - [ TAINT_MACHINE_CHECK ] = { 'M', ' ', false }, - [ TAINT_BAD_PAGE ] = { 'B', ' ', false }, - [ TAINT_USER ] = { 'U', ' ', false }, - [ TAINT_DIE ] = { 'D', ' ', false }, - [ TAINT_OVERRIDDEN_ACPI_TABLE ] = { 'A', ' ', false }, - [ TAINT_WARN ] = { 'W', ' ', false }, - [ TAINT_CRAP ] = { 'C', ' ', true }, - [ TAINT_FIRMWARE_WORKAROUND ] = { 'I', ' ', false }, - [ TAINT_OOT_MODULE ] = { 'O', ' ', true }, - [ TAINT_UNSIGNED_MODULE ] = { 'E', ' ', true }, - [ TAINT_SOFTLOCKUP ] = { 'L', ' ', false }, - [ TAINT_LIVEPATCH ] = { 'K', ' ', true }, - [ TAINT_AUX ] = { 'X', ' ', true }, - [ TAINT_RANDSTRUCT ] = { 'T', ' ', true }, - [ TAINT_TEST ] = { 'N', ' ', true }, + TAINT_FLAG(PROPRIETARY_MODULE, 'P', 'G', true), + TAINT_FLAG(FORCED_MODULE, 'F', ' ', true), + TAINT_FLAG(CPU_OUT_OF_SPEC, 'S', ' ', false), + TAINT_FLAG(FORCED_RMMOD, 'R', ' ', false), + TAINT_FLAG(MACHINE_CHECK, 'M', ' ', false), + TAINT_FLAG(BAD_PAGE, 'B', ' ', false), + TAINT_FLAG(USER, 'U', ' ', false), + TAINT_FLAG(DIE, 'D', ' ', false), + TAINT_FLAG(OVERRIDDEN_ACPI_TABLE, 'A', ' ', false), + TAINT_FLAG(WARN, 'W', ' ', false), + TAINT_FLAG(CRAP, 'C', ' ', true), + TAINT_FLAG(FIRMWARE_WORKAROUND, 'I', ' ', false), + TAINT_FLAG(OOT_MODULE, 'O', ' ', true), + TAINT_FLAG(UNSIGNED_MODULE, 'E', ' ', true), + TAINT_FLAG(SOFTLOCKUP, 'L', ' ', false), + TAINT_FLAG(LIVEPATCH, 'K', ' ', true), + TAINT_FLAG(AUX, 'X', ' ', true), + TAINT_FLAG(RANDSTRUCT, 'T', ' ', true), + TAINT_FLAG(TEST, 'N', ' ', true), }; +#undef TAINT_FLAG + static void print_tainted_seq(struct seq_buf *s) { int i; From 2f183c68345a26213e5e7f798399bee68d1c4a97 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 31 May 2024 12:04:57 +0300 Subject: [PATCH 43/98] kernel/panic: add verbose logging of kernel taints in backtraces With nearly 20 taint flags and respective characters, it's getting a bit difficult to remember what each taint flag character means. Add verbose logging of the set taints in the format: Tainted: [P]=PROPRIETARY_MODULE, [W]=WARN in dump_stack_print_info() when there are taints. Note that the "negative flag" G is not included. 
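With the desc member this patch adds, the TAINT_FLAG() initializer introduced in the previous patch expands for one entry roughly as follows; the #taint stringification is what supplies the name printed in the verbose form:

    /* Expansion sketch for TAINT_FLAG(WARN, 'W', ' ', false): */
    [ TAINT_WARN ] = {
            .c_true  = 'W',
            .c_false = ' ',
            .module  = false,
            .desc    = "WARN",      /* from #taint */
    },

so a WARN-tainted kernel shows up in dump_stack() output as "[W]=WARN" without maintaining a separate description table.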
Link: https://lkml.kernel.org/r/7321e306166cb2ca2807ab8639e665baa2462e9c.1717146197.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Reviewed-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- include/linux/panic.h | 8 +++++--- kernel/panic.c | 45 ++++++++++++++++++++++++++++++++----------- lib/dump_stack.c | 3 +++ 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/include/linux/panic.h b/include/linux/panic.h index 6717b15e798c..3130e0b5116b 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -77,9 +77,10 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) #define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) struct taint_flag { - char c_true; /* character printed when tainted */ - char c_false; /* character printed when not tainted */ - bool module; /* also show as a per-module taint flag */ + char c_true; /* character printed when tainted */ + char c_false; /* character printed when not tainted */ + bool module; /* also show as a per-module taint flag */ + const char *desc; /* verbose description of the set taint flag */ }; extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT]; @@ -90,6 +91,7 @@ enum lockdep_ok { }; extern const char *print_tainted(void); +extern const char *print_tainted_verbose(void); extern void add_taint(unsigned flag, enum lockdep_ok); extern int test_taint(unsigned flag); extern unsigned long get_taint(void); diff --git a/kernel/panic.c b/kernel/panic.c index 21975497bfa4..f861bedc1925 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -475,6 +475,7 @@ EXPORT_SYMBOL(panic); [ TAINT_##taint ] = { \ .c_true = _c_true, .c_false = _c_false, \ .module = _module, \ + .desc = #taint, \ } /* @@ -505,8 +506,9 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { #undef TAINT_FLAG -static void print_tainted_seq(struct seq_buf *s) +static void print_tainted_seq(struct seq_buf *s, bool verbose) { + const char *sep = ""; int i; if (!tainted_mask) { @@ -520,10 +522,32 @@ static void print_tainted_seq(struct seq_buf *s) bool is_set = test_bit(i, &tainted_mask); char c = is_set ? t->c_true : t->c_false; - seq_buf_putc(s, c); + if (verbose) { + if (is_set) { + seq_buf_printf(s, "%s[%c]=%s", sep, c, t->desc); + sep = ", "; + } + } else { + seq_buf_putc(s, c); + } } } +static const char *_print_tainted(bool verbose) +{ + /* FIXME: what should the size be? */ + static char buf[sizeof(taint_flags)]; + struct seq_buf s; + + BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT); + + seq_buf_init(&s, buf, sizeof(buf)); + + print_tainted_seq(&s, verbose); + + return seq_buf_str(&s); +} + /** * print_tainted - return a string to represent the kernel taint state. 
* @@ -534,16 +558,15 @@ static void print_tainted_seq(struct seq_buf *s) */ const char *print_tainted(void) { - static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")]; - struct seq_buf s; + return _print_tainted(false); +} - BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT); - - seq_buf_init(&s, buf, sizeof(buf)); - - print_tainted_seq(&s); - - return seq_buf_str(&s); +/** + * print_tainted_verbose - A more verbose version of print_tainted() + */ +const char *print_tainted_verbose(void) +{ + return _print_tainted(true); } int test_taint(unsigned flag) diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 222c6d6c8281..8b6b70eaf949 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -62,6 +62,9 @@ void dump_stack_print_info(const char *log_lvl) (int)strcspn(init_utsname()->version, " "), init_utsname()->version, BUILD_ID_VAL); + if (get_taint()) + printk("%s%s\n", log_lvl, print_tainted_verbose()); + if (dump_stack_arch_desc_str[0] != '\0') printk("%sHardware name: %s\n", log_lvl, dump_stack_arch_desc_str); From e471831be2bea5f253396796c7691209c0065c65 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 31 May 2024 15:45:22 -0700 Subject: [PATCH 44/98] kunit/fortify: add missing MODULE_DESCRIPTION() macros make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/memcpy_kunit.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/fortify_kunit.o Add the missing invocations of the MODULE_DESCRIPTION() macro. Link: https://lkml.kernel.org/r/20240531-md-lib-fortify_source-v1-1-2c37f7fbaafc@quicinc.com Signed-off-by: Jeff Johnson Cc: Kees Cook Signed-off-by: Andrew Morton --- lib/fortify_kunit.c | 1 + lib/memcpy_kunit.c | 1 + 2 files changed, 2 insertions(+) diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c index e17d520f532c..47b5d247c37e 100644 --- a/lib/fortify_kunit.c +++ b/lib/fortify_kunit.c @@ -1096,4 +1096,5 @@ static struct kunit_suite fortify_test_suite = { kunit_test_suite(fortify_test_suite); +MODULE_DESCRIPTION("Runtime test cases for CONFIG_FORTIFY_SOURCE"); MODULE_LICENSE("GPL"); diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c index 20ea9038c3ff..d36933554e46 100644 --- a/lib/memcpy_kunit.c +++ b/lib/memcpy_kunit.c @@ -510,4 +510,5 @@ static struct kunit_suite memcpy_test_suite = { kunit_test_suite(memcpy_test_suite); +MODULE_DESCRIPTION("test cases for memcpy(), memmove(), and memset()"); MODULE_LICENSE("GPL"); From f069e33dafe17aab332e6671275cc61d16795064 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 1 Jun 2024 16:33:23 -0700 Subject: [PATCH 45/98] KUnit: add missing MODULE_DESCRIPTION() macros for lib/*_test.ko make allmodconfig && make W=1 C=1 reports for lib/*_test.ko: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/atomic64_test.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/hashtable_test.o Add the missing invocations of the MODULE_DESCRIPTION() macro. 
Link: https://lkml.kernel.org/r/20240601-md-lib-test2-v1-1-be764b785f17@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Andrew Morton --- lib/atomic64_test.c | 1 + lib/hashtable_test.c | 1 + 2 files changed, 2 insertions(+) diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index d9d170238165..759ea1783cc5 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -273,4 +273,5 @@ static __exit void test_atomics_exit(void) {} module_init(test_atomics_init); module_exit(test_atomics_exit); +MODULE_DESCRIPTION("Testsuite for atomic64_t functions"); MODULE_LICENSE("GPL"); diff --git a/lib/hashtable_test.c b/lib/hashtable_test.c index 1d1b3288dee2..3521de6bad15 100644 --- a/lib/hashtable_test.c +++ b/lib/hashtable_test.c @@ -314,4 +314,5 @@ static struct kunit_suite hashtable_test_module = { kunit_test_suites(&hashtable_test_module); +MODULE_DESCRIPTION("KUnit test for the Kernel Hashtable structures"); MODULE_LICENSE("GPL"); From 2e29fcb7743649649418e9aadcbcf032d2ec488f Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 1 Jun 2024 16:09:47 -0700 Subject: [PATCH 46/98] lib/asn1_encoder: add missing MODULE_DESCRIPTION() macro make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/asn1_encoder.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Link: https://lkml.kernel.org/r/20240601-md-lib-asn1_encoder-v1-1-8c634ed2d2e8@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Andrew Morton --- lib/asn1_encoder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/asn1_encoder.c b/lib/asn1_encoder.c index 0fd3c454a468..92f35aae13b1 100644 --- a/lib/asn1_encoder.c +++ b/lib/asn1_encoder.c @@ -449,4 +449,5 @@ asn1_encode_boolean(unsigned char *data, const unsigned char *end_data, } EXPORT_SYMBOL_GPL(asn1_encode_boolean); +MODULE_DESCRIPTION("Simple encoder primitives for ASN.1 BER/DER/CER"); MODULE_LICENSE("GPL"); From 1c5a13b39daf823b4b0349ef2345bf997cb8c257 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 1 Jun 2024 10:53:34 -0700 Subject: [PATCH 47/98] kunit: add missing MODULE_DESCRIPTION() macros to lib/*.c make allmodconfig && make W=1 C=1 reports for lib/*kunit: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/bitfield_kunit.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/checksum_kunit.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/cmdline_kunit.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/is_signed_type_kunit.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/overflow_kunit.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/stackinit_kunit.o Add the missing invocations of the MODULE_DESCRIPTION() macro. 
Link: https://lkml.kernel.org/r/20240601-md-lib-kunit-tests-v1-1-4493fe0032b9@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Andrew Morton --- lib/bitfield_kunit.c | 1 + lib/checksum_kunit.c | 1 + lib/cmdline_kunit.c | 1 + lib/is_signed_type_kunit.c | 1 + lib/overflow_kunit.c | 1 + lib/stackinit_kunit.c | 1 + 6 files changed, 6 insertions(+) diff --git a/lib/bitfield_kunit.c b/lib/bitfield_kunit.c index 1473d8b4bf0f..5ccd86f61896 100644 --- a/lib/bitfield_kunit.c +++ b/lib/bitfield_kunit.c @@ -151,4 +151,5 @@ static struct kunit_suite bitfields_test_suite = { kunit_test_suites(&bitfields_test_suite); MODULE_AUTHOR("Johannes Berg "); +MODULE_DESCRIPTION("Test cases for bitfield helpers"); MODULE_LICENSE("GPL"); diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c index 404dba36bae3..4e4d081a1d3b 100644 --- a/lib/checksum_kunit.c +++ b/lib/checksum_kunit.c @@ -639,4 +639,5 @@ static struct kunit_suite checksum_test_suite = { kunit_test_suites(&checksum_test_suite); MODULE_AUTHOR("Noah Goldstein "); +MODULE_DESCRIPTION("Test cases csum_* APIs"); MODULE_LICENSE("GPL"); diff --git a/lib/cmdline_kunit.c b/lib/cmdline_kunit.c index 705b82736be0..c1602f797637 100644 --- a/lib/cmdline_kunit.c +++ b/lib/cmdline_kunit.c @@ -153,4 +153,5 @@ static struct kunit_suite cmdline_test_suite = { }; kunit_test_suite(cmdline_test_suite); +MODULE_DESCRIPTION("Test cases for API provided by cmdline.c"); MODULE_LICENSE("GPL"); diff --git a/lib/is_signed_type_kunit.c b/lib/is_signed_type_kunit.c index 0a7f6ae62839..88adbe813f3a 100644 --- a/lib/is_signed_type_kunit.c +++ b/lib/is_signed_type_kunit.c @@ -46,4 +46,5 @@ static struct kunit_suite is_signed_type_test_suite = { kunit_test_suite(is_signed_type_test_suite); +MODULE_DESCRIPTION("is_signed_type() KUnit test suite"); MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index d305b0c054bb..f314a0c15a6d 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -1237,4 +1237,5 @@ static struct kunit_suite overflow_test_suite = { kunit_test_suite(overflow_test_suite); +MODULE_DESCRIPTION("Test cases for arithmetic overflow checks"); MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index 3bc14d1ee816..c14c6f8e6308 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -471,4 +471,5 @@ static struct kunit_suite stackinit_test_suite = { kunit_test_suites(&stackinit_test_suite); +MODULE_DESCRIPTION("Test cases for compiler-based stack variable zeroing"); MODULE_LICENSE("GPL"); From 683da20738fd3e0687a939b684c4a94a09de096c Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 31 May 2024 19:19:09 -0700 Subject: [PATCH 48/98] uuid: add missing MODULE_DESCRIPTION() macro make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_uuid.o Add the missing invocation of the MODULE_DESCRIPTION() macro. 
Link: https://lkml.kernel.org/r/20240531-md-lib-test_uuid-v1-1-67fa498104c0@quicinc.com Signed-off-by: Jeff Johnson Acked-by: Andy Shevchenko Signed-off-by: Andrew Morton --- lib/test_uuid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_uuid.c b/lib/test_uuid.c index cd819c397dc7..0124fad5d72c 100644 --- a/lib/test_uuid.c +++ b/lib/test_uuid.c @@ -130,4 +130,5 @@ static void __exit test_uuid_exit(void) module_exit(test_uuid_exit); MODULE_AUTHOR("Andy Shevchenko "); +MODULE_DESCRIPTION("Test cases for lib/uuid.c module"); MODULE_LICENSE("Dual BSD/GPL"); From d46a555d3cd97132e21014b28a459c928925fa0f Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 31 May 2024 18:49:47 -0700 Subject: [PATCH 49/98] siphash: add missing MODULE_DESCRIPTION() macro make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/siphash_kunit.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Link: https://lkml.kernel.org/r/20240531-md-lib-siphash_kunit-v1-1-38688065b796@quicinc.com Signed-off-by: Jeff Johnson Cc: Jason A. Donenfeld Signed-off-by: Andrew Morton --- lib/siphash_kunit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/siphash_kunit.c b/lib/siphash_kunit.c index a3c697e8be35..26bd4e8dc03e 100644 --- a/lib/siphash_kunit.c +++ b/lib/siphash_kunit.c @@ -194,4 +194,5 @@ static struct kunit_suite siphash_test_suite = { kunit_test_suite(siphash_test_suite); MODULE_AUTHOR("Jason A. Donenfeld "); +MODULE_DESCRIPTION("Test cases for siphash.c"); MODULE_LICENSE("Dual BSD/GPL"); From 7ef148daa56dfcad7b554a79c28b8e94726771a5 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 31 May 2024 17:23:09 -0700 Subject: [PATCH 50/98] lib/test_kmod: add missing MODULE_DESCRIPTION() macro make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_kmod.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Link: https://lkml.kernel.org/r/20240531-md-lib-test_kmod-v1-1-fdf11bc6095e@quicinc.com Signed-off-by: Jeff Johnson Reviewed-by: Lucas De Marchi Cc: Luis Chamberlain Signed-off-by: Andrew Morton --- lib/test_kmod.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 1eec3b7ac67c..064ed0fce75a 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1223,4 +1223,5 @@ static void __exit test_kmod_exit(void) module_exit(test_kmod_exit); MODULE_AUTHOR("Luis R. Rodriguez "); +MODULE_DESCRIPTION("kmod stress test driver"); MODULE_LICENSE("GPL"); From 09aaf15a7826717b75d2b4305daaee099636ca5c Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 31 May 2024 16:45:16 -0700 Subject: [PATCH 51/98] lib/test_linear_ranges: add missing MODULE_DESCRIPTION() macro make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_linear_ranges.o Add the missing invocation of the MODULE_DESCRIPTION() macro. 
Link: https://lkml.kernel.org/r/20240531-md-lib-test_linear_ranges-v1-1-053a1aad37c6@quicinc.com Signed-off-by: Jeff Johnson Reviewed-by: Matti Vaittinen Cc: Mark Brown Signed-off-by: Andrew Morton --- lib/test_linear_ranges.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_linear_ranges.c b/lib/test_linear_ranges.c index c18f9c0f1f25..f482be00f1bc 100644 --- a/lib/test_linear_ranges.c +++ b/lib/test_linear_ranges.c @@ -216,4 +216,5 @@ static struct kunit_suite range_test_module = { kunit_test_suites(&range_test_module); +MODULE_DESCRIPTION("KUnit test for the linear_ranges helper"); MODULE_LICENSE("GPL"); From d0bff054053f7a46a3819aba55ad803aa639ed7e Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Fri, 7 Jun 2024 11:24:43 -0400 Subject: [PATCH 52/98] lib/Kconfig.debug: document panic= command line option and procfs entry for PANIC_TIMEOUT PANIC_TIMEOUT can also be controlled with the panic= kernel command line option and the file /proc/sys/kernel/panic. Let's document both of these in the Kconfig help text. Link: https://lkml.kernel.org/r/20240607152443.925168-1-bmasney@redhat.com Signed-off-by: Brian Masney Reviewed-by: Javier Martinez Canillas Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 59b6765d86b8..86c24d9ed376 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1043,7 +1043,9 @@ config PANIC_TIMEOUT Set the timeout value (in seconds) until a reboot occurs when the kernel panics. If n = 0, then we wait forever. A timeout value n > 0 will wait n seconds before rebooting, while a timeout - value n < 0 will reboot immediately. + value n < 0 will reboot immediately. This setting can be overridden + with the kernel command line option panic=, and from userspace via + /proc/sys/kernel/panic. config LOCKUP_DETECTOR bool From e7679a5c820d0865d0900931a7341048d830917d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 7 Jun 2024 18:14:29 +0300 Subject: [PATCH 53/98] proc: test "Kthread:" field /proc/${pid}/status got Kthread field recently. Test that userspace program is not reported as kernel thread. Test that kernel thread is reported as kernel thread. Use kthreadd with pid 2 for this. 
Link: https://lkml.kernel.org/r/818c4c41-8668-4566-97a9-7254abf819ee@p183 Signed-off-by: Alexey Dobriyan Cc: Chunguang Wu Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/.gitignore | 2 + tools/testing/selftests/proc/Makefile | 2 + .../selftests/proc/proc-2-is-kthread.c | 53 +++++++++++++++++++ .../selftests/proc/proc-self-isnt-kthread.c | 37 +++++++++++++ 4 files changed, 94 insertions(+) create mode 100644 tools/testing/selftests/proc/proc-2-is-kthread.c create mode 100644 tools/testing/selftests/proc/proc-self-isnt-kthread.c diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index a156ac5dd2c6..973968f45bba 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -2,6 +2,7 @@ /fd-001-lookup /fd-002-posix-eq /fd-003-kthread +/proc-2-is-kthread /proc-fsconfig-hidepid /proc-loadavg-001 /proc-multiple-procfs @@ -9,6 +10,7 @@ /proc-pid-vm /proc-self-map-files-001 /proc-self-map-files-002 +/proc-self-isnt-kthread /proc-self-syscall /proc-self-wchan /proc-subset-pid diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index cd95369254c0..6066f607f758 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -7,11 +7,13 @@ TEST_GEN_PROGS := TEST_GEN_PROGS += fd-001-lookup TEST_GEN_PROGS += fd-002-posix-eq TEST_GEN_PROGS += fd-003-kthread +TEST_GEN_PROGS += proc-2-is-kthread TEST_GEN_PROGS += proc-loadavg-001 TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 TEST_GEN_PROGS += proc-self-map-files-002 +TEST_GEN_PROGS += proc-self-isnt-kthread TEST_GEN_PROGS += proc-self-syscall TEST_GEN_PROGS += proc-self-wchan TEST_GEN_PROGS += proc-subset-pid diff --git a/tools/testing/selftests/proc/proc-2-is-kthread.c b/tools/testing/selftests/proc/proc-2-is-kthread.c new file mode 100644 index 000000000000..f13668fb482e --- /dev/null +++ b/tools/testing/selftests/proc/proc-2-is-kthread.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Alexey Dobriyan + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that kernel thread is reported as such. */ +#undef NDEBUG +#include +#include +#include +#include +#include + +int main(void) +{ + /* + * The following solutions don't really work: + * + * 1) jit kernel module which creates kernel thread: + * test becomes arch-specific, + * problems with mandatory module signing, + * problems with lockdown mode, + * doesn't work with CONFIG_MODULES=n at all, + * kthread creation API is formally unstable internal kernel API, + * need a mechanism to report test kernel thread's PID back, + * + * 2) ksoftirqd/0 and kswapd0 look like stable enough kernel threads, + * but their PIDs are unstable. 
+ * + * Check against kthreadd which always seem to exist under pid 2. + */ + int fd = open("/proc/2/status", O_RDONLY); + assert(fd >= 0); + + char buf[4096]; + ssize_t rv = read(fd, buf, sizeof(buf)); + assert(0 <= rv && rv < sizeof(buf)); + buf[rv] = '\0'; + + assert(strstr(buf, "Kthread:\t1\n")); + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-isnt-kthread.c b/tools/testing/selftests/proc/proc-self-isnt-kthread.c new file mode 100644 index 000000000000..e01f4e0a91b4 --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-isnt-kthread.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024 Alexey Dobriyan + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that userspace program is not kernel thread. */ +#undef NDEBUG +#include +#include +#include +#include + +int main(void) +{ + int fd = open("/proc/self/status", O_RDONLY); + assert(fd >= 0); + + char buf[4096]; + ssize_t rv = read(fd, buf, sizeof(buf)); + assert(0 <= rv && rv < sizeof(buf)); + buf[rv] = '\0'; + + /* This test is very much not kernel thread. */ + assert(strstr(buf, "Kthread:\t0\n")); + + return 0; +} From 5eb1911a8c63f0e10a5f746f52fcc3c9bbfbc710 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Thu, 6 Jun 2024 17:14:27 +0800 Subject: [PATCH 54/98] crash: remove header files which are included more than once Following warning is reported, so remove these duplicated header including: ./kernel/crash_reserve.c: linux/kexec.h is included more than once. This is just a clean code, no logic changed. Link: https://lkml.kernel.org/r/20240606091427.3512314-1-haowenchao22@gmail.com Signed-off-by: Wenchao Hao Acked-by: Baoquan He Cc: Dave Young Cc: Vivek Goyal Signed-off-by: Andrew Morton --- kernel/crash_reserve.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index 5b2722a93a48..d3b4cd12bdd1 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include From 08ab0915831d454ecf220c14df5dea35933468e7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 8 Jun 2024 16:34:18 +0200 Subject: [PATCH 55/98] fsi: occ: remove usage of the deprecated ida_simple_xx() API Patch series "Remove usage of the deprecated ida_simple_xx() API". The series removes the *last* usages of the deprecated ida_simple_xx() API. This patch (of 3): ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). Note that the upper limit of ida_simple_get() is exclusive, but the one of ida_alloc_range() is inclusive. So, this upper limit, INT_MAX, should have been changed to INT_MAX-1. But, it is likely that the INT_MAX 'idx' is valid that the max value passed to ida_simple_get() should have been 0. So, allow this INT_MAX 'idx' value now. 
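As background for the conversion, a minimal sketch of the old and new calls (illustrative only, not part of this patch; example_ida and example() are made-up names):

	#include <linux/idr.h>

	static DEFINE_IDA(example_ida);

	static void example(void)
	{
		int old_id, new_id, min_id;

		/* Old API: upper limit is exclusive -- allocates an id in [5, 10). */
		old_id = ida_simple_get(&example_ida, 5, 10, GFP_KERNEL);

		/* New API: upper limit is inclusive -- the same range is written [5, 9]. */
		new_id = ida_alloc_range(&example_ida, 5, 9, GFP_KERNEL);

		/* "Any id >= 1", matching the fallback used in this patch. */
		min_id = ida_alloc_min(&example_ida, 1, GFP_KERNEL);

		if (old_id >= 0)
			ida_simple_remove(&example_ida, old_id);	/* deprecated free */
		if (new_id >= 0)
			ida_free(&example_ida, new_id);			/* replacement free */
		if (min_id >= 0)
			ida_free(&example_ida, min_id);
	}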
Link: https://lkml.kernel.org/r/cover.1717855701.git.christophe.jaillet@wanadoo.fr Link: https://lkml.kernel.org/r/8e28b0c45fe8f28ca4475fe0027f8099c41259f0.1717855701.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Reviewed-by: Eddie James Cc: Alistar Popple Cc: Christian Gromm Cc: Jeremy Kerr Cc: Joel Stanley Cc: Matthew Wilcox (Oracle) Cc: Parthiban Veerasooran Cc: Alexey Dobriyan Signed-off-by: Andrew Morton --- drivers/fsi/fsi-occ.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/fsi/fsi-occ.c b/drivers/fsi/fsi-occ.c index da35ca9e84a6..f7157c1d77d8 100644 --- a/drivers/fsi/fsi-occ.c +++ b/drivers/fsi/fsi-occ.c @@ -656,17 +656,16 @@ static int occ_probe(struct platform_device *pdev) rc = of_property_read_u32(dev->of_node, "reg", ®); if (!rc) { /* make sure we don't have a duplicate from dts */ - occ->idx = ida_simple_get(&occ_ida, reg, reg + 1, - GFP_KERNEL); + occ->idx = ida_alloc_range(&occ_ida, reg, reg, + GFP_KERNEL); if (occ->idx < 0) - occ->idx = ida_simple_get(&occ_ida, 1, INT_MAX, - GFP_KERNEL); + occ->idx = ida_alloc_min(&occ_ida, 1, + GFP_KERNEL); } else { - occ->idx = ida_simple_get(&occ_ida, 1, INT_MAX, - GFP_KERNEL); + occ->idx = ida_alloc_min(&occ_ida, 1, GFP_KERNEL); } } else { - occ->idx = ida_simple_get(&occ_ida, 1, INT_MAX, GFP_KERNEL); + occ->idx = ida_alloc_min(&occ_ida, 1, GFP_KERNEL); } platform_set_drvdata(pdev, occ); @@ -680,7 +679,7 @@ static int occ_probe(struct platform_device *pdev) rc = misc_register(&occ->mdev); if (rc) { dev_err(dev, "failed to register miscdevice: %d\n", rc); - ida_simple_remove(&occ_ida, occ->idx); + ida_free(&occ_ida, occ->idx); kvfree(occ->buffer); return rc; } @@ -719,7 +718,7 @@ static int occ_remove(struct platform_device *pdev) else device_for_each_child(&pdev->dev, NULL, occ_unregister_of_child); - ida_simple_remove(&occ_ida, occ->idx); + ida_free(&occ_ida, occ->idx); return 0; } From b737a221702c7b6080a72ab671eac938f4768318 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 8 Jun 2024 16:34:19 +0200 Subject: [PATCH 56/98] most: remove usage of the deprecated ida_simple_xx() API ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). This is less verbose. 
Link: https://lkml.kernel.org/r/ddbb2e3f249ba90417dc7ab01713faa1091fb44c.1717855701.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Acked-by: Parthiban Veerasooran Cc: Alexey Dobriyan Cc: Alistar Popple Cc: Christian Gromm Cc: Eddie James Cc: Jeremy Kerr Cc: Joel Stanley Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- drivers/most/core.c | 10 +++++----- drivers/most/most_cdev.c | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/most/core.c b/drivers/most/core.c index f13d0e14a48b..10342e8801bf 100644 --- a/drivers/most/core.c +++ b/drivers/most/core.c @@ -1286,7 +1286,7 @@ int most_register_interface(struct most_interface *iface) !iface->poison_channel || (iface->num_channels > MAX_CHANNELS)) return -EINVAL; - id = ida_simple_get(&mdev_id, 0, 0, GFP_KERNEL); + id = ida_alloc(&mdev_id, GFP_KERNEL); if (id < 0) { dev_err(iface->dev, "Failed to allocate device ID\n"); return id; @@ -1294,7 +1294,7 @@ int most_register_interface(struct most_interface *iface) iface->p = kzalloc(sizeof(*iface->p), GFP_KERNEL); if (!iface->p) { - ida_simple_remove(&mdev_id, id); + ida_free(&mdev_id, id); return -ENOMEM; } @@ -1308,7 +1308,7 @@ int most_register_interface(struct most_interface *iface) dev_err(iface->dev, "Failed to register interface device\n"); kfree(iface->p); put_device(iface->dev); - ida_simple_remove(&mdev_id, id); + ida_free(&mdev_id, id); return -ENOMEM; } @@ -1366,7 +1366,7 @@ err_free_resources: } kfree(iface->p); device_unregister(iface->dev); - ida_simple_remove(&mdev_id, id); + ida_free(&mdev_id, id); return -ENOMEM; } EXPORT_SYMBOL_GPL(most_register_interface); @@ -1397,7 +1397,7 @@ void most_deregister_interface(struct most_interface *iface) device_unregister(&c->dev); } - ida_simple_remove(&mdev_id, iface->p->dev_id); + ida_free(&mdev_id, iface->p->dev_id); kfree(iface->p); device_unregister(iface->dev); } diff --git a/drivers/most/most_cdev.c b/drivers/most/most_cdev.c index 3ed8f461e01e..b9423f82373d 100644 --- a/drivers/most/most_cdev.c +++ b/drivers/most/most_cdev.c @@ -100,7 +100,7 @@ static void destroy_cdev(struct comp_channel *c) static void destroy_channel(struct comp_channel *c) { - ida_simple_remove(&comp.minor_id, MINOR(c->devno)); + ida_free(&comp.minor_id, MINOR(c->devno)); kfifo_free(&c->fifo); kfree(c); } @@ -425,7 +425,7 @@ static int comp_probe(struct most_interface *iface, int channel_id, if (c) return -EEXIST; - current_minor = ida_simple_get(&comp.minor_id, 0, 0, GFP_KERNEL); + current_minor = ida_alloc(&comp.minor_id, GFP_KERNEL); if (current_minor < 0) return current_minor; @@ -472,7 +472,7 @@ err_del_cdev_and_free_channel: err_free_c: kfree(c); err_remove_ida: - ida_simple_remove(&comp.minor_id, current_minor); + ida_free(&comp.minor_id, current_minor); return retval; } From cda1c8e0c33d3e06ef8e912ab1b16f00e4d5d088 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 8 Jun 2024 16:34:20 +0200 Subject: [PATCH 57/98] proc: remove usage of the deprecated ida_simple_xx() API ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). Note that the upper limit of ida_simple_get() is exclusive, but the one of ida_alloc_max() is inclusive. So a -1 has been added when needed. 
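The general shape of that adjustment, sketched against proc_alloc_inum() (fragment for illustration only; EXAMPLE_MAX is a made-up constant standing in for UINT_MAX - PROC_DYNAMIC_FIRST):

	/* Old: exclusive limit, so valid ids are 0 .. EXAMPLE_MAX */
	i = ida_simple_get(&proc_inum_ida, 0, EXAMPLE_MAX + 1, GFP_KERNEL);

	/* New: inclusive limit, hence one less than the old argument */
	i = ida_alloc_max(&proc_inum_ida, EXAMPLE_MAX, GFP_KERNEL);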
Link: https://lkml.kernel.org/r/ae10003feb87d240163d0854de95f09e1f00be7d.1717855701.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Reviewed-by: Matthew Wilcox (Oracle) Cc: Alexey Dobriyan Cc: Alistar Popple Cc: Christian Gromm Cc: Eddie James Cc: Jeremy Kerr Cc: Joel Stanley Cc: Parthiban Veerasooran Signed-off-by: Andrew Morton --- fs/proc/generic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 775ce0bcf08c..c02f1e63f82d 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -202,8 +202,8 @@ int proc_alloc_inum(unsigned int *inum) { int i; - i = ida_simple_get(&proc_inum_ida, 0, UINT_MAX - PROC_DYNAMIC_FIRST + 1, - GFP_KERNEL); + i = ida_alloc_max(&proc_inum_ida, UINT_MAX - PROC_DYNAMIC_FIRST, + GFP_KERNEL); if (i < 0) return i; @@ -213,7 +213,7 @@ int proc_alloc_inum(unsigned int *inum) void proc_free_inum(unsigned int inum) { - ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); + ida_free(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); } static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags) From 25fa5f9923383784bbf2423ccc84f75615e4b12d Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 11 Jun 2024 01:00:28 +0900 Subject: [PATCH 58/98] nilfs2: prepare backing device folios for writing after adding checksums Patch series "nilfs2: eliminate the call to inode_attach_wb()". This series eliminates the inode_attach_wb() call from nilfs2, which was introduced as a workaround for a kernel bug but is suspected of layer violation (in fact, it is undesirable since it exposes a reference to the backing device). Removal of the inode_attach_wb() call is done by simply using mark_buffer_dirty() on the backing device's buffers. To use it safely, this series will prepare it in patch 1/2, and perform the replacement itself in patch 2/2. This patch (of 2): In preparation for inode_attach_wb(), which is currently called when attaching the log writer, to be done via mark_buffer_dirty(), change the order of preparation for log writing. Specifically, the function call that adds checksums to segment summary and super root blocks, which correspond to the log header and trailer, is made before starting writeback of folios containing those blocks. The current steps are as follows: 1. Put the folios of segment summary blocks in writeback state. 2. Put the folios of data blocks, metadata file blocks, and btree node blocks (collectively called payload blocks) into writeback state. 3. Put the super root block folio in writeback state. 4. Add checksums. Change these as follows: 1. Put the folios of payload blocks in writeback state. 2. Add checksums. 3. Put the folios of segment summary blocks in writeback state. 4. Put the super root block folio in writeback state. In this order, the contents of segment summaries and super root block that directly use buffer/folio of the backing device can be determined including the addition of checksums, before preparing to write. Step (1), which puts the payload block folios in writeback state, is performed first because if there are memory-mapped data blocks, a valid checksum can only be calculated after step (1). 
Link: https://lkml.kernel.org/r/20240610160029.7673-2-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 85 +++++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 33 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 6ea81f1d5094..a92609816bc9 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1639,41 +1639,30 @@ static void nilfs_begin_folio_io(struct folio *folio) folio_unlock(folio); } -static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) +/** + * nilfs_prepare_write_logs - prepare to write logs + * @logs: logs to prepare for writing + * @seed: checksum seed value + * + * nilfs_prepare_write_logs() adds checksums and prepares the block + * buffers/folios for writing logs. In order to stabilize folios of + * memory-mapped file blocks by putting them in writeback state before + * calculating the checksums, first prepare to write payload blocks other + * than segment summary and super root blocks in which the checksums will + * be embedded. + */ +static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) { struct nilfs_segment_buffer *segbuf; struct folio *bd_folio = NULL, *fs_folio = NULL; + struct buffer_head *bh; - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; - - list_for_each_entry(bh, &segbuf->sb_segsum_buffers, - b_assoc_buffers) { - if (bh->b_folio != bd_folio) { - if (bd_folio) { - folio_lock(bd_folio); - folio_wait_writeback(bd_folio); - folio_clear_dirty_for_io(bd_folio); - folio_start_writeback(bd_folio); - folio_unlock(bd_folio); - } - bd_folio = bh->b_folio; - } - } - + /* Prepare to write payload blocks */ + list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh == segbuf->sb_super_root) { - if (bh->b_folio != bd_folio) { - folio_lock(bd_folio); - folio_wait_writeback(bd_folio); - folio_clear_dirty_for_io(bd_folio); - folio_start_writeback(bd_folio); - folio_unlock(bd_folio); - bd_folio = bh->b_folio; - } + if (bh == segbuf->sb_super_root) break; - } set_buffer_async_write(bh); if (bh->b_folio != fs_folio) { nilfs_begin_folio_io(fs_folio); @@ -1681,6 +1670,40 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } } } + nilfs_begin_folio_io(fs_folio); + + nilfs_add_checksums_on_logs(logs, seed); + + /* Prepare to write segment summary blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + if (bh->b_folio == bd_folio) + continue; + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + } + bd_folio = bh->b_folio; + } + } + + /* Prepare to write super root block */ + bh = NILFS_LAST_SEGBUF(logs)->sb_super_root; + if (bh) { + if (bh->b_folio != bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + bd_folio = bh->b_folio; + } + } + if (bd_folio) { folio_lock(bd_folio); folio_wait_writeback(bd_folio); @@ -1688,7 +1711,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) folio_start_writeback(bd_folio); folio_unlock(bd_folio); } - nilfs_begin_folio_io(fs_folio); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, @@ -2070,10 +2092,7 @@ static int 
nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - nilfs_segctor_prepare_write(sci); - - nilfs_add_checksums_on_logs(&sci->sc_segbufs, - nilfs->ns_crc_seed); + nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) From 68142cb628f75a22f12fde9f5c0c69dfc1bdcf47 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 11 Jun 2024 01:00:29 +0900 Subject: [PATCH 59/98] nilfs2: do not call inode_attach_wb() directly Call mark_buffer_dirty() for segment summary and super root block buffers on the backing device's page cache, thereby indirectly calling inode_attach_wb(). Then remove the no longer needed call to inode_attach_wb() in nilfs_attach_log_writer(), resolving the concern about its layer-violating use. Link: https://lkml.kernel.org/r/20240610160029.7673-3-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a92609816bc9..36e0bb38e1aa 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1678,6 +1678,7 @@ static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + mark_buffer_dirty(bh); if (bh->b_folio == bd_folio) continue; if (bd_folio) { @@ -1694,6 +1695,7 @@ static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) /* Prepare to write super root block */ bh = NILFS_LAST_SEGBUF(logs)->sb_super_root; if (bh) { + mark_buffer_dirty(bh); if (bh->b_folio != bd_folio) { folio_lock(bd_folio); folio_wait_writeback(bd_folio); @@ -2843,8 +2845,6 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) if (!nilfs->ns_writer) return -ENOMEM; - inode_attach_wb(nilfs->ns_bdev->bd_mapping->host, NULL); - err = nilfs_segctor_start_thread(nilfs->ns_writer); if (unlikely(err)) nilfs_detach_log_writer(sb); From d6bb39519aafed9800213db064104dd29699b939 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 10 Jun 2024 17:02:09 +0200 Subject: [PATCH 60/98] checkpatch: really skip LONG_LINE_* when LONG_LINE is ignored For a printout to happen, all types must be set to "show". So, AND is needed for the flags, not OR, if we want to ignore something. Link: https://lkml.kernel.org/r/20240610150420.2279-2-wsa+renesas@sang-engineering.com Fixes: 47e0c88b37a5 ("checkpatch: categorize some long line length checks") Signed-off-by: Wolfram Sang Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 2b812210b412..375749e0a174 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3858,7 +3858,7 @@ sub process { } if ($msg_type ne "" && - (show_type("LONG_LINE") || show_type($msg_type))) { + show_type("LONG_LINE") && show_type($msg_type)) { my $msg_level = \&WARN; $msg_level = \&CHK if ($file); &{$msg_level}($msg_type, From d5d6281ae8e0c929c3ff188652f5b12c680fe8bf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 Jun 2024 16:43:29 +0300 Subject: [PATCH 61/98] checkpatch: check for missing Fixes tags This check looks for common words that probably indicate a patch is a fix. 
For now the regex is: (?:(?:BUG: K.|UB)SAN: |Call Trace:|stable\@|syzkaller)/) Why are stable patches encouraged to have a fixes tag? Some people mark their stable patches as "# 5.10" etc. This is useful but a Fixes tag is still a good idea. For example, the Fixes tag helps in review. It helps people to not cherry-pick buggy patches without also cherry-picking the fix. Also if a bug affects the 5.7 kernel some people will round it up to 5.10+ because 5.7 is not supported on kernel.org. It's possible the Bad Binder bug was caused by this sort of gap where companies outside of kernel.org are supporting different kernels from kernel.org. Should it be counted as a Fix when a patch just silences harmless WARN_ON() stack trace. Yes. Definitely. Is silencing compiler warnings a fix? It seems unfair to the original authors, but we use -Werror now, and warnings break the build so let's just add Fixes tags. I tell people that silencing static checker warnings is not a fix but the rules on this vary by subsystem. Is fixing a minor LTP issue (Linux Test Project) a fix? Probably? It's hard to know what to do if the LTP test has technically always been broken. One clear false positive from this check is when someone updated their debug output and included before and after Call Traces. Or when crashes are introduced deliberately for testing. In those cases, you should just ignore checkpatch. Link: https://lkml.kernel.org/r/ZmhUgZBKeF_8ixA6@moroto Signed-off-by: Dan Carpenter Acked-by: Greg Kroah-Hartman Reviewed-by: Kees Cook Cc: Andy Whitcroft Cc: Arnd Bergmann Cc: Dwaipayan Ray Cc: Joe Perches Cc: Lukas Bulwahn Cc: Sasha Levin Cc: Thorsten Leemhuis Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 375749e0a174..39032224d504 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -28,6 +28,7 @@ my %verbose_messages = (); my %verbose_emitted = (); my $tree = 1; my $chk_signoff = 1; +my $chk_fixes_tag = 1; my $chk_patch = 1; my $tst_only; my $emacs = 0; @@ -88,6 +89,7 @@ Options: -v, --verbose verbose mode --no-tree run without a kernel tree --no-signoff do not check for 'Signed-off-by' line + --no-fixes-tag do not check for 'Fixes:' tag --patch treat FILE as patchfile (default) --emacs emacs compile window format --terse one line per report @@ -295,6 +297,7 @@ GetOptions( 'v|verbose!' => \$verbose, 'tree!' => \$tree, 'signoff!' => \$chk_signoff, + 'fixes-tag!' => \$chk_fixes_tag, 'patch!' => \$chk_patch, 'emacs!' => \$emacs, 'terse!' 
=> \$terse, @@ -1257,6 +1260,7 @@ sub git_commit_info { } $chk_signoff = 0 if ($file); +$chk_fixes_tag = 0 if ($file); my @rawlines = (); my @lines = (); @@ -2636,6 +2640,9 @@ sub process { our $clean = 1; my $signoff = 0; + my $fixes_tag = 0; + my $is_revert = 0; + my $needs_fixes_tag = ""; my $author = ''; my $authorsignoff = 0; my $author_sob = ''; @@ -3189,6 +3196,16 @@ sub process { } } +# These indicate a bug fix + if (!$in_header_lines && !$is_patch && + $line =~ /^This reverts commit/) { + $is_revert = 1; + } + + if (!$in_header_lines && !$is_patch && + $line =~ /((?:(?:BUG: K.|UB)SAN: |Call Trace:|stable\@|syzkaller))/) { + $needs_fixes_tag = $1; + } # Check Fixes: styles is correct if (!$in_header_lines && @@ -3201,6 +3218,7 @@ sub process { my $id_length = 1; my $id_case = 1; my $title_has_quotes = 0; + $fixes_tag = 1; if ($line =~ /(\s*fixes:?)\s+([0-9a-f]{5,})\s+($balanced_parens)/i) { my $tag = $1; @@ -7697,6 +7715,12 @@ sub process { ERROR("NOT_UNIFIED_DIFF", "Does not appear to be a unified-diff format patch\n"); } + if ($is_patch && $has_commit_log && $chk_fixes_tag) { + if ($needs_fixes_tag ne "" && !$is_revert && !$fixes_tag) { + WARN("MISSING_FIXES_TAG", + "The commit message has '$needs_fixes_tag', perhaps it also needs a 'Fixes:' tag?\n"); + } + } if ($is_patch && $has_commit_log && $chk_signoff) { if ($signoff == 0) { ERROR("MISSING_SIGN_OFF", From abd8ac05570cf1488a311f95c4c539fbe119d5da Mon Sep 17 00:00:00 2001 From: Yongliang Gao Date: Thu, 13 Jun 2024 11:31:59 +0800 Subject: [PATCH 62/98] hung_task: ignore hung_task_warnings when hung_task_panic is enabled If hung_task_panic is enabled, don't consider the value of hung_task_warnings and display the information of the hung tasks. In some cases, hung_task_panic might not be initially set up, after several hung tasks occur, the hung_task_warnings count reaches zero. If hung_task_panic is set up later, it may not display any helpful hung task info in dmesg, only showing messages like: Kernel panic - not syncing: hung_task: blocked tasks CPU: 3 PID: 58 Comm: khungtaskd Not tainted 6.10.0-rc3 #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Call Trace: panic+0x2f3/0x320 watchdog+0x2dd/0x510 ? __pfx_watchdog+0x10/0x10 kthread+0xe0/0x110 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2f/0x40 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Link: https://lkml.kernel.org/r/20240613033159.3446265-1-leonylgao@gmail.com Signed-off-by: Yongliang Gao Reviewed-by: Huang Cun Cc: Joel Granados Cc: John Siddle Cc: Kent Overstreet Signed-off-by: Andrew Morton --- kernel/hung_task.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 1d92016b0b3c..6ca859715d8a 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -127,7 +127,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) * Ok, the task did not get scheduled for more than 2 minutes, * complain: */ - if (sysctl_hung_task_warnings) { + if (sysctl_hung_task_warnings || hung_task_call_panic) { if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--; pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", From c8dab79f9eef6f1063128f1340f266321cccd17c Mon Sep 17 00:00:00 2001 From: I Hsin Cheng Date: Fri, 14 Jun 2024 23:46:03 +0800 Subject: [PATCH 63/98] lib/plist.c: avoid worst case scenario in plist_add Worst case scenario of plist_add() happens when the priority of the inserted plist_node is going to be the largest after the insertion is done. 
The cost is going to be more significant when the original plist is longer, because the iterator is going to traverse the whole plist to find the correct position to insert the new node. The situation can be avoided by using a reverse iterator at the same time, doing so the maximum possible number of iteration is going to shrink from N to N/2. The proposed change of plist_add pasts the test in lib/plist.c to validate its correctness, also add the worst case scenario test for plist_add() in plist_test(). The worst case test are tested with the size of test_data and test_node growing from 200 to 1000. The result are showned in the following table, in which we can observed that the proposed change of plist_add performs better than the original version, and the difference between these two implementations are more significant with the size of N growing. The random case test [1], and best case test [2] are also provided, with result showing the proposed change performs slightly better in random case test while the original implementation performs slightly better in best case test, while the difference in both test are minor, we can see them as even in those two situations. ----------------------------------------------------------- | Test size | 200 | 400 | 600 | 800 | 1000 | ----------------------------------------------------------- | new_plist_add | 140911| 548681| 1220512| 2048493| 3763755| ----------------------------------------------------------- | old_plist_add | 188198| 774222| 1643547| 3008929| 4947435| ----------------------------------------------------------- Link: https://lkml.kernel.org/r/20240614154603.65203-1-richard120310@gmail.com Signed-off-by: I Hsin Cheng Signed-off-by: Ching-Chun (Jim) Huang Signed-off-by: Andrew Morton --- lib/plist.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/lib/plist.c b/lib/plist.c index 2e51829d3db9..c6bce1226874 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -72,7 +72,7 @@ static void plist_check_head(struct plist_head *head) */ void plist_add(struct plist_node *node, struct plist_head *head) { - struct plist_node *first, *iter, *prev = NULL; + struct plist_node *first, *iter, *prev = NULL, *last, *reverse_iter; struct list_head *node_next = &head->node_list; plist_check_head(head); @@ -83,16 +83,26 @@ void plist_add(struct plist_node *node, struct plist_head *head) goto ins_node; first = iter = plist_first(head); + last = reverse_iter = list_entry(first->prio_list.prev, struct plist_node, prio_list); do { if (node->prio < iter->prio) { node_next = &iter->node_list; break; + } else if (node->prio >= reverse_iter->prio) { + prev = reverse_iter; + iter = list_entry(reverse_iter->prio_list.next, + struct plist_node, prio_list); + if (likely(reverse_iter != last)) + node_next = &iter->node_list; + break; } prev = iter; iter = list_entry(iter->prio_list.next, struct plist_node, prio_list); + reverse_iter = list_entry(reverse_iter->prio_list.prev, + struct plist_node, prio_list); } while (iter != first); if (!prev || prev->prio != node->prio) @@ -255,6 +265,32 @@ static int __init plist_test(void) } printk(KERN_DEBUG "end plist test\n"); + + /* Worst case test for plist_add() */ + unsigned int test_data[241]; + + for (i = 0; i < ARRAY_SIZE(test_data); i++) + test_data[i] = i; + + ktime_t start, end, time_elapsed = 0; + + plist_head_init(&test_head); + + for (i = 0; i < ARRAY_SIZE(test_node); i++) { + plist_node_init(test_node + i, 0); + test_node[i].prio = test_data[i]; + } + + for (i = 0; i < 
ARRAY_SIZE(test_node); i++) { + if (plist_node_empty(test_node + i)) { + start = ktime_get(); + plist_add(test_node + i, &test_head); + end = ktime_get(); + time_elapsed += (end - start); + } + } + + pr_debug("plist_add worst case test time elapsed %lld\n", time_elapsed); return 0; } From 3ebe69c0c3ae3ef5ecd1ad86aa98a3b7cbe3c7e9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 14 Jun 2024 18:25:07 +0200 Subject: [PATCH 64/98] ocfs2: constify struct ocfs2_lock_res_ops "struct ocfs2_lock_res_ops" are not modified in this driver. Constifying this structure moves some data to a read-only section, so increase overall security. In order to do it, "struct ocfs2_lock_res" also needs to be adjusted to this new const qualifier. On a x86_64, with allmodconfig: Before: ====== text data bss dec hex filename 83038 2632 400 86070 15036 fs/ocfs2/dlmglue.o After: ===== text data bss dec hex filename 83806 1992 272 86070 15036 fs/ocfs2/dlmglue.o Link: https://lkml.kernel.org/r/43d3e2ae3a97d3cbe93d6ba6ce48ae5ec04d7526.1718382288.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/dlmglue.c | 28 ++++++++++++++-------------- fs/ocfs2/ocfs2.h | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index cb40cafbc062..da78a04d6f0b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -221,12 +221,12 @@ struct ocfs2_lock_res_ops { */ #define LOCK_TYPE_USES_LVB 0x2 -static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { .get_osb = ocfs2_get_inode_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { .get_osb = ocfs2_get_inode_osb, .check_downconvert = ocfs2_check_meta_downconvert, .set_lvb = ocfs2_set_meta_lvb, @@ -234,50 +234,50 @@ static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_super_lops = { +static const struct ocfs2_lock_res_ops ocfs2_super_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH, }; -static struct ocfs2_lock_res_ops ocfs2_rename_lops = { +static const struct ocfs2_lock_res_ops ocfs2_rename_lops = { .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { +static const struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { +static const struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { +static const struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { +static const struct ocfs2_lock_res_ops ocfs2_dentry_lops = { .get_osb = ocfs2_get_dentry_osb, .post_unlock = ocfs2_dentry_post_unlock, .downconvert_worker = ocfs2_dentry_convert_worker, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { .get_osb = ocfs2_get_inode_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_flock_lops = { +static const struct ocfs2_lock_res_ops ocfs2_flock_lops = { .get_osb = ocfs2_get_file_osb, .flags = 0, }; -static struct 
ocfs2_lock_res_ops ocfs2_qinfo_lops = { +static const struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { .set_lvb = ocfs2_set_qinfo_lvb, .get_osb = ocfs2_get_qinfo_osb, .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { +static const struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { .check_downconvert = ocfs2_check_refcount_downconvert, .downconvert_worker = ocfs2_refcount_convert_worker, .flags = 0, @@ -510,7 +510,7 @@ static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, struct ocfs2_lock_res *res, enum ocfs2_lock_type type, - struct ocfs2_lock_res_ops *ops, + const struct ocfs2_lock_res_ops *ops, void *priv) { res->l_type = type; @@ -553,7 +553,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, unsigned int generation, struct inode *inode) { - struct ocfs2_lock_res_ops *ops; + const struct ocfs2_lock_res_ops *ops; switch(type) { case OCFS2_LOCK_TYPE_RW: diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 8fe826143d7b..51c52768132d 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -154,7 +154,7 @@ struct ocfs2_lock_stats { struct ocfs2_lock_res { void *l_priv; - struct ocfs2_lock_res_ops *l_ops; + const struct ocfs2_lock_res_ops *l_ops; struct list_head l_blocked_list; From 03bf0044e13ad6f20b6b4513131b73b090b9dbb8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 14 Jun 2024 22:57:09 +0200 Subject: [PATCH 65/98] ocfs2: constify struct ocfs2_stack_operations "struct ocfs2_stack_operations" are not modified in this driver. Constifying this structure moves some data to a read-only section, so increase overall security. In order to do it, "struct ocfs2_stack_plugin" also needs to be adjusted to this new const qualifier. 
On a x86_64, with allmodconfig: Before: ====== text data bss dec hex filename 6241 644 0 6885 1ae5 fs/ocfs2/stack_o2cb.o After: ===== text data bss dec hex filename 6337 548 0 6885 1ae5 fs/ocfs2/stack_o2cb.o Link: https://lkml.kernel.org/r/f52dab89ee0049ec6271de29183a781efbb275ab.1718398605.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/stack_o2cb.c | 2 +- fs/ocfs2/stack_user.c | 2 +- fs/ocfs2/stackglue.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index c973c03f6fd8..10157d9d7a9c 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -404,7 +404,7 @@ static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn, return 0; } -static struct ocfs2_stack_operations o2cb_stack_ops = { +static const struct ocfs2_stack_operations o2cb_stack_ops = { .connect = o2cb_cluster_connect, .disconnect = o2cb_cluster_disconnect, .this_node = o2cb_cluster_this_node, diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index c11406cd87a8..77edcd70f72c 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -1065,7 +1065,7 @@ static int user_cluster_this_node(struct ocfs2_cluster_connection *conn, return 0; } -static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { +static const struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .connect = user_cluster_connect, .disconnect = user_cluster_disconnect, .this_node = user_cluster_this_node, diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 3636847fae19..02ab072c528a 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -223,7 +223,7 @@ struct ocfs2_stack_operations { */ struct ocfs2_stack_plugin { char *sp_name; - struct ocfs2_stack_operations *sp_ops; + const struct ocfs2_stack_operations *sp_ops; struct module *sp_owner; /* These are managed by the stackglue code. */ From 326c34efe3d1761686c385ff774b00012f1e1fc0 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Mon, 17 Jun 2024 12:52:21 -0700 Subject: [PATCH 66/98] tools/testing/radix-tree: add missing MODULE_DESCRIPTION definition Userspace builds of the radix-tree testing suite fails because of commit test_maple_tree: add the missing MODULE_DESCRIPTION() macro. Add the proper defines to tools/testing/radix-tree/maple.c and tools/testing/radix-tree/xarray.c so MODULE_DESCRIPTION has a definition. This allows the build to succeed. Link: https://lkml.kernel.org/r/20240617195221.106565-1-sidhartha.kumar@oracle.com Fixes: 9f8090e8c4d1 ("test_maple_tree: add the missing MODULE_DESCRIPTION() macro") Signed-off-by: Sidhartha Kumar Reviewed-by: Liam R. 
Howlett Cc: Jeff Johnson Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- tools/testing/radix-tree/maple.c | 1 + tools/testing/radix-tree/xarray.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index f1caf4bcf937..cd1cf05503b4 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -19,6 +19,7 @@ #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) +#define MODULE_DESCRIPTION(X) #define MODULE_LICENSE(x) #define dump_stack() assert(0) diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c index f20e12cbbfd4..d0e53bff1eb6 100644 --- a/tools/testing/radix-tree/xarray.c +++ b/tools/testing/radix-tree/xarray.c @@ -10,6 +10,7 @@ #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) +#define MODULE_DESCRIPTION(X) #define MODULE_LICENSE(x) #define dump_stack() assert(0) From d2917ff19962951399bc33f596648836cc5a0be4 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 14 Jun 2024 21:13:58 -0700 Subject: [PATCH 67/98] lib/dump_stack: report process UID in dump_stack_print_info() To make it easier to identify the crashing process, report effective UID when dumping the stack. Link: https://lkml.kernel.org/r/20240615041358.103791-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Signed-off-by: Andrew Morton --- lib/dump_stack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 8b6b70eaf949..1a996fbbf50a 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -54,8 +54,10 @@ void __init dump_stack_set_arch_desc(const char *fmt, ...) */ void dump_stack_print_info(const char *log_lvl) { - printk("%sCPU: %d PID: %d Comm: %.20s %s%s %s %.*s" BUILD_ID_FMT "\n", - log_lvl, raw_smp_processor_id(), current->pid, current->comm, + printk("%sCPU: %d UID: %u PID: %d Comm: %.20s %s%s %s %.*s" BUILD_ID_FMT "\n", + log_lvl, raw_smp_processor_id(), + __kuid_val(current_real_cred()->euid), + current->pid, current->comm, kexec_crash_loaded() ? 
"Kdump: loaded " : "", print_tainted(), init_utsname()->release, From 303474913271af4eb3ec1c5f955d1e01682f3e1f Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Wed, 19 Jun 2024 13:59:15 -0700 Subject: [PATCH 68/98] KUnit: add missing MODULE_DESCRIPTION() macros for lib/test_*.ko make allmodconfig && make W=1 C=1 reports for lib/test_*.ko: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_hexdump.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_dhry.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_firmware.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_sysctl.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_hash.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_ida.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_list_sort.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_min_heap.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_module.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_sort.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_static_keys.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_static_key_base.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_memcat_p.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_blackhole_dev.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_meminit.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_free_pages.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_kprobes.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_ref_tracker.o WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_bits.o Add the missing invocations of the MODULE_DESCRIPTION() macro. Link: https://lkml.kernel.org/r/20240619-md-lib-test-v2-1-301e30eeba1e@quicinc.com Signed-off-by: Jeff Johnson Reviewed-by: Kees Cook Reviewed-by: Masami Hiramatsu (Google) Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: Masami Hiramatsu (Google) Cc: "Naveen N. 
Rao" Signed-off-by: Andrew Morton --- lib/dhry_run.c | 1 + lib/test-kstrtox.c | 1 + lib/test_bits.c | 1 + lib/test_blackhole_dev.c | 1 + lib/test_firmware.c | 1 + lib/test_free_pages.c | 1 + lib/test_hash.c | 1 + lib/test_hexdump.c | 1 + lib/test_ida.c | 1 + lib/test_kprobes.c | 3 ++- lib/test_list_sort.c | 1 + lib/test_memcat_p.c | 1 + lib/test_meminit.c | 1 + lib/test_min_heap.c | 1 + lib/test_module.c | 1 + lib/test_ref_tracker.c | 3 ++- lib/test_sort.c | 1 + lib/test_static_key_base.c | 1 + lib/test_static_keys.c | 1 + lib/test_sysctl.c | 1 + 20 files changed, 22 insertions(+), 2 deletions(-) diff --git a/lib/dhry_run.c b/lib/dhry_run.c index e6a279dabf84..4a6d05ce4361 100644 --- a/lib/dhry_run.c +++ b/lib/dhry_run.c @@ -83,4 +83,5 @@ static int __init dhry_init(void) module_init(dhry_init); MODULE_AUTHOR("Geert Uytterhoeven "); +MODULE_DESCRIPTION("Dhrystone benchmark test module"); MODULE_LICENSE("GPL"); diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c index f355f67169b6..ee87fef66cb5 100644 --- a/lib/test-kstrtox.c +++ b/lib/test-kstrtox.c @@ -732,4 +732,5 @@ static int __init test_kstrtox_init(void) return -EINVAL; } module_init(test_kstrtox_init); +MODULE_DESCRIPTION("Module test for kstrto*() APIs"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/test_bits.c b/lib/test_bits.c index c9368a2314e7..01313980f175 100644 --- a/lib/test_bits.c +++ b/lib/test_bits.c @@ -72,4 +72,5 @@ static struct kunit_suite bits_test_suite = { }; kunit_test_suite(bits_test_suite); +MODULE_DESCRIPTION("Test cases for functions and macros in bits.h"); MODULE_LICENSE("GPL"); diff --git a/lib/test_blackhole_dev.c b/lib/test_blackhole_dev.c index f247089d63c0..ec290ac2a0d9 100644 --- a/lib/test_blackhole_dev.c +++ b/lib/test_blackhole_dev.c @@ -96,4 +96,5 @@ module_init(test_blackholedev_init); module_exit(test_blackholedev_exit); MODULE_AUTHOR("Mahesh Bandewar "); +MODULE_DESCRIPTION("module test of the blackhole_dev"); MODULE_LICENSE("GPL"); diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 9cfdcd6d21db..bcb32cbff188 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -1567,4 +1567,5 @@ static void __exit test_firmware_exit(void) module_exit(test_firmware_exit); MODULE_AUTHOR("Kees Cook "); +MODULE_DESCRIPTION("interface to trigger and test firmware loading"); MODULE_LICENSE("GPL"); diff --git a/lib/test_free_pages.c b/lib/test_free_pages.c index 9ebf6f5549f3..48952364c540 100644 --- a/lib/test_free_pages.c +++ b/lib/test_free_pages.c @@ -44,4 +44,5 @@ static void m_ex(void) module_init(m_in); module_exit(m_ex); MODULE_AUTHOR("Matthew Wilcox "); +MODULE_DESCRIPTION("Check that free_pages() doesn't leak memory"); MODULE_LICENSE("GPL"); diff --git a/lib/test_hash.c b/lib/test_hash.c index bb25fda34794..a7af39662a0a 100644 --- a/lib/test_hash.c +++ b/lib/test_hash.c @@ -235,4 +235,5 @@ static struct kunit_suite hash_test_suite = { kunit_test_suite(hash_test_suite); +MODULE_DESCRIPTION("Test cases for and "); MODULE_LICENSE("GPL"); diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index fe2682bb21e6..751645645988 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -253,4 +253,5 @@ static void __exit test_hexdump_exit(void) module_exit(test_hexdump_exit); MODULE_AUTHOR("Andy Shevchenko "); +MODULE_DESCRIPTION("Test cases for lib/hexdump.c module"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/test_ida.c b/lib/test_ida.c index 072a49897e71..c80155a1956d 100644 --- a/lib/test_ida.c +++ b/lib/test_ida.c @@ -214,4 +214,5 @@ static void ida_exit(void) 
module_init(ida_checks); module_exit(ida_exit); MODULE_AUTHOR("Matthew Wilcox "); +MODULE_DESCRIPTION("Test the IDA API"); MODULE_LICENSE("GPL"); diff --git a/lib/test_kprobes.c b/lib/test_kprobes.c index 0648f7154f5c..b7582010125c 100644 --- a/lib/test_kprobes.c +++ b/lib/test_kprobes.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * test_kprobes.c - simple sanity test for *probes + * test_kprobes.c - simple sanity test for k*probes * * Copyright IBM Corp. 2008 */ @@ -400,4 +400,5 @@ static struct kunit_suite kprobes_test_suite = { kunit_test_suites(&kprobes_test_suite); +MODULE_DESCRIPTION("simple sanity test for k*probes"); MODULE_LICENSE("GPL"); diff --git a/lib/test_list_sort.c b/lib/test_list_sort.c index cc5f335f29b5..30879abc8a42 100644 --- a/lib/test_list_sort.c +++ b/lib/test_list_sort.c @@ -119,4 +119,5 @@ static struct kunit_suite list_sort_suite = { kunit_test_suites(&list_sort_suite); +MODULE_DESCRIPTION("list_sort() KUnit test suite"); MODULE_LICENSE("GPL"); diff --git a/lib/test_memcat_p.c b/lib/test_memcat_p.c index 849c477d49d0..7e0797a6bebf 100644 --- a/lib/test_memcat_p.c +++ b/lib/test_memcat_p.c @@ -112,4 +112,5 @@ static void __exit test_memcat_p_exit(void) module_init(test_memcat_p_init); module_exit(test_memcat_p_exit); +MODULE_DESCRIPTION("Test cases for memcat_p() in lib/memcat_p.c"); MODULE_LICENSE("GPL"); diff --git a/lib/test_meminit.c b/lib/test_meminit.c index 0dc173849a54..6298f66c964b 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -436,4 +436,5 @@ static int __init test_meminit_init(void) } module_init(test_meminit_init); +MODULE_DESCRIPTION("Test cases for SL[AOU]B/page initialization at alloc/free time"); MODULE_LICENSE("GPL"); diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c index 9e1feb9b679c..64c877e73b64 100644 --- a/lib/test_min_heap.c +++ b/lib/test_min_heap.c @@ -226,4 +226,5 @@ static void __exit test_min_heap_exit(void) } module_exit(test_min_heap_exit); +MODULE_DESCRIPTION("Test cases for the min max heap"); MODULE_LICENSE("GPL"); diff --git a/lib/test_module.c b/lib/test_module.c index debd19e35198..3d1b29b74807 100644 --- a/lib/test_module.c +++ b/lib/test_module.c @@ -31,4 +31,5 @@ static void __exit test_module_exit(void) module_exit(test_module_exit); MODULE_AUTHOR("Kees Cook "); +MODULE_DESCRIPTION("module loading subsystem test module"); MODULE_LICENSE("GPL"); diff --git a/lib/test_ref_tracker.c b/lib/test_ref_tracker.c index 49970a7c96f3..b983ceb12afc 100644 --- a/lib/test_ref_tracker.c +++ b/lib/test_ref_tracker.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Referrence tracker self test. + * Reference tracker self test. 
* * Copyright (c) 2021 Eric Dumazet */ @@ -112,4 +112,5 @@ static void __exit test_ref_tracker_exit(void) module_init(test_ref_tracker_init); module_exit(test_ref_tracker_exit); +MODULE_DESCRIPTION("Reference tracker self test"); MODULE_LICENSE("GPL v2"); diff --git a/lib/test_sort.c b/lib/test_sort.c index da4495125097..cd4a338d1153 100644 --- a/lib/test_sort.c +++ b/lib/test_sort.c @@ -57,4 +57,5 @@ static struct kunit_suite sort_test_suite = { kunit_test_suites(&sort_test_suite); +MODULE_DESCRIPTION("sort() KUnit test suite"); MODULE_LICENSE("GPL"); diff --git a/lib/test_static_key_base.c b/lib/test_static_key_base.c index 5089a2e2bdd8..9f507672afa5 100644 --- a/lib/test_static_key_base.c +++ b/lib/test_static_key_base.c @@ -57,4 +57,5 @@ module_init(test_static_key_base_init); module_exit(test_static_key_base_exit); MODULE_AUTHOR("Jason Baron "); +MODULE_DESCRIPTION("Kernel module to support testing static keys"); MODULE_LICENSE("GPL"); diff --git a/lib/test_static_keys.c b/lib/test_static_keys.c index 42daa74be029..00c715f30df9 100644 --- a/lib/test_static_keys.c +++ b/lib/test_static_keys.c @@ -236,4 +236,5 @@ module_init(test_static_key_init); module_exit(test_static_key_exit); MODULE_AUTHOR("Jason Baron "); +MODULE_DESCRIPTION("Kernel module for testing static keys"); MODULE_LICENSE("GPL"); diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index 9321d850931f..b6696fa1d426 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -280,4 +280,5 @@ static void __exit test_sysctl_exit(void) module_exit(test_sysctl_exit); MODULE_AUTHOR("Luis R. Rodriguez "); +MODULE_DESCRIPTION("proc sysctl test driver"); MODULE_LICENSE("GPL"); From 6f3283df275b19bdea8158a2e2d8ad181995022b Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 26 Jun 2024 16:21:00 -0700 Subject: [PATCH 69/98] tools/testing/radix-tree/idr-test: add missing MODULE_DESCRIPTION define Userspace builds of the radix-tree testing suite fails because of patch KUnit: add missing MODULE_DESCRIPTION() macros for lib/test_*.ko. Add the proper defines to tools/testing/radix-tree/idr-test.c so MODULE_DESCRIPTION has a definition. This allows the build to succeed. Link: https://lkml.kernel.org/r/20240626232100.306130-1-sidhartha.kumar@oracle.com Fixes: f069e33dafe1 ("KUnit: add missing MODULE_DESCRIPTION() macros for lib/test_*.ko") Signed-off-by: Sidhartha Kumar Reviewed-by: Liam R. 
Howlett Cc: Jeff Johnson Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- tools/testing/radix-tree/idr-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index ca24f6839d50..84b8c3c92c79 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -424,6 +424,7 @@ void idr_checks(void) #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) +#define MODULE_DESCRIPTION(X) #define MODULE_LICENSE(x) #define dump_stack() assert(0) void ida_dump(struct ida *); From 2d87af0666d0b5838e3e3e6430c6498df8bf6ad5 Mon Sep 17 00:00:00 2001 From: Amer Al Shanawany Date: Mon, 3 Jun 2024 14:42:20 +0200 Subject: [PATCH 70/98] selftests: proc: remove unreached code and fix build warning fix the following warning: proc-empty-vm.c:385:17: warning: ignoring return value of `write' declared with attribute `warn_unused_result' [-Wunused-result] 385 | write(1, buf, rv); | ^~~~~~~~~~~~~~~~~ Link: https://lkml.kernel.org/r/20240603124220.33778-1-amer.shanawany@gmail.com Signed-off-by: Amer Al Shanawany Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202404010211.ygidvMwa-lkp@intel.com/ Cc: Alexey Dobriyan Cc: Hugh Dickins Cc: Javier Carrasco Cc: Shuah Khan Cc: Swarup Laxman Kotiaklapudi Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-empty-vm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index 56198d4ca2bf..b3f898aab4ab 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -381,9 +381,6 @@ static int test_proc_pid_statm(pid_t pid) assert(rv >= 0); assert(rv <= sizeof(buf)); - if (0) { - write(1, buf, rv); - } const char *p = buf; const char *const end = p + rv; From b8c7dd15ceb87e5f37ec1ed7b56c279d98f3eb53 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 11 Jun 2024 17:12:22 -0700 Subject: [PATCH 71/98] kernel-wide: fix spelling mistakes like "assocative" -> "associative" There were several instances of the string "assocat" in the kernel, which should have been spelled "associat", with the various endings of -ive, -ed, -ion, and sometimes beginnging with dis-. Add to the spelling dictionary the corrections so that future instances will be caught by checkpatch, and fix the instances found. Originally noticed by accident with a 'git grep socat'. 
Link: https://lkml.kernel.org/r/20240612001247.356867-1-jesse.brandeburg@intel.com Signed-off-by: Jesse Brandeburg Cc: Randy Dunlap Signed-off-by: Andrew Morton --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/net/wireless/ti/wl1251/acx.h | 2 +- drivers/scsi/qedf/qedf_main.c | 2 +- drivers/staging/rtl8723bs/core/rtw_mlme_ext.c | 2 +- drivers/staging/rtl8723bs/core/rtw_pwrctrl.c | 2 +- include/linux/nvme-fc-driver.h | 2 +- include/linux/soc/apple/rtkit.h | 4 ++-- net/netfilter/nf_conntrack_core.c | 2 +- net/tipc/socket.c | 2 +- scripts/spelling.txt | 3 +++ 10 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 090724fa766c..d54162ce0f99 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -340,7 +340,7 @@ static int eb_create(struct i915_execbuffer *eb) * Without a 1:1 association between relocation handles and * the execobject[] index, we instead create a hashtable. * We size it dynamically based on available memory, starting - * first with 1:1 assocative hash and scaling back until + * first with 1:1 associative hash and scaling back until * the allocation succeeds. * * Later on we use a positive lut_size to indicate we are diff --git a/drivers/net/wireless/ti/wl1251/acx.h b/drivers/net/wireless/ti/wl1251/acx.h index 1da6ab664e41..af5ec7f12231 100644 --- a/drivers/net/wireless/ti/wl1251/acx.h +++ b/drivers/net/wireless/ti/wl1251/acx.h @@ -229,7 +229,7 @@ struct acx_rx_msdu_lifetime { * === ========== * 31:12 Reserved - Always equal to 0. * 11 Association - When set, the WiLink receives all association - * related frames (association request/response, reassocation + * related frames (association request/response, reassociation * request/response, and disassociation). When clear, these frames * are discarded. * 10 Auth/De auth - When set, the WiLink receives all authentication diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 49adddf978cc..4813087e58a1 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2286,7 +2286,7 @@ static bool qedf_process_completions(struct qedf_fastpath *fp) * on. 
*/ if (!io_req) - /* If there is not io_req assocated with this CQE + /* If there is not io_req associated with this CQE * just queue it on CPU 0 */ cpu = 0; diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c index 985683767a40..9ebf25a0ef9b 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c @@ -979,7 +979,7 @@ unsigned int OnAssocReq(struct adapter *padapter, union recv_frame *precv_frame) left = pkt_len - (sizeof(struct ieee80211_hdr_3addr) + ie_offset); pos = pframe + (sizeof(struct ieee80211_hdr_3addr) + ie_offset); - /* check if this stat has been successfully authenticated/assocated */ + /* check if this stat has been successfully authenticated/associated */ if (!((pstat->state) & WIFI_FW_AUTH_SUCCESS)) { if (!((pstat->state) & WIFI_FW_ASSOC_SUCCESS)) { status = WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA; diff --git a/drivers/staging/rtl8723bs/core/rtw_pwrctrl.c b/drivers/staging/rtl8723bs/core/rtw_pwrctrl.c index a392d5b4caf2..e9763eab16f6 100644 --- a/drivers/staging/rtl8723bs/core/rtw_pwrctrl.c +++ b/drivers/staging/rtl8723bs/core/rtw_pwrctrl.c @@ -452,7 +452,7 @@ void LPS_Enter(struct adapter *padapter, const char *msg) if (hal_btcoex_IsBtControlLps(padapter)) return; - /* Skip lps enter request if number of assocated adapters is not 1 */ + /* Skip lps enter request if number of associated adapters is not 1 */ if (check_fwstate(&(dvobj->padapters->mlmepriv), WIFI_ASOC_STATE)) n_assoc_iface++; if (n_assoc_iface != 1) diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 4109f1bd6128..1177dde77104 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -620,7 +620,7 @@ enum { * * Structure used between LLDD and nvmet-fc layer to represent the exchange * context for a FC-NVME FCP I/O operation (e.g. a nvme sqe, the sqe-related - * memory transfers, and its assocated cqe transfer). + * memory transfers, and its associated cqe transfer). * * The structure is allocated by the LLDD whenever a FCP CMD IU is received * from the FC link. The address of the structure is passed to the nvmet-fc diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index 8c9ca857ccf6..c06d17599ae7 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -69,7 +69,7 @@ struct apple_rtkit; * Initializes the internal state required to handle RTKit. This * should usually be called within _probe. * - * @dev: Pointer to the device node this coprocessor is assocated with + * @dev: Pointer to the device node this coprocessor is associated with * @cookie: opaque cookie passed to all functions defined in rtkit_ops * @mbox_name: mailbox name used to communicate with the co-processor * @mbox_idx: mailbox index to be used if mbox_name is NULL @@ -83,7 +83,7 @@ struct apple_rtkit *devm_apple_rtkit_init(struct device *dev, void *cookie, * Non-devm version of devm_apple_rtkit_init. Must be freed with * apple_rtkit_free. 
* - * @dev: Pointer to the device node this coprocessor is assocated with + * @dev: Pointer to the device node this coprocessor is associated with * @cookie: opaque cookie passed to all functions defined in rtkit_ops * @mbox_name: mailbox name used to communicate with the co-processor * @mbox_idx: mailbox index to be used if mbox_name is NULL diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7ac20750c127..9384426ddc06 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1090,7 +1090,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) * A conntrack entry can be inserted to the connection tracking table * if there is no existing entry with an identical tuple. * - * If there is one, @skb (and the assocated, unconfirmed conntrack) has + * If there is one, @skb (and the associated, unconfirmed conntrack) has * to be dropped. In case @skb is retransmitted, next conntrack lookup * will find the already-existing entry. * diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 2d58ecae4e21..1a0cd06f0eae 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -657,7 +657,7 @@ static int tipc_release(struct socket *sock) } /** - * __tipc_bind - associate or disassocate TIPC name(s) with a socket + * __tipc_bind - associate or disassociate TIPC name(s) with a socket * @sock: socket structure * @skaddr: socket address describing name(s) and desired operation * @alen: size of socket address data structure diff --git a/scripts/spelling.txt b/scripts/spelling.txt index edec60d39bbf..554329a074ce 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -176,8 +176,10 @@ assigment||assignment assigments||assignments assistent||assistant assocaited||associated +assocated||associated assocating||associating assocation||association +assocative||associative associcated||associated assotiated||associated asssert||assert @@ -543,6 +545,7 @@ direcly||directly direectly||directly diregard||disregard disassocation||disassociation +disassocative||disassociative disapear||disappear disapeared||disappeared disappared||disappeared From 63ce5947ef45071d825d4712d6c5ece13f1ce2f6 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Wed, 19 Jun 2024 15:49:06 +0800 Subject: [PATCH 72/98] scripts/gdb: redefine MAX_ORDER sanely Patch series "Fix GDB command error". This patchset fixes some GDB command errors. 1. Since memory layout of AARCH64 has been changed, we need to modify the layout in GDB scripts as well. 2. Fix pool_index naming of stackdepot. This patch (of 6): Change the definition of MAX_ORDER to be inclusive. Link: https://lkml.kernel.org/r/20240619074911.100434-1-kuan-ying.lee@canonical.com Link: https://lkml.kernel.org/r/20240619074911.100434-2-kuan-ying.lee@canonical.com Fixes: 23baf831a32c ("mm, treewide: redefine MAX_ORDER sanely") Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Cc: Kirill A. 
Shutemov Cc: Michael Ellerman Signed-off-by: Andrew Morton --- scripts/gdb/linux/mm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py index 515730fd4c9d..30738f174fe2 100644 --- a/scripts/gdb/linux/mm.py +++ b/scripts/gdb/linux/mm.py @@ -59,9 +59,9 @@ class aarch64_page_ops(): if str(constants.LX_CONFIG_ARCH_FORCE_MAX_ORDER).isdigit(): self.MAX_ORDER = constants.LX_CONFIG_ARCH_FORCE_MAX_ORDER else: - self.MAX_ORDER = 11 + self.MAX_ORDER = 10 - self.MAX_ORDER_NR_PAGES = 1 << (self.MAX_ORDER - 1) + self.MAX_ORDER_NR_PAGES = 1 << (self.MAX_ORDER) self.PFN_SECTION_SHIFT = self.SECTION_SIZE_BITS - self.PAGE_SHIFT self.NR_MEM_SECTIONS = 1 << self.SECTIONS_SHIFT self.PAGES_PER_SECTION = 1 << self.PFN_SECTION_SHIFT From f2eaed1565acc2bdeb5c433f5f6c7bd7a0d62db1 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Wed, 19 Jun 2024 15:49:07 +0800 Subject: [PATCH 73/98] scripts/gdb: rework module VA range After we enlarge the module VA range, we also change the module VA range in gdb scripts. Link: https://lkml.kernel.org/r/20240619074911.100434-3-kuan-ying.lee@canonical.com Fixes: 3e35d303ab7d ("arm64: module: rework module VA range selection") Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Cc: Kirill A. Shutemov Cc: Michael Ellerman Signed-off-by: Andrew Morton --- scripts/gdb/linux/mm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py index 30738f174fe2..e0461248abe2 100644 --- a/scripts/gdb/linux/mm.py +++ b/scripts/gdb/linux/mm.py @@ -33,7 +33,7 @@ class aarch64_page_ops(): def __init__(self): self.SUBSECTION_SHIFT = 21 self.SEBSECTION_SIZE = 1 << self.SUBSECTION_SHIFT - self.MODULES_VSIZE = 128 * 1024 * 1024 + self.MODULES_VSIZE = 2 * 1024 * 1024 * 1024 if constants.LX_CONFIG_ARM64_64K_PAGES: self.SECTION_SIZE_BITS = 29 From 3c0e9a200434e8bb4a2bffbaaeb381bdff5a5938 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Wed, 19 Jun 2024 15:49:08 +0800 Subject: [PATCH 74/98] scripts/gdb: change the layout of vmemmap We need to change the layout of vmemmap in gdb scripts after commit 32697ff38287 ("arm64: vmemmap: Avoid base2 order of struct page size to dimension region") changed it. Link: https://lkml.kernel.org/r/20240619074911.100434-4-kuan-ying.lee@canonical.com Fixes: 32697ff38287 ("arm64: vmemmap: Avoid base2 order of struct page size to dimension region") Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Cc: Kirill A. 
Shutemov Cc: Michael Ellerman Signed-off-by: Andrew Morton --- scripts/gdb/linux/mm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py index e0461248abe2..f8b9be3f43e8 100644 --- a/scripts/gdb/linux/mm.py +++ b/scripts/gdb/linux/mm.py @@ -89,10 +89,10 @@ class aarch64_page_ops(): self.MODULES_VADDR = self._PAGE_END(self.VA_BITS_MIN) self.MODULES_END = self.MODULES_VADDR + self.MODULES_VSIZE - self.VMEMMAP_SHIFT = (self.PAGE_SHIFT - self.STRUCT_PAGE_MAX_SHIFT) - self.VMEMMAP_SIZE = ((self._PAGE_END(self.VA_BITS_MIN) - self.PAGE_OFFSET) >> self.VMEMMAP_SHIFT) - self.VMEMMAP_START = (-(1 << (self.VA_BITS - self.VMEMMAP_SHIFT))) & 0xffffffffffffffff - self.VMEMMAP_END = self.VMEMMAP_START + self.VMEMMAP_SIZE + self.VMEMMAP_RANGE = self._PAGE_END(self.VA_BITS_MIN) - self.PAGE_OFFSET + self.VMEMMAP_SIZE = (self.VMEMMAP_RANGE >> self.PAGE_SHIFT) * self.struct_page_size + self.VMEMMAP_END = (-(1 * 1024 * 1024 * 1024)) & 0xffffffffffffffff + self.VMEMMAP_START = self.VMEMMAP_END - self.VMEMMAP_SIZE self.VMALLOC_START = self.MODULES_END self.VMALLOC_END = self.VMEMMAP_START - 256 * 1024 * 1024 From 04a40baec04fa0634d71ebfa0c91469160a9976e Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Wed, 19 Jun 2024 15:49:09 +0800 Subject: [PATCH 75/98] scripts/gdb: set vabits_actual based on TCR_EL1 We encounter the following issue after commit 9cce9c6c2c3b ("arm64: mm: Handle LVA support as a CPU feature"). (gdb) lx-slabinfo Python Exception : No symbol "vabits_actual" in current context. Error occurred in Python: No symbol "vabits_actual" in current context. We set vabits_actual based on TCR_EL1 value when VA_BITS is bigger than 48. Link: https://lkml.kernel.org/r/20240619074911.100434-5-kuan-ying.lee@canonical.com Fixes: 9cce9c6c2c3b ("arm64: mm: Handle LVA support as a CPU feature") Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Cc: Kirill A. Shutemov Cc: Michael Ellerman Signed-off-by: Andrew Morton --- scripts/gdb/linux/mm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py index f8b9be3f43e8..200def0e4b9a 100644 --- a/scripts/gdb/linux/mm.py +++ b/scripts/gdb/linux/mm.py @@ -48,7 +48,9 @@ class aarch64_page_ops(): self.VA_BITS = constants.LX_CONFIG_ARM64_VA_BITS if self.VA_BITS > 48: self.VA_BITS_MIN = 48 - self.vabits_actual = gdb.parse_and_eval('vabits_actual') + tcr_el1 = gdb.execute("info registers $TCR_EL1", to_string=True) + tcr_el1 = int(tcr_el1.split()[1], 16) + self.vabits_actual = 64 - ((tcr_el1 >> 16) & 63) else: self.VA_BITS_MIN = self.VA_BITS self.vabits_actual = self.VA_BITS From 7d8742bf853cc1d4faf08840cc64414ad5f34061 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Wed, 19 Jun 2024 15:49:10 +0800 Subject: [PATCH 76/98] scripts/gdb: change VA_BITS_MIN when we use 16K page Change VA_BITS_MIN when we use 16K page. Link: https://lkml.kernel.org/r/20240619074911.100434-6-kuan-ying.lee@canonical.com Fixes: 9684ec186f8f ("arm64: Enable LPA2 at boot if supported by the system") Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Cc: Kirill A. 
Shutemov Cc: Michael Ellerman Signed-off-by: Andrew Morton --- scripts/gdb/linux/mm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py index 200def0e4b9a..7571aebbe650 100644 --- a/scripts/gdb/linux/mm.py +++ b/scripts/gdb/linux/mm.py @@ -47,7 +47,10 @@ class aarch64_page_ops(): self.VA_BITS = constants.LX_CONFIG_ARM64_VA_BITS if self.VA_BITS > 48: - self.VA_BITS_MIN = 48 + if constants.LX_CONFIG_ARM64_16K_PAGES: + self.VA_BITS_MIN = 47 + else: + self.VA_BITS_MIN = 48 tcr_el1 = gdb.execute("info registers $TCR_EL1", to_string=True) tcr_el1 = int(tcr_el1.split()[1], 16) self.vabits_actual = 64 - ((tcr_el1 >> 16) & 63) From 9d938f40b228a18a9521936337f2da7f393d5120 Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Wed, 19 Jun 2024 15:49:11 +0800 Subject: [PATCH 77/98] scripts/gdb: rename pool_index to pool_index_plus_1 We encounter the following issue after commit a6c1d9cb9a68 ("stackdepot: rename pool_index to pool_index_plus_1"). (gdb) lx-dump-page-owner --pfn 262144 ... Python Exception : There is no member named pool_index. Error occurred in Python: There is no member named pool_index. We rename pool_index to pool_index_plus_1 to fix this issue. Link: https://lkml.kernel.org/r/20240619074911.100434-7-kuan-ying.lee@canonical.com Fixes: a6c1d9cb9a68 ("stackdepot: rename pool_index to pool_index_plus_1") Signed-off-by: Kuan-Ying Lee Cc: Jan Kiszka Cc: Kieran Bingham Cc: Kirill A. Shutemov Cc: Michael Ellerman Signed-off-by: Andrew Morton --- scripts/gdb/linux/stackdepot.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/stackdepot.py b/scripts/gdb/linux/stackdepot.py index 0281d9de4b7c..bb3a0f843931 100644 --- a/scripts/gdb/linux/stackdepot.py +++ b/scripts/gdb/linux/stackdepot.py @@ -27,14 +27,18 @@ def stack_depot_fetch(handle): offset = parts['offset'] << DEPOT_STACK_ALIGN pools_num = gdb.parse_and_eval('pools_num') - if parts['pool_index'] > pools_num: + if handle == 0: + raise gdb.GdbError("handle is 0\n") + + pool_index = parts['pool_index_plus_1'] - 1 + if pool_index >= pools_num: gdb.write("pool index %d out of bounds (%d) for stack id 0x%08x\n" % (parts['pool_index'], pools_num, handle)) return gdb.Value(0), 0 stack_pools = gdb.parse_and_eval('stack_pools') try: - pool = stack_pools[parts['pool_index']] + pool = stack_pools[pool_index] stack = (pool + gdb.Value(offset).cast(utils.get_size_t_type())).cast(stack_record_type.get_type().pointer()) size = int(stack['size'].cast(utils.get_ulong_type())) return stack['entries'], size From 9059044b6717b0c100e3ad63c5bad3ec13df0fc4 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 1 Jun 2024 17:50:27 -0700 Subject: [PATCH 78/98] kfifo: add missing MODULE_DESCRIPTION() macros make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in samples/kfifo/bytestream-example.o WARNING: modpost: missing MODULE_DESCRIPTION() in samples/kfifo/dma-example.o WARNING: modpost: missing MODULE_DESCRIPTION() in samples/kfifo/inttype-example.o WARNING: modpost: missing MODULE_DESCRIPTION() in samples/kfifo/record-example.o Add the missing invocations of the MODULE_DESCRIPTION() macro. 
Link: https://lkml.kernel.org/r/20240601-md-samples-kfifo-v1-1-de34345c5fd8@quicinc.com Signed-off-by: Jeff Johnson Cc: Greg Kroah-Hartman Cc: Stefani Seibold Signed-off-by: Andrew Morton --- samples/kfifo/bytestream-example.c | 1 + samples/kfifo/dma-example.c | 1 + samples/kfifo/inttype-example.c | 1 + samples/kfifo/record-example.c | 1 + 4 files changed, 4 insertions(+) diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c index 642d0748c169..4ae29a12cc8a 100644 --- a/samples/kfifo/bytestream-example.c +++ b/samples/kfifo/bytestream-example.c @@ -191,5 +191,6 @@ static void __exit example_exit(void) module_init(example_init); module_exit(example_exit); +MODULE_DESCRIPTION("Sample kfifo byte stream implementation"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefani Seibold "); diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index 74fe915b7ffe..48df719dac8c 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -138,5 +138,6 @@ static void __exit example_exit(void) module_init(example_init); module_exit(example_exit); +MODULE_DESCRIPTION("Sample fifo dma implementation"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefani Seibold "); diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c index c61482ba94f4..e4f93317c5d0 100644 --- a/samples/kfifo/inttype-example.c +++ b/samples/kfifo/inttype-example.c @@ -182,5 +182,6 @@ static void __exit example_exit(void) module_init(example_init); module_exit(example_exit); +MODULE_DESCRIPTION("Sample kfifo int type implementation"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefani Seibold "); diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c index e4087b2d3fc4..e4d1a2d7983c 100644 --- a/samples/kfifo/record-example.c +++ b/samples/kfifo/record-example.c @@ -198,5 +198,6 @@ static void __exit example_exit(void) module_init(example_init); module_exit(example_exit); +MODULE_DESCRIPTION("Sample dynamic sized record fifo implementation"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Stefani Seibold "); From 6073496a20c5e2e8eee63c50af4b30fb2f521643 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Wed, 29 May 2024 16:31:58 -0700 Subject: [PATCH 79/98] resource: add missing MODULE_DESCRIPTION() Fix the 'make W=1' warning: WARNING: modpost: missing MODULE_DESCRIPTION() in kernel/resource_kunit.o Link: https://lkml.kernel.org/r/20240529-md-kernel-resource_kunit-v1-1-bb719784b714@quicinc.com Signed-off-by: Jeff Johnson Cc: Andy Shevchenko Cc: Greg Kroah-Hartman Cc: Rafael J. Wysocki Signed-off-by: Andrew Morton --- kernel/resource_kunit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/resource_kunit.c b/kernel/resource_kunit.c index 58ab9f914602..0e509985a44a 100644 --- a/kernel/resource_kunit.c +++ b/kernel/resource_kunit.c @@ -149,4 +149,5 @@ static struct kunit_suite resource_test_suite = { }; kunit_test_suite(resource_test_suite); +MODULE_DESCRIPTION("I/O Port & Memory Resource manager unit tests"); MODULE_LICENSE("GPL"); From 961a2851324561caed579764ffbee3db82b32829 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 21 Jun 2024 21:39:33 +0300 Subject: [PATCH 80/98] build-id: require program headers to be right after ELF header Neither the ELF spec nor the ELF loader requires program headers to be placed right after the ELF header, but the build-id code very much assumes such placement: see the find_get_page(vma->vm_file->f_mapping, 0); line and the checks against PAGE_SIZE.
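For illustration, a perfectly conforming toolchain is free to emit a header like this (hypothetical field values, shown only to make the assumption concrete):

	Elf64_Ehdr ehdr = {
		.e_phoff     = 0x10000,			/* phdrs at 64 KiB, not at sizeof(Elf64_Ehdr) */
		.e_phentsize = sizeof(Elf64_Phdr),
		.e_phnum     = 4,
	};

With such a layout the current parser, which only maps page 0 of the file, would treat whatever bytes happen to follow the ELF header as Elf64_Phdr entries.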
Returns errors for now until someone rewrites the build-id parser to be more in line with load_elf_binary(). Link: https://lkml.kernel.org/r/d58bc281-6ca7-467a-9a64-40fa214bd63e@p183 Signed-off-by: Alexey Dobriyan Reviewed-by: Jiri Olsa Signed-off-by: Andrew Morton --- lib/buildid.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/lib/buildid.c b/lib/buildid.c index 7954dd92e36c..e02b5507418b 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -73,6 +73,13 @@ static int get_build_id_32(const void *page_addr, unsigned char *build_id, Elf32_Phdr *phdr; int i; + /* + * FIXME + * Neither ELF spec nor ELF loader require that program headers + * start immediately after ELF header. + */ + if (ehdr->e_phoff != sizeof(Elf32_Ehdr)) + return -EINVAL; /* only supports phdr that fits in one page */ if (ehdr->e_phnum > (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) @@ -98,6 +105,13 @@ static int get_build_id_64(const void *page_addr, unsigned char *build_id, Elf64_Phdr *phdr; int i; + /* + * FIXME + * Neither ELF spec nor ELF loader require that program headers + * start immediately after ELF header. + */ + if (ehdr->e_phoff != sizeof(Elf64_Ehdr)) + return -EINVAL; /* only supports phdr that fits in one page */ if (ehdr->e_phnum > (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) From 7c812814e8c34a41bff6fe49987760ffaf8702af Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 24 Jun 2024 18:39:49 +0300 Subject: [PATCH 81/98] compiler.h: simplify data_race() macro -Wdeclaration-after-statement, used since forever, required statement expressions to inject __kcsan_disable_current() and __kcsan_enable_current() to mark a data race. Now that it is gone, make macro expansion simpler. __unqual_scalar_typeof() is a wordy macro by itself. "expr" is expanded twice. Link: https://lkml.kernel.org/r/fb62163f-ba21-4661-be5b-bb5124abc87d@p183 Signed-off-by: Alexey Dobriyan Reviewed-by: Marco Elver Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/compiler.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8c252e073bd8..2ea6120f3f7a 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -200,10 +200,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define data_race(expr) \ ({ \ - __unqual_scalar_typeof(({ expr; })) __v = ({ \ - __kcsan_disable_current(); \ - expr; \ - }); \ + __kcsan_disable_current(); \ + __auto_type __v = (expr); \ __kcsan_enable_current(); \ __v; \ }) From 2a49c8b6b6d0dba9c5a4e921f07fbd7a8ad7a5f1 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 22 Jun 2024 07:55:05 -0700 Subject: [PATCH 82/98] selftests/fpu: add missing MODULE_DESCRIPTION() macro make allmodconfig && make W=1 C=1 now reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_fpu.o Add the missing invocation of the MODULE_DESCRIPTION() macro.
Link: https://lkml.kernel.org/r/20240622-md-i386-lib-test_fpu_glue-v1-1-a4e40b7b1264@quicinc.com Fixes: 9613736d852d ("selftests/fpu: move FP code to a separate translation unit") Signed-off-by: Jeff Johnson Reviewed-by: Samuel Holland Signed-off-by: Andrew Morton --- lib/test_fpu_glue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/test_fpu_glue.c b/lib/test_fpu_glue.c index eef282a2715f..074f30301f29 100644 --- a/lib/test_fpu_glue.c +++ b/lib/test_fpu_glue.c @@ -59,4 +59,5 @@ static void __exit test_fpu_exit(void) module_init(test_fpu_init); module_exit(test_fpu_exit); +MODULE_DESCRIPTION("Test cases for floating point operations"); MODULE_LICENSE("GPL"); From 1e3fa25fca48b25e0483c95bec626dd1007a9adf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 25 Jun 2024 16:03:11 +0200 Subject: [PATCH 83/98] coredump: simplify zap_process() After commit 0258b5fd7c71 ("coredump: Limit coredumps to a single thread group") zap_process() doesn't need the "task_struct *start" arg, zap_threads() can pass "signal_struct *signal" instead. This simplifies the code and allows to use __for_each_thread() which is slightly more efficient. Link: https://lkml.kernel.org/r/20240625140311.GA20787@redhat.com Signed-off-by: Oleg Nesterov Cc: Christian Brauner Cc: Eric W. Biederman Signed-off-by: Andrew Morton --- fs/coredump.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index a57a06b80f57..4dc5140bac3f 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -361,17 +361,16 @@ out: return ispipe; } -static int zap_process(struct task_struct *start, int exit_code) +static int zap_process(struct signal_struct *signal, int exit_code) { struct task_struct *t; int nr = 0; - /* Allow SIGKILL, see prepare_signal() */ - start->signal->flags = SIGNAL_GROUP_EXIT; - start->signal->group_exit_code = exit_code; - start->signal->group_stop_count = 0; + signal->flags = SIGNAL_GROUP_EXIT; + signal->group_exit_code = exit_code; + signal->group_stop_count = 0; - for_each_thread(start, t) { + __for_each_thread(signal, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); if (t != current && !(t->flags & PF_POSTCOREDUMP)) { sigaddset(&t->pending.signal, SIGKILL); @@ -391,8 +390,9 @@ static int zap_threads(struct task_struct *tsk, spin_lock_irq(&tsk->sighand->siglock); if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) { + /* Allow SIGKILL, see prepare_signal() */ signal->core_state = core_state; - nr = zap_process(tsk, exit_code); + nr = zap_process(signal, exit_code); clear_tsk_thread_flag(tsk, TIF_SIGPENDING); tsk->flags |= PF_DUMPCORE; atomic_set(&core_state->nr_threads, nr); From 937b2972ce900fcfad87c13432e36f39da06a0df Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 22 Mar 2024 14:37:18 +0800 Subject: [PATCH 84/98] fs: add kernel-doc comments to ocfs2_prepare_orphan_dir() This commit adds kernel-doc style comments with complete parameter descriptions for the function ocfs2_prepare_orphan_dir. 
Link: https://lkml.kernel.org/r/20240322063718.88183-1-yang.lee@linux.alibaba.com Signed-off-by: Yang Li Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 4d1ea8703fcd..59c92353151a 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2189,8 +2189,10 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, * @osb: ocfs2 file system * @ret_orphan_dir: Orphan dir inode - returned locked! * @blkno: Actual block number of the inode to be inserted into orphan dir. + * @name: Buffer to store the name of the orphan. * @lookup: dir lookup result, to be passed back into functions like * ocfs2_orphan_add + * @dio: Flag indicating if direct IO is being used or not. * * Returns zero on success and the ret_orphan_dir, name and lookup * fields will be populated. From 255547c6bb8940a97eea94ef9d464ea5967763fb Mon Sep 17 00:00:00 2001 From: lei lu Date: Wed, 26 Jun 2024 18:44:33 +0800 Subject: [PATCH 85/98] ocfs2: add bounds checking to ocfs2_check_dir_entry() This adds sanity checks for ocfs2_dir_entry to make sure all members of ocfs2_dir_entry don't stray beyond valid memory region. Link: https://lkml.kernel.org/r/20240626104433.163270-1-llfamsec@gmail.com Signed-off-by: lei lu Reviewed-by: Heming Zhao Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/dir.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index d620d4c53c6f..f0beb173dbba 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -294,13 +294,16 @@ out: * bh passed here can be an inode block or a dir data block, depending * on the inode inline data flag. 
*/ -static int ocfs2_check_dir_entry(struct inode * dir, - struct ocfs2_dir_entry * de, - struct buffer_head * bh, +static int ocfs2_check_dir_entry(struct inode *dir, + struct ocfs2_dir_entry *de, + struct buffer_head *bh, + char *buf, + unsigned int size, unsigned long offset) { const char *error_msg = NULL; const int rlen = le16_to_cpu(de->rec_len); + const unsigned long next_offset = ((char *) de - buf) + rlen; if (unlikely(rlen < OCFS2_DIR_REC_LEN(1))) error_msg = "rec_len is smaller than minimal"; @@ -308,9 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir, error_msg = "rec_len % 4 != 0"; else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len))) error_msg = "rec_len is too small for name_len"; - else if (unlikely( - ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)) - error_msg = "directory entry across blocks"; + else if (unlikely(next_offset > size)) + error_msg = "directory entry overrun"; + else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) && + next_offset != size) + error_msg = "directory entry too close to end"; if (unlikely(error_msg != NULL)) mlog(ML_ERROR, "bad entry in directory #%llu: %s - " @@ -352,16 +357,17 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh, de_buf = first_de; dlimit = de_buf + bytes; - while (de_buf < dlimit) { + while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ de = (struct ocfs2_dir_entry *) de_buf; - if (de_buf + namelen <= dlimit && + if (de->name + namelen <= dlimit && ocfs2_match(namelen, name, de)) { /* found a match - just to be sure, do a full check */ - if (!ocfs2_check_dir_entry(dir, de, bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, bh, first_de, + bytes, offset)) { ret = -1; goto bail; } @@ -1138,7 +1144,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, pde = NULL; de = (struct ocfs2_dir_entry *) first_de; while (i < bytes) { - if (!ocfs2_check_dir_entry(dir, de, bh, i)) { + if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) { status = -EIO; mlog_errno(status); goto bail; @@ -1635,7 +1641,8 @@ int __ocfs2_add_entry(handle_t *handle, /* These checks should've already been passed by the * prepare function, but I guess we can leave them * here anyway. */ - if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start, + size, offset)) { retval = -ENOENT; goto bail; } @@ -1774,7 +1781,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, } de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos); - if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) { + if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data, + i_size_read(inode), ctx->pos)) { /* On error, skip the f_pos to the end. */ ctx->pos = i_size_read(inode); break; @@ -1867,7 +1875,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, while (ctx->pos < i_size_read(inode) && offset < sb->s_blocksize) { de = (struct ocfs2_dir_entry *) (bh->b_data + offset); - if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { + if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data, + sb->s_blocksize, offset)) { /* On error, skip the f_pos to the next block. 
*/ ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; @@ -3339,7 +3348,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, struct super_block *sb = dir->i_sb; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_dir_entry *de, *last_de = NULL; - char *de_buf, *limit; + char *first_de, *de_buf, *limit; unsigned long offset = 0; unsigned int rec_len, new_rec_len, free_space; @@ -3352,14 +3361,16 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, else free_space = dir->i_sb->s_blocksize - i_size_read(dir); - de_buf = di->id2.i_data.id_data; + first_de = di->id2.i_data.id_data; + de_buf = first_de; limit = de_buf + i_size_read(dir); rec_len = OCFS2_DIR_REC_LEN(namelen); while (de_buf < limit) { de = (struct ocfs2_dir_entry *)de_buf; - if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de, + i_size_read(dir), offset)) { ret = -ENOENT; goto out; } @@ -3441,7 +3452,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, /* move to next block */ de = (struct ocfs2_dir_entry *) bh->b_data; } - if (!ocfs2_check_dir_entry(dir, de, bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize, + offset)) { status = -ENOENT; goto bail; } From cedb08caac587b55c79d0463400839acab4638c0 Mon Sep 17 00:00:00 2001 From: Hsin Chang Yu Date: Fri, 28 Jun 2024 22:22:29 +0800 Subject: [PATCH 86/98] lib/rbtree.c: fix the example typo Replace the "Sr" with "sr", the example is wrong if sl and N don't have child nodes, so sr should be red node. Link: https://lkml.kernel.org/r/20240628142229.69419-1-zxcvb600870024@gmail.com Signed-off-by: Hsin Chang Yu Signed-off-by: Andrew Morton --- lib/rbtree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/rbtree.c b/lib/rbtree.c index 5114eda6309c..989c2d615f92 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -297,9 +297,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * N S --> N sl * / \ \ - * sl Sr S + * sl sr S * \ - * Sr + * sr * * Note: p might be red, and then both * p and sl are red after rotation(which @@ -312,9 +312,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * N sl --> P S * \ / \ - * S N Sr + * S N sr * \ - * Sr + * sr */ tmp1 = tmp2->rb_right; WRITE_ONCE(sibling->rb_left, tmp1); From c61d7259a6a9b2403097dce9f0b5f465a60bd62a Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Fri, 10 May 2024 12:02:15 -0700 Subject: [PATCH 87/98] fs: ufs: add MODULE_DESCRIPTION() Fix make W=1 warning: WARNING: modpost: missing MODULE_DESCRIPTION() in fs/ufs/ufs.o Link: https://lkml.kernel.org/r/20240510-ufs-md-v1-1-85eaff8c6beb@quicinc.com Signed-off-by: Jeff Johnson Cc: Evgeniy Dushistov Cc: Christian Brauner Signed-off-by: Andrew Morton --- fs/ufs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 44666afc6209..bc625788589c 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1540,4 +1540,5 @@ static void __exit exit_ufs_fs(void) module_init(init_ufs_fs) module_exit(exit_ufs_fs) +MODULE_DESCRIPTION("UFS Filesystem"); MODULE_LICENSE("GPL"); From bee6c683de2f7c086963b20afffb40b03e6c264d Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 13 Jun 2024 22:01:09 -0700 Subject: [PATCH 88/98] lib/zlib: add missing MODULE_DESCRIPTION() macro With ARCH=csky, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in 
lib/zlib_deflate/zlib_deflate.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Link: https://lkml.kernel.org/r/20240613-md-csky-lib-zlib_deflate-v1-1-83504d9a27d6@quicinc.com Signed-off-by: Jeff Johnson Cc: Guo Ren Signed-off-by: Andrew Morton --- lib/zlib_deflate/deflate_syms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/zlib_deflate/deflate_syms.c b/lib/zlib_deflate/deflate_syms.c index 24b740b99678..68941a2350ea 100644 --- a/lib/zlib_deflate/deflate_syms.c +++ b/lib/zlib_deflate/deflate_syms.c @@ -17,4 +17,5 @@ EXPORT_SYMBOL(zlib_deflate); EXPORT_SYMBOL(zlib_deflateInit2); EXPORT_SYMBOL(zlib_deflateEnd); EXPORT_SYMBOL(zlib_deflateReset); +MODULE_DESCRIPTION("Data compression using the deflation algorithm"); MODULE_LICENSE("GPL"); From 8547d1150f0dbd1d04f397c780182fc83ec2ab16 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 2 Jul 2024 15:47:59 -0700 Subject: [PATCH 89/98] math: rational: add missing MODULE_DESCRIPTION() macro With ARCH=sh, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/math/rational.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Link: https://lkml.kernel.org/r/20240702-md-sh-lib-math-v1-1-93f4ac4fa8fd@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Andrew Morton --- lib/math/rational.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/math/rational.c b/lib/math/rational.c index ec59d426ea63..d2c34e629ee1 100644 --- a/lib/math/rational.c +++ b/lib/math/rational.c @@ -108,4 +108,5 @@ void rational_best_approximation( EXPORT_SYMBOL(rational_best_approximation); +MODULE_DESCRIPTION("Rational fraction support library"); MODULE_LICENSE("GPL v2"); From 0f3819e8c483771a59cf9d3190cd68a7a990083c Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 3 Jul 2024 03:35:12 +0900 Subject: [PATCH 90/98] nilfs2: avoid undefined behavior in nilfs_cnt32_ge macro According to the C standard 3.4.3p3, the result of signed integer overflow is undefined. The macro nilfs_cnt32_ge(), which compares two sequence numbers, uses signed integer subtraction that can overflow, and therefore the result of the calculation may differ from what is expected due to undefined behavior in different environments. Similar to an earlier change to the jiffies-related comparison macros in commit 5a581b367b5d ("jiffies: Avoid undefined behavior from signed overflow"), avoid this potential issue by changing the definition of the macro to perform the subtraction as unsigned integers, then cast the result to a signed integer for comparison. 
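For example, with hypothetical 32-bit sequence numbers that have wrapped:

	__u32 a = 0x80000000, b = 1;

	(__s32)a - (__s32)b;	/* typically INT_MIN - 1: signed overflow, undefined behavior */
	(__s32)(a - b);		/* 0x7fffffff: well-defined unsigned subtraction, then a defined conversion */

Both forms are meant to say "a is not older than b", but only the second is guaranteed to evaluate that way with every compiler and optimization level.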
Link: https://lkml.kernel.org/r/20130727225828.GA11864@linux.vnet.ibm.com Link: https://lkml.kernel.org/r/20240702183512.6390-1-konishi.ryusuke@gmail.com Fixes: 9ff05123e3bf ("nilfs2: segment constructor") Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 36e0bb38e1aa..0ca3110d6386 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -136,7 +136,7 @@ static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int); #define nilfs_cnt32_ge(a, b) \ (typecheck(__u32, a) && typecheck(__u32, b) && \ - ((__s32)(a) - (__s32)(b) >= 0)) + ((__s32)((a) - (b)) >= 0)) static int nilfs_prepare_segment_lock(struct super_block *sb, struct nilfs_transaction_info *ti) From fbc8846cd9c258e3844d22afd4d1ae7240077aab Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 8 Jul 2024 23:32:42 +0900 Subject: [PATCH 91/98] nilfs2: Constify struct kobj_type 'struct kobj_type' is not modified in this driver. It is only used with kobject_init_and_add() which takes a "const struct kobj_type *" parameter. Constifying this structure moves some data to a read-only section, so increase overall security. On a x86_64, with allmodconfig: Before: ====== text data bss dec hex filename 22403 4184 24 26611 67f3 fs/nilfs2/sysfs.o After: ===== text data bss dec hex filename 22723 3928 24 26675 6833 fs/nilfs2/sysfs.o Link: https://lkml.kernel.org/r/20240708143242.3296-1-konishi.ryusuke@gmail.com Signed-off-by: Christophe JAILLET Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 379d22e28ed6..a5569b7f47a3 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -56,7 +56,7 @@ static void nilfs_##name##_attr_release(struct kobject *kobj) \ sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ -static struct kobj_type nilfs_##name##_ktype = { \ +static const struct kobj_type nilfs_##name##_ktype = { \ .default_groups = nilfs_##name##_groups, \ .sysfs_ops = &nilfs_##name##_attr_ops, \ .release = nilfs_##name##_attr_release, \ @@ -166,7 +166,7 @@ static const struct sysfs_ops nilfs_snapshot_attr_ops = { .store = nilfs_snapshot_attr_store, }; -static struct kobj_type nilfs_snapshot_ktype = { +static const struct kobj_type nilfs_snapshot_ktype = { .default_groups = nilfs_snapshot_groups, .sysfs_ops = &nilfs_snapshot_attr_ops, .release = nilfs_snapshot_attr_release, @@ -967,7 +967,7 @@ static const struct sysfs_ops nilfs_dev_attr_ops = { .store = nilfs_dev_attr_store, }; -static struct kobj_type nilfs_dev_ktype = { +static const struct kobj_type nilfs_dev_ktype = { .default_groups = nilfs_dev_groups, .sysfs_ops = &nilfs_dev_attr_ops, .release = nilfs_dev_attr_release, From 7a7127aa33c9f5b7b54ffa80619f644c5e000846 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 7 Jul 2024 01:05:05 +0900 Subject: [PATCH 92/98] init: remove unused __MEMINIT* macros These macros are not used anywhere. 
Link: https://lkml.kernel.org/r/20240706160511.2331061-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Reviewed-by: Wei Yang Signed-off-by: Andrew Morton --- include/linux/init.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/init.h b/include/linux/init.h index 58cef4c2e59a..b2e9dfff8691 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -99,10 +99,6 @@ #define __INITRODATA .section ".init.rodata","a",%progbits #define __FINITDATA .previous -#define __MEMINIT .section ".meminit.text", "ax" -#define __MEMINITDATA .section ".meminit.data", "aw" -#define __MEMINITRODATA .section ".meminit.rodata", "a" - /* silence warnings when references are OK */ #define __REF .section ".ref.text", "ax" #define __REFDATA .section ".ref.data", "aw" From 73db3abdca58c8a014ec4c88cf5ef925cbf63669 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 7 Jul 2024 01:05:06 +0900 Subject: [PATCH 93/98] init/modpost: conditionally check section mismatch to __meminit* This reverts commit eb8f689046b8 ("Use separate sections for __dev/ _cpu/__mem code/data"). Check section mismatch to __meminit* only when CONFIG_MEMORY_HOTPLUG=n. With this change, the linker script and modpost become simpler, and we can get rid of the __ref annotations from the memory hotplug code. [sfr@canb.auug.org.au: remove MEM_KEEP from arch/powerpc/kernel/vmlinux.lds.S] Link: https://lkml.kernel.org/r/20240710093213.2aefb25f@canb.auug.org.au Link: https://lkml.kernel.org/r/20240706160511.2331061-2-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Signed-off-by: Stephen Rothwell Reviewed-by: Wei Yang Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/powerpc/kernel/vmlinux.lds.S | 2 -- include/asm-generic/vmlinux.lds.h | 18 ++---------------- include/linux/init.h | 14 +++++++++----- scripts/mod/modpost.c | 19 ++++--------------- 4 files changed, 15 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index f420df7888a7..7ab4e2fb28b1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -123,8 +123,6 @@ SECTIONS */ *(.sfpr); *(.text.asan.* .text.tsan.*) - MEM_KEEP(init.text) - MEM_KEEP(exit.text) } :text . = ALIGN(PAGE_SIZE); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5703526d6ebf..0db89c0aa2cc 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -141,14 +141,6 @@ * often happens at runtime) */ -#if defined(CONFIG_MEMORY_HOTPLUG) -#define MEM_KEEP(sec) *(.mem##sec) -#define MEM_DISCARD(sec) -#else -#define MEM_KEEP(sec) -#define MEM_DISCARD(sec) *(.mem##sec) -#endif - #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE #define KEEP_PATCHABLE KEEP(*(__patchable_function_entries)) #define PATCHABLE_DISCARDS @@ -357,7 +349,6 @@ *(.data..decrypted) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ - MEM_KEEP(init.data*) \ *(.data.unlikely) \ __start_once = .; \ *(.data.once) \ @@ -542,7 +533,6 @@ /* __*init sections */ \ __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \ *(.ref.rodata) \ - MEM_KEEP(init.rodata) \ } \ \ /* Built-in module parameters. 
*/ \ @@ -593,8 +583,7 @@ *(.text.unknown .text.unknown.*) \ NOINSTR_TEXT \ *(.ref.text) \ - *(.text.asan.* .text.tsan.*) \ - MEM_KEEP(init.text*) \ + *(.text.asan.* .text.tsan.*) /* sched.text is aling to function alignment to secure we have same @@ -701,7 +690,6 @@ #define INIT_DATA \ KEEP(*(SORT(___kentry+*))) \ *(.init.data .init.data.*) \ - MEM_DISCARD(init.data*) \ KERNEL_CTORS() \ MCOUNT_REC() \ *(.init.rodata .init.rodata.*) \ @@ -709,7 +697,6 @@ TRACE_SYSCALLS() \ KPROBE_BLACKLIST() \ ERROR_INJECT_WHITELIST() \ - MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ TIMER_OF_TABLES() \ @@ -727,8 +714,7 @@ #define INIT_TEXT \ *(.init.text .init.text.*) \ - *(.text.startup) \ - MEM_DISCARD(init.text*) + *(.text.startup) #define EXIT_DATA \ *(.exit.data .exit.data.*) \ diff --git a/include/linux/init.h b/include/linux/init.h index b2e9dfff8691..ee1309473bc6 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -84,11 +84,15 @@ #define __exit __section(".exit.text") __exitused __cold notrace -/* Used for MEMORY_HOTPLUG */ -#define __meminit __section(".meminit.text") __cold notrace \ - __latent_entropy -#define __meminitdata __section(".meminit.data") -#define __meminitconst __section(".meminit.rodata") +#ifdef CONFIG_MEMORY_HOTPLUG +#define __meminit +#define __meminitdata +#define __meminitconst +#else +#define __meminit __init +#define __meminitdata __initdata +#define __meminitconst __initconst +#endif /* For assembly routines */ #define __HEAD .section ".head.text","ax" diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index f48d72d22dc2..4b1edb257618 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -776,17 +776,14 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_INIT_DATA_SECTIONS \ - ".init.setup", ".init.rodata", ".meminit.rodata", \ - ".init.data", ".meminit.data" + ".init.setup", ".init.rodata", ".init.data" #define ALL_PCI_INIT_SECTIONS \ ".pci_fixup_early", ".pci_fixup_header", ".pci_fixup_final", \ ".pci_fixup_enable", ".pci_fixup_resume", \ ".pci_fixup_resume_early", ".pci_fixup_suspend" -#define ALL_XXXINIT_SECTIONS ".meminit.*" - -#define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS +#define ALL_INIT_SECTIONS ".init.*" #define ALL_EXIT_SECTIONS ".exit.*" #define DATA_SECTIONS ".data", ".data.rel" @@ -797,9 +794,7 @@ static void check_section(const char *modname, struct elf_info *elf, ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" -#define INIT_SECTIONS ".init.*" - -#define ALL_TEXT_SECTIONS ".init.text", ".meminit.text", ".exit.text", \ +#define ALL_TEXT_SECTIONS ".init.text", ".exit.text", \ TEXT_SECTIONS, OTHER_TEXT_SECTIONS enum mismatch { @@ -839,12 +834,6 @@ static const struct sectioncheck sectioncheck[] = { .bad_tosec = { ALL_INIT_SECTIONS, ALL_EXIT_SECTIONS, NULL }, .mismatch = TEXTDATA_TO_ANY_INIT_EXIT, }, -/* Do not reference init code/data from meminit code/data */ -{ - .fromsec = { ALL_XXXINIT_SECTIONS, NULL }, - .bad_tosec = { INIT_SECTIONS, NULL }, - .mismatch = XXXINIT_TO_SOME_INIT, -}, /* Do not use exit code/data from init code */ { .fromsec = { ALL_INIT_SECTIONS, NULL }, @@ -859,7 +848,7 @@ static const struct sectioncheck sectioncheck[] = { }, { .fromsec = { ALL_PCI_INIT_SECTIONS, NULL }, - .bad_tosec = { INIT_SECTIONS, NULL }, + .bad_tosec = { ALL_INIT_SECTIONS, NULL }, .mismatch = ANY_INIT_TO_ANY_EXIT, }, { From 4f5d4a1ba7a1a23173e356186f3f8b7c27d2e948 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 9 Jul 2024 11:43:23 
+0800 Subject: [PATCH 94/98] test_bpf: convert comma to semicolon Replace commas between expression statements with semicolons. Link: https://lkml.kernel.org/r/20240709034323.586185-1-nichen@iscas.ac.cn Signed-off-by: Chen Ni Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Eduard Zingerman Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Signed-off-by: Andrew Morton --- lib/test_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 207ff87194db..a6edbe842f65 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1740,7 +1740,7 @@ static int __bpf_emit_cmpxchg32(struct bpf_test *self, void *arg, /* Result unsuccessful */ insns[i++] = BPF_STX_MEM(BPF_W, R10, R1, -4); insns[i++] = BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R2, -4); - insns[i++] = BPF_ZEXT_REG(R0), /* Zext always inserted by verifier */ + insns[i++] = BPF_ZEXT_REG(R0); /* Zext always inserted by verifier */ insns[i++] = BPF_LDX_MEM(BPF_W, R3, R10, -4); insns[i++] = BPF_JMP32_REG(BPF_JEQ, R1, R3, 2); @@ -1754,7 +1754,7 @@ static int __bpf_emit_cmpxchg32(struct bpf_test *self, void *arg, /* Result successful */ i += __bpf_ld_imm64(&insns[i], R0, dst); insns[i++] = BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R2, -4); - insns[i++] = BPF_ZEXT_REG(R0), /* Zext always inserted by verifier */ + insns[i++] = BPF_ZEXT_REG(R0); /* Zext always inserted by verifier */ insns[i++] = BPF_LDX_MEM(BPF_W, R3, R10, -4); insns[i++] = BPF_JMP32_REG(BPF_JEQ, R2, R3, 2); From e1fb7430fcb00431087fc1c088ec9e8737cf6c7d Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 9 Jul 2024 00:40:24 +0200 Subject: [PATCH 95/98] lib/bch.c: use swap() to improve code Use the swap() macro to simplify the functions solve_linear_system() and gf_poly_gcd() and improve their readability. Remove the local variable tmp. Fixes the following three Coccinelle/coccicheck warnings reported by swap.cocci: WARNING opportunity for swap() WARNING opportunity for swap() WARNING opportunity for swap() Link: https://lkml.kernel.org/r/20240708224023.9312-2-thorsten.blum@toblux.com Signed-off-by: Thorsten Blum Signed-off-by: Andrew Morton --- lib/bch.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/lib/bch.c b/lib/bch.c index 5f71fd76eca8..1c0cb07cdfeb 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -479,11 +479,8 @@ static int solve_linear_system(struct bch_control *bch, unsigned int *rows, /* find suitable row for elimination */ for (r = p; r < m; r++) { if (rows[r] & mask) { - if (r != p) { - tmp = rows[r]; - rows[r] = rows[p]; - rows[p] = tmp; - } + if (r != p) + swap(rows[r], rows[p]); rem = r+1; break; } @@ -799,21 +796,14 @@ static void gf_poly_div(struct bch_control *bch, struct gf_poly *a, static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a, struct gf_poly *b) { - struct gf_poly *tmp; - dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b)); - if (a->deg < b->deg) { - tmp = b; - b = a; - a = tmp; - } + if (a->deg < b->deg) + swap(a, b); while (b->deg > 0) { gf_poly_mod(bch, a, b, NULL); - tmp = b; - b = a; - a = tmp; + swap(a, b); } dbg("%s\n", gf_poly_str(a)); From 0fe2356434e157b3952ff4dbdfe0a96070ddcaa2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 11 Jul 2024 10:13:09 -0700 Subject: [PATCH 96/98] tsacct: replace strncpy() with strscpy() Replace the deprecated[1] use of strncpy() in bacct_add_tsk(). Since this is UAPI, include trailing padding in the copy. 
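Roughly, for a hypothetical 32-byte ac_comm field and a task named "bash":

	strscpy(stats->ac_comm, tsk->comm);	/* copies "bash\0", leaves bytes 5..31 untouched */
	strscpy_pad(stats->ac_comm, tsk->comm);	/* copies "bash\0" and zero-fills bytes 5..31 */

The padded variant keeps the zero-fill behaviour strncpy() provided, so no stale bytes can reach userspace through the taskstats interface.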
Link: https://github.com/KSPP/linux/issues/90 [1] Link: https://lkml.kernel.org/r/20240711171308.work.995-kees@kernel.org Signed-off-by: Kees Cook Cc: "Dr. Thomas Orgis" Cc: Eric W. Biederman Cc: Ismael Luceno Cc: Peng Liu Signed-off-by: Andrew Morton --- kernel/tsacct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 4252f0645b9e..16b283f9d831 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -76,7 +76,7 @@ void bacct_add_tsk(struct user_namespace *user_ns, stats->ac_minflt = tsk->min_flt; stats->ac_majflt = tsk->maj_flt; - strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm)); + strscpy_pad(stats->ac_comm, tsk->comm); } From f944ffcbc2e1c759764850261670586ddf3bdabb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Jul 2024 22:25:21 +0200 Subject: [PATCH 97/98] watchdog/perf: properly initialize the turbo mode timestamp and rearm counter For systems on which the performance counter can expire early due to turbo modes the watchdog handler has a safety net in place which validates that since the last watchdog event there has at least 4/5th of the watchdog period elapsed. This works reliably only after the first watchdog event because the per CPU variable which holds the timestamp of the last event is never initialized. So a first spurious event will validate against a timestamp of 0 which results in a delta which is likely to be way over the 4/5 threshold of the period. As this might happen before the first watchdog hrtimer event increments the watchdog counter, this can lead to false positives. Fix this by initializing the timestamp before enabling the hardware event. Reset the rearm counter as well, as that might be non zero after the watchdog was disabled and reenabled. Link: https://lkml.kernel.org/r/87frsfu15a.ffs@tglx Fixes: 7edaeb6841df ("kernel/watchdog: Prevent false positives with turbo modes") Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton --- kernel/watchdog_perf.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index d577c4a8321e..59c1d86a73a2 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -75,11 +75,15 @@ static bool watchdog_check_timestamp(void) __this_cpu_write(last_timestamp, now); return true; } -#else -static inline bool watchdog_check_timestamp(void) + +static void watchdog_init_timestamp(void) { - return true; + __this_cpu_write(nmi_rearmed, 0); + __this_cpu_write(last_timestamp, ktime_get_mono_fast_ns()); } +#else +static inline bool watchdog_check_timestamp(void) { return true; } +static inline void watchdog_init_timestamp(void) { } #endif static struct perf_event_attr wd_hw_attr = { @@ -161,6 +165,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) if (!atomic_fetch_inc(&watchdog_cpus)) pr_info("Enabled. 
Permanently consumes one hw-PMU counter.\n"); + watchdog_init_timestamp(); perf_event_enable(this_cpu_read(watchdog_ev)); } From 67856f44da381973caf4eb692ad2cca1de7b2d37 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 15 Jul 2024 08:25:33 +0300 Subject: [PATCH 98/98] ia64: scrub ia64 from poison.h Link: https://lkml.kernel.org/r/c72e5467-06a8-4739-ae6a-7c84c96cad77@p183 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- include/linux/poison.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/poison.h b/include/linux/poison.h index 1f0ee2459f2a..a265b499033b 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -52,12 +52,6 @@ /********** arch/$ARCH/mm/init.c **********/ #define POISON_FREE_INITMEM 0xcc -/********** arch/ia64/hp/common/sba_iommu.c **********/ -/* - * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a - * value of "SBAIOMMU POISON\0" for spill-over poisoning. - */ - /********** fs/jbd/journal.c **********/ #define JBD_POISON_FREE 0x5b #define JBD2_POISON_FREE 0x5c