From 69dcbbd80421a5d8230c178e01869f8e2edb2317 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Oct 2023 09:52:19 -0700 Subject: [PATCH 01/17] locktorture: Increase Hamming distance between call_rcu_chain and rcu_call_chains One letter difference is really not enough, so this commit changes call_rcu_chain to call_rcu_chain_list. Reported-by: Dan Carpenter Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/locking/locktorture.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 69d3cd2cfc3b..415d81e6ce70 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -124,7 +124,7 @@ struct call_rcu_chain { struct rcu_head crc_rh; bool crc_stop; }; -struct call_rcu_chain *call_rcu_chain; +struct call_rcu_chain *call_rcu_chain_list; /* Forward reference. */ static void lock_torture_cleanup(void); @@ -1074,12 +1074,12 @@ static int call_rcu_chain_init(void) if (call_rcu_chains <= 0) return 0; - call_rcu_chain = kcalloc(call_rcu_chains, sizeof(*call_rcu_chain), GFP_KERNEL); - if (!call_rcu_chain) + call_rcu_chain_list = kcalloc(call_rcu_chains, sizeof(*call_rcu_chain_list), GFP_KERNEL); + if (!call_rcu_chain_list) return -ENOMEM; for (i = 0; i < call_rcu_chains; i++) { - call_rcu_chain[i].crc_stop = false; - call_rcu(&call_rcu_chain[i].crc_rh, call_rcu_chain_cb); + call_rcu_chain_list[i].crc_stop = false; + call_rcu(&call_rcu_chain_list[i].crc_rh, call_rcu_chain_cb); } return 0; } @@ -1089,13 +1089,13 @@ static void call_rcu_chain_cleanup(void) { int i; - if (!call_rcu_chain) + if (!call_rcu_chain_list) return; for (i = 0; i < call_rcu_chains; i++) - smp_store_release(&call_rcu_chain[i].crc_stop, true); + smp_store_release(&call_rcu_chain_list[i].crc_stop, true); rcu_barrier(); - kfree(call_rcu_chain); - call_rcu_chain = NULL; + kfree(call_rcu_chain_list); + call_rcu_chain_list = NULL; } static void lock_torture_cleanup(void) From 454723b1615f7423fcba0b8f722cef4992a1846f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Oct 2023 00:25:14 +0200 Subject: [PATCH 02/17] rcutorture: add nolibc init support for mips, ppc and rv64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use nolibc for all supported architectures. Signed-off-by: Thomas Weißschuh Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/mkinitrd.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh index 212c52ca90b5..f3f867129560 100755 --- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -67,7 +67,10 @@ ___EOF___ # build using nolibc on supported archs (smaller executable) and fall # back to regular glibc on other ones. if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ - "||__ARM_EABI__||__aarch64__||__s390x__||__loongarch__\nyes\n#endif" \ + "||__ARM_EABI__||__aarch64__||(__mips__ && _ABIO32)" \ + "||__powerpc__||(__riscv && __riscv_xlen == 64)" \ + "||__s390x__||__loongarch__" \ + "\nyes\n#endif" \ | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \ | grep -q '^yes'; then # architecture supported by nolibc From af19a2526cba92082723b98fcf191a595054a952 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 1 Nov 2023 10:30:56 -0700 Subject: [PATCH 03/17] rcutorture: Add mid-sized stall to TREE07 There is code in rcu_implicit_dynticks_qs() that checks for the current grace period being halfway to the RCU CPU stall timeout, but rcutorture currently does not test this code. This commit therefore adds a 14-second stall to the TREE07 scenario in order to test this code given the default RCU CPU stall warning timeout of 21 seconds. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot index d44609937503..979edbf4c820 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot @@ -1 +1,4 @@ nohz_full=2-9 +rcutorture.stall_cpu=14 +rcutorture.stall_cpu_holdoff=90 +rcutorture.fwd_progress=0 From 90f1015dfee3d33f8ca7bfe03296d100d465e385 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Fri, 3 Nov 2023 15:26:39 +0800 Subject: [PATCH 04/17] rcutorture: Add fqs_holdoff check before fqs_task is created For rcutorture tests on RCU implementations that support force-quiescent-state operations and that set the fqs_duration module parameter greater than zero, the fqs_task kthread will be created. However, if the fqs_holdoff module parameter is not set, then its default value of zero will cause fqs_task enter a long-term busy loop until stopped by kthread_stop(). This commit therefore adds a fqs_holdoff check before the fqs_task is created, making sure that whenever the fqs_task is created, the fqs_holdoff will be greater than zero. Signed-off-by: Zqiang Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/rcu/rcutorture.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 30fc9d34e329..a0b2520bd32b 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -3872,7 +3872,9 @@ rcu_torture_init(void) } if (fqs_duration < 0) fqs_duration = 0; - if (fqs_duration) { + if (fqs_holdoff < 0) + fqs_holdoff = 0; + if (fqs_duration && fqs_holdoff) { /* Create the fqs thread */ firsterr = torture_create_kthread(rcu_torture_fqs, NULL, fqs_task); From 18966f7b9458d3b19412fe9dfb421ab59401bfe1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 Oct 2023 09:45:54 -0700 Subject: [PATCH 05/17] rcu-tasks: Mark RCU Tasks accesses to current->rcu_tasks_idle_cpu The task_struct structure's ->rcu_tasks_idle_cpu can be concurrently read and written from the RCU Tasks grace-period kthread and from the CPU on which the task_struct structure's task is running. This commit therefore marks the accesses appropriately. Reported-by: Boqun Feng Signed-off-by: Paul E. 
McKenney Reviewed-by: Joel Fernandes (Google) Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/rcu/tasks.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index f54d5782eca0..732ad5b39946 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -975,7 +975,7 @@ static void check_holdout_task(struct task_struct *t, t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) || !rcu_tasks_is_holdout(t) || (IS_ENABLED(CONFIG_NO_HZ_FULL) && - !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) { + !is_idle_task(t) && READ_ONCE(t->rcu_tasks_idle_cpu) >= 0)) { WRITE_ONCE(t->rcu_tasks_holdout, false); list_del_init(&t->rcu_tasks_holdout_list); put_task_struct(t); @@ -993,7 +993,7 @@ static void check_holdout_task(struct task_struct *t, t, ".I"[is_idle_task(t)], "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)], t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout, - t->rcu_tasks_idle_cpu, cpu); + data_race(t->rcu_tasks_idle_cpu), cpu); sched_show_task(t); } From 4e58aaeebb3c27993c734c99eae6881b196b1ddb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 1 Nov 2023 18:28:38 -0700 Subject: [PATCH 06/17] rcu: Restrict access to RCU CPU stall notifiers The RCU CPU stall notifiers can be useful for dumping state when tracking down delicate forward-progress bugs where NUMA effects cause cache lines to be delivered to a given CPU regularly, but always in a state that prevents that CPU from making forward progress. These bugs can be detected by the RCU CPU stall-warning mechanism, but in some cases, the stall-warning printk()s disrupt the forward-progress bug before any useful state can be obtained. Unfortunately, the notifier mechanism added by commit 5b404fdabacf ("rcu: Add RCU CPU stall notifier") can make matters worse if used at all carelessly. For example, if the stall warning was caused by a lock not being released, then any attempt to acquire that lock in the notifier will hang. This will prevent not only the notifier from producing any useful output, but it will also prevent the stall-warning message from ever appearing. This commit therefore hides this new RCU CPU stall notifier mechanism under a new RCU_CPU_STALL_NOTIFIER Kconfig option that depends on both DEBUG_KERNEL and RCU_EXPERT. In addition, the rcupdate.rcu_cpu_stall_notifiers=1 kernel boot parameter must also be specified. The RCU_CPU_STALL_NOTIFIER Kconfig option's help text contains a warning and explains the dangers of careless use, recommending lockless notifier code. In addition, a WARN() is triggered each time that an attempt is made to register a stall-warning notifier in kernels built with CONFIG_RCU_CPU_STALL_NOTIFIER=y. This combination of measures will keep use of this mechanism confined to debug kernels and away from routine deployments. [ paulmck: Apply Dan Carpenter feedback. ] Fixes: 5b404fdabacf ("rcu: Add RCU CPU stall notifier") Reported-by: Linus Torvalds Signed-off-by: Paul E. 
McKenney Reviewed-by: Joel Fernandes (Google) Signed-off-by: Neeraj Upadhyay (AMD) --- .../admin-guide/kernel-parameters.txt | 6 +++++ include/linux/rcu_notifier.h | 6 ++--- kernel/rcu/Kconfig.debug | 25 +++++++++++++++++++ kernel/rcu/rcu.h | 8 +++--- kernel/rcu/rcutorture.c | 12 +++++---- kernel/rcu/tree_stall.h | 11 +++++++- kernel/rcu/update.c | 6 +++++ 7 files changed, 62 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 65731b060e3f..b72e2049c487 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5302,6 +5302,12 @@ Dump ftrace buffer after reporting RCU CPU stall warning. + rcupdate.rcu_cpu_stall_notifiers= [KNL] + Provide RCU CPU stall notifiers, but see the + warnings in the RCU_CPU_STALL_NOTIFIER Kconfig + option's help text. TL;DR: You almost certainly + do not want rcupdate.rcu_cpu_stall_notifiers. + rcupdate.rcu_cpu_stall_suppress= [KNL] Suppress RCU CPU stall warning messages. diff --git a/include/linux/rcu_notifier.h b/include/linux/rcu_notifier.h index ebf371364581..5640f024773b 100644 --- a/include/linux/rcu_notifier.h +++ b/include/linux/rcu_notifier.h @@ -13,7 +13,7 @@ #define RCU_STALL_NOTIFY_NORM 1 #define RCU_STALL_NOTIFY_EXP 2 -#ifdef CONFIG_RCU_STALL_COMMON +#if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) #include #include @@ -21,12 +21,12 @@ int rcu_stall_chain_notifier_register(struct notifier_block *n); int rcu_stall_chain_notifier_unregister(struct notifier_block *n); -#else // #ifdef CONFIG_RCU_STALL_COMMON +#else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) // No RCU CPU stall warnings in Tiny RCU. static inline int rcu_stall_chain_notifier_register(struct notifier_block *n) { return -EEXIST; } static inline int rcu_stall_chain_notifier_unregister(struct notifier_block *n) { return -ENOENT; } -#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON +#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) #endif /* __LINUX_RCU_NOTIFIER_H */ diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 2984de629f74..9b0b52e1836f 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -105,6 +105,31 @@ config RCU_CPU_STALL_CPUTIME The boot option rcupdate.rcu_cpu_stall_cputime has the same function as this one, but will override this if it exists. +config RCU_CPU_STALL_NOTIFIER + bool "Provide RCU CPU-stall notifiers" + depends on RCU_STALL_COMMON + depends on DEBUG_KERNEL + depends on RCU_EXPERT + default n + help + WARNING: You almost certainly do not want this!!! + + Enable RCU CPU-stall notifiers, which are invoked just before + printing the RCU CPU stall warning. As such, bugs in notifier + callbacks can prevent stall warnings from being printed. + And the whole reason that a stall warning is being printed is + that something is hung up somewhere. Therefore, the notifier + callbacks must be written extremely carefully, preferably + containing only lockless code. After all, it is quite possible + that the whole reason that the RCU CPU stall is happening in + the first place is that someone forgot to release whatever lock + that you are thinking of acquiring. In which case, having your + notifier callback acquire that lock will hang, preventing the + RCU CPU stall warning from appearing. 
+ + Say Y here if you want RCU CPU stall notifiers (you don't want them) + Say N if you are unsure. + config RCU_TRACE bool "Enable tracing for RCU" depends on DEBUG_KERNEL diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index b531c33e9545..f94f65877f2b 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -262,6 +262,8 @@ static inline bool rcu_stall_is_suppressed_at_boot(void) return rcu_cpu_stall_suppress_at_boot && !rcu_inkernel_boot_has_ended(); } +extern int rcu_cpu_stall_notifiers; + #ifdef CONFIG_RCU_STALL_COMMON extern int rcu_cpu_stall_ftrace_dump; @@ -659,10 +661,10 @@ static inline bool rcu_cpu_beenfullyonline(int cpu) { return true; } bool rcu_cpu_beenfullyonline(int cpu); #endif -#ifdef CONFIG_RCU_STALL_COMMON +#if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) int rcu_stall_notifier_call_chain(unsigned long val, void *v); -#else // #ifdef CONFIG_RCU_STALL_COMMON +#else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; } -#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON +#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 30fc9d34e329..07a6a183c555 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2450,10 +2450,12 @@ static int rcu_torture_stall(void *args) unsigned long stop_at; VERBOSE_TOROUT_STRING("rcu_torture_stall task started"); - ret = rcu_stall_chain_notifier_register(&rcu_torture_stall_block); - if (ret) - pr_info("%s: rcu_stall_chain_notifier_register() returned %d, %sexpected.\n", - __func__, ret, !IS_ENABLED(CONFIG_RCU_STALL_COMMON) ? "un" : ""); + if (rcu_cpu_stall_notifiers) { + ret = rcu_stall_chain_notifier_register(&rcu_torture_stall_block); + if (ret) + pr_info("%s: rcu_stall_chain_notifier_register() returned %d, %sexpected.\n", + __func__, ret, !IS_ENABLED(CONFIG_RCU_STALL_COMMON) ? "un" : ""); + } if (stall_cpu_holdoff > 0) { VERBOSE_TOROUT_STRING("rcu_torture_stall begin holdoff"); schedule_timeout_interruptible(stall_cpu_holdoff * HZ); @@ -2497,7 +2499,7 @@ static int rcu_torture_stall(void *args) cur_ops->readunlock(idx); } pr_alert("%s end.\n", __func__); - if (!ret) { + if (rcu_cpu_stall_notifiers && !ret) { ret = rcu_stall_chain_notifier_unregister(&rcu_torture_stall_block); if (ret) pr_info("%s: rcu_stall_chain_notifier_unregister() returned %d.\n", __func__, ret); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index ac8e86babe44..5d666428546b 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -1061,6 +1061,7 @@ static int __init rcu_sysrq_init(void) } early_initcall(rcu_sysrq_init); +#ifdef CONFIG_RCU_CPU_STALL_NOTIFIER ////////////////////////////////////////////////////////////////////////////// // @@ -1081,7 +1082,13 @@ static ATOMIC_NOTIFIER_HEAD(rcu_cpu_stall_notifier_list); */ int rcu_stall_chain_notifier_register(struct notifier_block *n) { - return atomic_notifier_chain_register(&rcu_cpu_stall_notifier_list, n); + int rcsn = rcu_cpu_stall_notifiers; + + WARN(1, "Adding %pS() to RCU stall notifier list (%s).\n", n->notifier_call, + rcsn ? 
"possibly suppressing RCU CPU stall warnings" : "failed, so all is well"); + if (rcsn) + return atomic_notifier_chain_register(&rcu_cpu_stall_notifier_list, n); + return -EEXIST; } EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_register); @@ -1115,3 +1122,5 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return atomic_notifier_call_chain(&rcu_cpu_stall_notifier_list, val, v); } + +#endif // #ifdef CONFIG_RCU_CPU_STALL_NOTIFIER diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index c534d6806d3d..46aaaa9fe339 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -538,9 +538,15 @@ long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask) EXPORT_SYMBOL_GPL(torture_sched_setaffinity); #endif +int rcu_cpu_stall_notifiers __read_mostly; // !0 = provide stall notifiers (rarely useful) +EXPORT_SYMBOL_GPL(rcu_cpu_stall_notifiers); + #ifdef CONFIG_RCU_STALL_COMMON int rcu_cpu_stall_ftrace_dump __read_mostly; module_param(rcu_cpu_stall_ftrace_dump, int, 0644); +#ifdef CONFIG_RCU_CPU_STALL_NOTIFIER +module_param(rcu_cpu_stall_notifiers, int, 0444); +#endif // #ifdef CONFIG_RCU_CPU_STALL_NOTIFIER int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings. EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress); module_param(rcu_cpu_stall_suppress, int, 0644); From 23d90b2404050c00c15058710d56bb46e1c5ab36 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Fri, 20 Oct 2023 18:30:15 +0100 Subject: [PATCH 07/17] rcu: Remove unused macros from rcupdate.h ulong2long, USHORT_CMP_GE and USHORT_CMP_LT are redundant and have been unused for quite a few releases. Signed-off-by: Pedro Falcato Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- include/linux/rcupdate.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f7206b2623c9..aa87c82236dd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -34,9 +34,6 @@ #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) -#define ulong2long(a) (*(long *)(&(a))) -#define USHORT_CMP_GE(a, b) (USHRT_MAX / 2 >= (unsigned short)((a) - (b))) -#define USHORT_CMP_LT(a, b) (USHRT_MAX / 2 < (unsigned short)((a) - (b))) /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); From 20eb4142397cf3ec221de43f10ea149af462c572 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 4 Oct 2023 01:29:01 +0200 Subject: [PATCH 08/17] srcu: Remove superfluous callbacks advancing from srcu_gp_start() Callbacks advancing on SRCU must be performed on two specific places: 1) On enqueue time in order to make room for the acceleration of the new callback. 2) On invocation time in order to move the callbacks ready to invoke. Any other callback advancing callsite is needless. Remove the remaining one in srcu_gp_start(). Co-developed-by: Yong He Signed-off-by: Yong He Co-developed-by: Joel Fernandes Signed-off-by: Joel Fernandes Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. 
McKenney Co-developed-by: Neeraj Upadhyay (AMD) Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/rcu/srcutree.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 560e99ec5333..e9356a103626 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -772,20 +772,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe); */ static void srcu_gp_start(struct srcu_struct *ssp) { - struct srcu_data *sdp; int state; - if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) - sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); - else - sdp = this_cpu_ptr(ssp->sda); lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)); - spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ - rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); - WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); - spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies); WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0); smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ From 94c55b9e21979daa88e190bf971c47432a818ebe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 4 Oct 2023 01:29:02 +0200 Subject: [PATCH 09/17] srcu: No need to advance/accelerate if no callback enqueued While in grace period start, there is nothing to accelerate and therefore no need to advance the callbacks either if no callback is to be enqueued. Spare these needless operations in this case. Signed-off-by: Frederic Weisbecker Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/rcu/srcutree.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index e9356a103626..2bfc8ed1eed2 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1261,9 +1261,11 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, * period (gp_num = X + 8). So acceleration fails. */ s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq); - rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); - WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s) && rhp); + if (rhp) { + rcu_segcblist_advance(&sdp->srcu_cblist, + rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); + WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s)); + } if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { sdp->srcu_gp_seq_needed = s; needgp = true; From c21357e4461f3f9c8ff93302906b5372411ee108 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 4 Oct 2023 01:29:03 +0200 Subject: [PATCH 10/17] srcu: Explain why callbacks invocations can't run concurrently If an SRCU barrier is queued while callbacks are running and a new callbacks invocator for the same sdp were to run concurrently, the RCU barrier might execute too early. As this requirement is non-obvious, make sure to keep a record. Signed-off-by: Frederic Weisbecker Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. 
McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/rcu/srcutree.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 2bfc8ed1eed2..0351a4e83529 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1715,6 +1715,11 @@ static void srcu_invoke_callbacks(struct work_struct *work) WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); + /* + * Although this function is theoretically re-entrant, concurrent + * callbacks invocation is disallowed to avoid executing an SRCU barrier + * too early. + */ if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { spin_unlock_irq_rcu_node(sdp); @@ -1745,6 +1750,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); spin_unlock_irq_rcu_node(sdp); + /* An SRCU barrier or callbacks from previous nesting work pending */ if (more) srcu_schedule_cbs_sdp(sdp, 0); } From 3c6b0c1c28184038d90dffe8eb542bedcb8ccf98 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 30 Nov 2023 14:27:29 +0100 Subject: [PATCH 11/17] srcu: Use try-lock lockdep annotation for NMI-safe access. It is claimed that srcu_read_lock_nmisafe() is NMI-safe. However, it triggers a lockdep warning if used from NMI because lockdep expects a deadlock since nothing disables NMIs while the lock is acquired. This is because commit f0f44752f5f61 ("rcu: Annotate SRCU's update-side lockdep dependencies") annotates synchronize_srcu() as a write lock usage. This helps to detect deadlocks such as srcu_read_lock(); synchronize_srcu(); srcu_read_unlock(); The side effect is that the lock srcu_struct now has a USED usage in normal contexts, so it conflicts with a USED_READ usage in NMI. But this shouldn't cause a real deadlock because the write lock usage from synchronize_srcu() is a fake one and only used for read/write deadlock detection. Use a try-lock annotation for srcu_read_lock_nmisafe() to avoid lockdep complaints if used from NMI. Fixes: f0f44752f5f6 ("rcu: Annotate SRCU's update-side lockdep dependencies") Link: https://lore.kernel.org/r/20230927160231.XRCDDSK4@linutronix.de Reviewed-by: Boqun Feng Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. 
McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- include/linux/rcupdate.h | 6 ++++++ include/linux/srcu.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f7206b2623c9..31d523c4e089 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -301,6 +301,11 @@ static inline void rcu_lock_acquire(struct lockdep_map *map) lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } +static inline void rcu_try_lock_acquire(struct lockdep_map *map) +{ + lock_acquire(map, 0, 1, 2, 0, NULL, _THIS_IP_); +} + static inline void rcu_lock_release(struct lockdep_map *map) { lock_release(map, _THIS_IP_); @@ -315,6 +320,7 @@ int rcu_read_lock_any_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ # define rcu_lock_acquire(a) do { } while (0) +# define rcu_try_lock_acquire(a) do { } while (0) # define rcu_lock_release(a) do { } while (0) static inline int rcu_read_lock_held(void) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 127ef3b2e607..236610e4a8fa 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -229,7 +229,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp srcu_check_nmi_safety(ssp, true); retval = __srcu_read_lock_nmisafe(ssp); - rcu_lock_acquire(&ssp->dep_map); + rcu_try_lock_acquire(&ssp->dep_map); return retval; } From fb91e42fe3bfca0293cb85c5fbb6f98d9d173aec Mon Sep 17 00:00:00 2001 From: Charles Han Date: Mon, 18 Sep 2023 11:13:09 +0800 Subject: [PATCH 12/17] Documentation: RCU: Remove repeated word in comments Remove the repeated word "of" in comments. Signed-off-by: Charles Han Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- Documentation/RCU/torture.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/RCU/torture.rst b/Documentation/RCU/torture.rst index b3b6dfa85248..49e7beea6ae1 100644 --- a/Documentation/RCU/torture.rst +++ b/Documentation/RCU/torture.rst @@ -185,7 +185,7 @@ argument. Not all changes require that all scenarios be run. For example, a change to Tree SRCU might run only the SRCU-N and SRCU-P scenarios using the --configs argument to kvm.sh as follows: "--configs 'SRCU-N SRCU-P'". -Large systems can run multiple copies of of the full set of scenarios, +Large systems can run multiple copies of the full set of scenarios, for example, a system with 448 hardware threads can run five instances of the full set concurrently. To make this happen:: From 493dffa3ab07b5d2c0b7bd5de5bff6e85f01f52a Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 20 Sep 2023 11:22:12 +0200 Subject: [PATCH 13/17] rculist.h: docs: Fix wrong function summary The brief summary in the docstring for function list_next_or_null_rcu() states that the function is supposed to provide the "first" member of a list, whereas in truth it returns the next member. Change the docstring so it describes what the function actually does. Signed-off-by: Philipp Stanner Signed-off-by: Paul E. 
McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- include/linux/rculist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index d29740be4833..3dc1e58865f7 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -355,7 +355,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, }) /** - * list_next_or_null_rcu - get the first element from a list + * list_next_or_null_rcu - get the next element from a list * @head: the head for the list. * @ptr: the list head to take the next element from. * @type: the type of the struct this is embedded in. From 1b7178b23dc915fc6801ff3eeb6c4ffa51b562f7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2023 10:30:01 -0700 Subject: [PATCH 14/17] doc: Clarify RCU Tasks reader/updater checklist Currently, the reader/updater compatibility rules for the three RCU Tasks flavors are squished together in a single paragraph, which can result in confusion. This commit therefore splits them out into a list, clearly showing the distinction between these flavors. Link: https://lore.kernel.org/all/20231002211936.5948253e@gandalf.local.home/ Reported-by: Steven Rostedt Signed-off-by: Paul E. McKenney Reviewed-by: Mathieu Desnoyers Reviewed-by: Steven Rostedt (Google) Signed-off-by: Neeraj Upadhyay (AMD) --- Documentation/RCU/checklist.rst | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst index bd3c58c44bef..2d42998a89a6 100644 --- a/Documentation/RCU/checklist.rst +++ b/Documentation/RCU/checklist.rst @@ -241,15 +241,22 @@ over a rather long period of time, but improvements are always welcome! srcu_struct. The rules for the expedited RCU grace-period-wait primitives are the same as for their non-expedited counterparts. - If the updater uses call_rcu_tasks() or synchronize_rcu_tasks(), - then the readers must refrain from executing voluntary - context switches, that is, from blocking. If the updater uses - call_rcu_tasks_trace() or synchronize_rcu_tasks_trace(), then - the corresponding readers must use rcu_read_lock_trace() and - rcu_read_unlock_trace(). If an updater uses call_rcu_tasks_rude() - or synchronize_rcu_tasks_rude(), then the corresponding readers - must use anything that disables preemption, for example, - preempt_disable() and preempt_enable(). + Similarly, it is necessary to correctly use the RCU Tasks flavors: + + a. If the updater uses synchronize_rcu_tasks() or + call_rcu_tasks(), then the readers must refrain from + executing voluntary context switches, that is, from + blocking. + + b. If the updater uses call_rcu_tasks_trace() + or synchronize_rcu_tasks_trace(), then the + corresponding readers must use rcu_read_lock_trace() + and rcu_read_unlock_trace(). + + c. If an updater uses call_rcu_tasks_rude() or + synchronize_rcu_tasks_rude(), then the corresponding + readers must use anything that disables preemption, + for example, preempt_disable() and preempt_enable(). Mixing things up will result in confusion and broken kernels, and has even resulted in an exploitable security issue. Therefore, From c49956be75152a533787f5daa06ef4b710207499 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 20 Oct 2023 10:51:26 -0700 Subject: [PATCH 15/17] doc: Mention address and data dependencies in rcu_dereference.rst This commit adds discussion of address and data dependencies to the beginning of rcu_dereference.rst in order to enable readers to more easily make the connection to the Linux-kernel memory model in general and to memory-barriers.txt in particular. Reported-by: Jonas Oberhauser Reported-by: Akira Yokosawa Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- Documentation/RCU/rcu_dereference.rst | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/Documentation/RCU/rcu_dereference.rst b/Documentation/RCU/rcu_dereference.rst index 3b739f6243c8..659d5913784d 100644 --- a/Documentation/RCU/rcu_dereference.rst +++ b/Documentation/RCU/rcu_dereference.rst @@ -3,13 +3,26 @@ PROPER CARE AND FEEDING OF RETURN VALUES FROM rcu_dereference() =============================================================== -Most of the time, you can use values from rcu_dereference() or one of -the similar primitives without worries. Dereferencing (prefix "*"), -field selection ("->"), assignment ("="), address-of ("&"), addition and -subtraction of constants, and casts all work quite naturally and safely. +Proper care and feeding of address and data dependencies is critically +important to correct use of things like RCU. To this end, the pointers +returned from the rcu_dereference() family of primitives carry address and +data dependencies. These dependencies extend from the rcu_dereference() +macro's load of the pointer to the later use of that pointer to compute +either the address of a later memory access (representing an address +dependency) or the value written by a later memory access (representing +a data dependency). -It is nevertheless possible to get into trouble with other operations. -Follow these rules to keep your RCU code working properly: +Most of the time, these dependencies are preserved, permitting you to +freely use values from rcu_dereference(). For example, dereferencing +(prefix "*"), field selection ("->"), assignment ("="), address-of +("&"), casts, and addition or subtraction of constants all work quite +naturally and safely. However, because current compilers do not take +either address or data dependencies into account it is still possible +to get into trouble. + +Follow these rules to preserve the address and data dependencies emanating +from your calls to rcu_dereference() and friends, thus keeping your RCU +readers working properly: - You must use one of the rcu_dereference() family of primitives to load an RCU-protected pointer, otherwise CONFIG_PROVE_RCU From ad9446302919ee3a646ad667a9ea15f992685dca Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 20 Oct 2023 11:04:27 -0700 Subject: [PATCH 16/17] doc: Clarify historical disclaimers in memory-barriers.txt This commit makes it clear that the reason that these sections are historical is that smp_read_barrier_depends() is no more. It also removes the point about comparison operations, given that there are other optimizations that can break address dependencies. Suggested-by: Jonas Oberhauser Signed-off-by: Paul E. 
McKenney Cc: Alan Stern Cc: Andrea Parri Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Nicholas Piggin Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: Akira Yokosawa Cc: Daniel Lustig Cc: Joel Fernandes Cc: Jonathan Corbet Cc: Cc: Signed-off-by: Neeraj Upadhyay (AMD) --- Documentation/memory-barriers.txt | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index d414e145f912..4202174a6262 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -396,10 +396,11 @@ Memory barriers come in four basic varieties: (2) Address-dependency barriers (historical). - [!] This section is marked as HISTORICAL: For more up-to-date - information, including how compiler transformations related to pointer - comparisons can sometimes cause problems, see - Documentation/RCU/rcu_dereference.rst. + [!] This section is marked as HISTORICAL: it covers the long-obsolete + smp_read_barrier_depends() macro, the semantics of which are now + implicit in all marked accesses. For more up-to-date information, + including how compiler transformations can sometimes break address + dependencies, see Documentation/RCU/rcu_dereference.rst. An address-dependency barrier is a weaker form of read barrier. In the case where two loads are performed such that the second depends on the @@ -560,9 +561,11 @@ There are certain things that the Linux kernel memory barriers do not guarantee: ADDRESS-DEPENDENCY BARRIERS (HISTORICAL) ---------------------------------------- -[!] This section is marked as HISTORICAL: For more up-to-date information, -including how compiler transformations related to pointer comparisons can -sometimes cause problems, see Documentation/RCU/rcu_dereference.rst. +[!] This section is marked as HISTORICAL: it covers the long-obsolete +smp_read_barrier_depends() macro, the semantics of which are now implicit +in all marked accesses. For more up-to-date information, including +how compiler transformations can sometimes break address dependencies, +see Documentation/RCU/rcu_dereference.rst. As of v4.15 of the Linux kernel, an smp_mb() was added to READ_ONCE() for DEC Alpha, which means that about the only people who need to pay attention From dee39c0c1e9624f925da4ca0bece46bdc7427257 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 1 Nov 2023 11:35:07 +0800 Subject: [PATCH 17/17] rcu: Force quiescent states only for ongoing grace period If an rcutorture test scenario creates an fqs_task kthread, it will periodically invoke rcu_force_quiescent_state() in order to start force-quiescent-state (FQS) operations. However, an FQS operation will be started even if there is no RCU grace period in progress. Although testing FQS operations startup when there is no grace period in progress is necessary, it need not happen all that often. This commit therefore causes rcu_force_quiescent_state() to take an early exit if there is no grace period in progress. Note that there will still be attempts to start an FQS scan in the absence of a grace period because the grace period might end right after the rcu_force_quiescent_state() function's check. In actual testing, this happens about once every ten minutes, which should provide adequate testing. Signed-off-by: Zqiang Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. 
McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/rcu/tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 3ac3c846105f..1ae851777806 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2338,6 +2338,8 @@ void rcu_force_quiescent_state(void) struct rcu_node *rnp; struct rcu_node *rnp_old = NULL; + if (!rcu_gp_in_progress()) + return; /* Funnel through hierarchy to reduce memory contention. */ rnp = raw_cpu_read(rcu_data.mynode); for (; rnp != NULL; rnp = rnp->parent) {