Merge branches 'doc.2023.05.10a', 'fixes.2023.05.11a', 'kvfree.2023.05.10a', 'nocb.2023.05.11a', 'rcu-tasks.2023.05.10a', 'torture.2023.05.15a' and 'rcu-urgent.2023.06.06a' into HEAD

doc.2023.05.10a: Documentation updates
fixes.2023.05.11a: Miscellaneous fixes
kvfree.2023.05.10a: kvfree_rcu updates
nocb.2023.05.11a: Callback-offloading updates
rcu-tasks.2023.05.10a: Tasks RCU updates
torture.2023.05.15a: Torture-test updates
rcu-urgent.2023.06.06a: Urgent SRCU fix
Paul E. McKenney 2023-06-07 13:44:06 -07:00
18 changed files with 332 additions and 271 deletions

View File

@ -2071,41 +2071,7 @@ call.
Because RCU avoids interrupting idle CPUs, it is illegal to execute an
RCU read-side critical section on an idle CPU. (Kernels built with
-``CONFIG_PROVE_RCU=y`` will splat if you try it.) The RCU_NONIDLE()
+``CONFIG_PROVE_RCU=y`` will splat if you try it.)
macro and ``_rcuidle`` event tracing is provided to work around this
restriction. In addition, rcu_is_watching() may be used to test
whether or not it is currently legal to run RCU read-side critical
sections on this CPU. I learned of the need for diagnostics on the one
hand and RCU_NONIDLE() on the other while inspecting idle-loop code.
Steven Rostedt supplied ``_rcuidle`` event tracing, which is used quite
heavily in the idle loop. However, there are some restrictions on the
code placed within RCU_NONIDLE():
#. Blocking is prohibited. In practice, this is not a serious
restriction given that idle tasks are prohibited from blocking to
begin with.
#. Although nesting RCU_NONIDLE() is permitted, they cannot nest
indefinitely deeply. However, given that they can be nested on the
order of a million deep, even on 32-bit systems, this should not be a
serious restriction. This nesting limit would probably be reached
long after the compiler OOMed or the stack overflowed.
#. Any code path that enters RCU_NONIDLE() must sequence out of that
same RCU_NONIDLE(). For example, the following is grossly
illegal:
::
1 RCU_NONIDLE({
2 do_something();
3 goto bad_idea; /* BUG!!! */
4 do_something_else();});
5 bad_idea:
It is just as illegal to transfer control into the middle of
RCU_NONIDLE()'s argument. Yes, in theory, you could transfer in
as long as you also transferred out, but in practice you could also
expect to get sharply worded review comments.
It is similarly socially unacceptable to interrupt an ``nohz_full`` CPU
running in userspace. RCU must therefore track ``nohz_full`` userspace
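(Illustrative note, not part of the patch above.) With RCU_NONIDLE() gone, the guidance in the deleted text still applies in one respect: rcu_is_watching() tells you whether RCU read-side critical sections are currently legal on this CPU. A minimal kernel-style sketch, where maybe_trace_from_idle() is a made-up name:

    #include <linux/rcupdate.h>

    /* Hypothetical helper: skip the read side when RCU is ignoring this CPU. */
    static void maybe_trace_from_idle(void)
    {
            if (!rcu_is_watching())
                    return; /* deep idle: RCU read-side critical sections are illegal here */
            rcu_read_lock();
            /* ... access RCU-protected state, e.g. for tracing ... */
            rcu_read_unlock();
    }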

View File

@ -1117,7 +1117,6 @@ All: lockdep-checked RCU utility APIs::
RCU_LOCKDEP_WARN
rcu_sleep_check
RCU_NONIDLE
All: Unchecked RCU-protected pointer access::

View File

@ -5094,8 +5094,17 @@
rcutorture.stall_cpu_block= [KNL]
Sleep while stalling if set. This will result
-in warnings from preemptible RCU in addition
-to any other stall-related activity.
+in warnings from preemptible RCU in addition to
+any other stall-related activity. Note that
in kernels built with CONFIG_PREEMPTION=n and
CONFIG_PREEMPT_COUNT=y, this parameter will
cause the CPU to pass through a quiescent state.
Given CONFIG_PREEMPTION=n, this will suppress
RCU CPU stall warnings, but will instead result
in scheduling-while-atomic splats.
Use of this module parameter results in splats.
rcutorture.stall_cpu_holdoff= [KNL]
Time to wait (s) after boot before inducing stall.
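(Configuration illustration only; the values are arbitrary and not taken from this patch.) A stall test exercising the behavior described above might combine these module parameters on the kernel command line:

    rcutorture.stall_cpu=22 rcutorture.stall_cpu_block=1 rcutorture.stall_cpu_holdoff=30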

View File

@ -106,12 +106,22 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
#define RAW_NOTIFIER_INIT(name) { \
.head = NULL }
#ifdef CONFIG_TREE_SRCU
#define SRCU_NOTIFIER_INIT(name, pcpu) \
{ \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
.head = NULL, \
.srcuu = __SRCU_USAGE_INIT(name.srcuu), \
.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
}
#else
#define SRCU_NOTIFIER_INIT(name, pcpu) \
{ \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
.head = NULL, \
.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
}
#endif
#define ATOMIC_NOTIFIER_HEAD(name) \
struct atomic_notifier_head name = \
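(Hedged usage sketch, not part of the patch.) The initializer above is what SRCU_NOTIFIER_HEAD_STATIC() expands to; a typical SRCU notifier chain looks roughly like this, with the my_* names invented for illustration:

    #include <linux/notifier.h>

    SRCU_NOTIFIER_HEAD_STATIC(my_chain);

    static int my_event_cb(struct notifier_block *nb, unsigned long action, void *data)
    {
            /* Invoked from srcu_notifier_call_chain() under srcu_read_lock(). */
            return NOTIFY_OK;
    }

    static struct notifier_block my_nb = { .notifier_call = my_event_cb };

    static void my_setup_and_fire(void)
    {
            srcu_notifier_chain_register(&my_chain, &my_nb);
            srcu_notifier_call_chain(&my_chain, 0, NULL);
            srcu_notifier_chain_unregister(&my_chain, &my_nb);
    }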

View File

@ -156,31 +156,6 @@ static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; }
static inline void rcu_nocb_flush_deferred_wakeup(void) { }
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
/**
* RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
* @a: Code that RCU needs to pay attention to.
*
* RCU read-side critical sections are forbidden in the inner idle loop,
* that is, between the ct_idle_enter() and the ct_idle_exit() -- RCU
* will happily ignore any such read-side critical sections. However,
* things like powertop need tracepoints in the inner idle loop.
*
* This macro provides the way out: RCU_NONIDLE(do_something_with_RCU())
* will tell RCU that it needs to pay attention, invoke its argument
* (in this example, calling the do_something_with_RCU() function),
* and then tell RCU to go back to ignoring this CPU. It is permissible
* to nest RCU_NONIDLE() wrappers, but not indefinitely (but the limit is
* on the order of a million or so, even on 32-bit systems). It is
* not legal to block within RCU_NONIDLE(), nor is it permissible to
* transfer control either into or out of RCU_NONIDLE()'s statement.
*/
#define RCU_NONIDLE(a) \
do { \
ct_irq_enter_irqson(); \
do { a; } while (0); \
ct_irq_exit_irqson(); \
} while (0)
/*
* Note a quasi-voluntary context switch for RCU-tasks's benefit.
* This is a macro rather than an inline function to avoid #include hell.
@ -957,9 +932,8 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
/**
* kfree_rcu() - kfree an object after a grace period.
-* @ptr: pointer to kfree for both single- and double-argument invocations.
-* @rhf: the name of the struct rcu_head within the type of @ptr,
-* but only for double-argument invocations.
+* @ptr: pointer to kfree for double-argument invocations.
+* @rhf: the name of the struct rcu_head within the type of @ptr.
*
* Many rcu callbacks functions just call kfree() on the base structure.
* These functions are trivial, but their size adds up, and furthermore
@ -984,26 +958,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* The BUILD_BUG_ON check must not involve any function calls, hence the
* checks are done in macros here.
*/
-#define kfree_rcu(ptr, rhf...) kvfree_rcu(ptr, ## rhf)
+#define kfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf)
+#define kvfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf)
/**
-* kvfree_rcu() - kvfree an object after a grace period.
-*
+* kfree_rcu_mightsleep() - kfree an object after a grace period.
+* @ptr: pointer to kfree for single-argument invocations.
* This macro consists of one or two arguments and it is
* based on whether an object is head-less or not. If it
* has a head then a semantic stays the same as it used
* to be before:
*
* kvfree_rcu(ptr, rhf);
*
* where @ptr is a pointer to kvfree(), @rhf is the name
* of the rcu_head structure within the type of @ptr.
*
* When it comes to head-less variant, only one argument
* is passed and that is just a pointer which has to be
* freed after a grace period. Therefore the semantic is
*
-* kvfree_rcu(ptr);
+* kfree_rcu_mightsleep(ptr);
*
* where @ptr is the pointer to be freed by kvfree().
*
@ -1012,13 +978,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* annotation. Otherwise, please switch and embed the
* rcu_head structure within the type of @ptr.
*/
-#define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \
-kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__)
+#define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
#define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
-#define kfree_rcu_mightsleep(ptr) kvfree_rcu_mightsleep(ptr)
-#define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME
#define kvfree_rcu_arg_2(ptr, rhf) \
do { \
typeof (ptr) ___p = (ptr); \
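(Hedged usage sketch; struct my_node is invented for illustration.) The reworked macros above keep two distinct entry points: two-argument kfree_rcu() requires an rcu_head embedded in the object and never sleeps, while kfree_rcu_mightsleep() takes only the pointer but may block waiting for a grace period, so it cannot be used with spinlocks held:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct my_node {
            int key;
            struct rcu_head rh;     /* needed only for the two-argument form */
    };

    static void my_free_examples(struct my_node *a, struct my_node *b)
    {
            kfree_rcu(a, rh);               /* non-blocking */
            kfree_rcu_mightsleep(b);        /* no rcu_head needed, but may sleep */
    }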

View File

@ -212,7 +212,7 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
srcu_check_nmi_safety(ssp, false);
retval = __srcu_read_lock(ssp);
-srcu_lock_acquire(&(ssp)->dep_map);
+srcu_lock_acquire(&ssp->dep_map);
return retval;
}
@ -229,7 +229,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
srcu_check_nmi_safety(ssp, true);
retval = __srcu_read_lock_nmisafe(ssp);
-rcu_lock_acquire(&(ssp)->dep_map);
+rcu_lock_acquire(&ssp->dep_map);
return retval;
}
@ -284,7 +284,7 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
{
WARN_ON_ONCE(idx & ~0x1);
srcu_check_nmi_safety(ssp, false);
-srcu_lock_release(&(ssp)->dep_map);
+srcu_lock_release(&ssp->dep_map);
__srcu_read_unlock(ssp, idx);
}
@ -300,7 +300,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
{
WARN_ON_ONCE(idx & ~0x1);
srcu_check_nmi_safety(ssp, true);
-rcu_lock_release(&(ssp)->dep_map);
+rcu_lock_release(&ssp->dep_map);
__srcu_read_unlock_nmisafe(ssp, idx);
}
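(Hedged reader/updater sketch; my_srcu and the helpers are illustrative, not this header's code.) The lockdep annotations touched above sit under the usual SRCU pattern:

    #include <linux/srcu.h>

    DEFINE_STATIC_SRCU(my_srcu);

    static void my_reader(void)
    {
            int idx;

            idx = srcu_read_lock(&my_srcu);
            /* ... read SRCU-protected data; sleeping is allowed here ... */
            srcu_read_unlock(&my_srcu, idx);
    }

    static void my_updater(void)
    {
            /* ... unpublish the old data ... */
            synchronize_srcu(&my_srcu);     /* wait for all pre-existing readers */
            /* ... now safe to free the old data ... */
    }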

View File

@ -33,24 +33,19 @@
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>"); MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
torture_param(int, nwriters_stress, -1, torture_param(int, nwriters_stress, -1, "Number of write-locking stress-test threads");
"Number of write-locking stress-test threads"); torture_param(int, nreaders_stress, -1, "Number of read-locking stress-test threads");
torture_param(int, nreaders_stress, -1, torture_param(int, long_hold, 100, "Do occasional long hold of lock (ms), 0=disable");
"Number of read-locking stress-test threads");
torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
torture_param(int, onoff_interval, 0, torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable");
"Time between CPU hotplugs (s), 0=disable"); torture_param(int, shuffle_interval, 3, "Number of jiffies between shuffles, 0=disable");
torture_param(int, shuffle_interval, 3,
"Number of jiffies between shuffles, 0=disable");
torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable."); torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
torture_param(int, stat_interval, 60, torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s");
"Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable"); torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
torture_param(int, rt_boost, 2, torture_param(int, rt_boost, 2,
"Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types."); "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens."); torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
torture_param(int, verbose, 1, torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
"Enable verbose debugging printk()s");
torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)"); torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)");
/* Going much higher trips "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" errors */ /* Going much higher trips "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" errors */
#define MAX_NESTED_LOCKS 8 #define MAX_NESTED_LOCKS 8
@ -120,7 +115,7 @@ static int torture_lock_busted_write_lock(int tid __maybe_unused)
static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
{
-const unsigned long longdelay_ms = 100;
+const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
@ -198,16 +193,18 @@ __acquires(torture_spinlock)
static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
-const unsigned long longdelay_ms = 100;
+const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
+unsigned long j;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
*/
-if (!(torture_random(trsp) %
-(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * longdelay_ms))) {
+j = jiffies;
mdelay(longdelay_ms);
-if (!(torture_random(trsp) %
-(cxt.nrealwriters_stress * 2 * shortdelay_us)))
+pr_alert("%s: delay = %lu jiffies.\n", __func__, jiffies - j);
+}
+if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 200 * shortdelay_us)))
udelay(shortdelay_us);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
@ -322,7 +319,7 @@ __acquires(torture_rwlock)
static void torture_rwlock_write_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
-const unsigned long longdelay_ms = 100;
+const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
@ -455,14 +452,12 @@ __acquires(torture_mutex)
static void torture_mutex_delay(struct torture_random_state *trsp)
{
-const unsigned long longdelay_ms = 100;
+const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 5);
-else
-mdelay(longdelay_ms / 5);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
}
@ -630,7 +625,7 @@ __acquires(torture_rtmutex)
static void torture_rtmutex_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
-const unsigned long longdelay_ms = 100;
+const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/*
* We want a short delay mostly to emulate likely code, and
@ -640,7 +635,7 @@ static void torture_rtmutex_delay(struct torture_random_state *trsp)
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms);
if (!(torture_random(trsp) %
-(cxt.nrealwriters_stress * 2 * shortdelay_us)))
+(cxt.nrealwriters_stress * 200 * shortdelay_us)))
udelay(shortdelay_us);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
@ -695,14 +690,12 @@ __acquires(torture_rwsem)
static void torture_rwsem_write_delay(struct torture_random_state *trsp)
{
-const unsigned long longdelay_ms = 100;
+const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 10);
-else
-mdelay(longdelay_ms / 10);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
}
@ -848,8 +841,8 @@ static int lock_torture_writer(void *arg)
lwsp->n_lock_acquired++;
}
-cxt.cur_ops->write_delay(&rand);
if (!skip_main_lock) {
+cxt.cur_ops->write_delay(&rand);
lock_is_write_held = false;
WRITE_ONCE(last_lock_release, jiffies);
cxt.cur_ops->writeunlock(tid);
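(Hedged restatement, not the module's code; my_want_long_hold() and its parameters are stand-ins.) The new long_hold parameter gates the occasional long lock hold on a pseudo-random draw, roughly once every nrealwriters * 2000 * long_hold operations, and long_hold=0 effectively disables it:

    #include <linux/types.h>
    #include <linux/limits.h>

    static bool my_want_long_hold(unsigned long rnd, int nwriters, unsigned long long_hold_ms)
    {
            unsigned long longdelay_ms = long_hold_ms ? long_hold_ms : ULONG_MAX;

            /* True roughly once per nwriters * 2000 * longdelay_ms draws. */
            return !(rnd % (nwriters * 2000 * longdelay_ms));
    }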

View File

@ -314,4 +314,22 @@ config RCU_LAZY
To save power, batch RCU callbacks and flush after delay, memory
pressure, or callback list growing too big.
config RCU_DOUBLE_CHECK_CB_TIME
bool "RCU callback-batch backup time check"
depends on RCU_EXPERT
default n
help
Use this option to provide more precise enforcement of the
rcutree.rcu_resched_ns module parameter in situations where
a single RCU callback might run for hundreds of microseconds,
thus defeating the 32-callback batching used to amortize the
cost of the fine-grained but expensive local_clock() function.
This option rounds rcutree.rcu_resched_ns up to the next
jiffy, and overrides the 32-callback batching if this limit
is exceeded.
Say Y here if you need tighter callback-limit enforcement.
Say N here if you are unsure.
endmenu # "RCU Subsystem"
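(Configuration illustration only, not from the patch.) Because the new option depends on RCU_EXPERT, a .config fragment enabling it would look like:

    CONFIG_RCU_EXPERT=y
    CONFIG_RCU_DOUBLE_CHECK_CB_TIME=y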

View File

@ -642,4 +642,10 @@ void show_rcu_tasks_trace_gp_kthread(void);
static inline void show_rcu_tasks_trace_gp_kthread(void) {}
#endif
#ifdef CONFIG_TINY_RCU
static inline bool rcu_cpu_beenfullyonline(int cpu) { return true; }
#else
bool rcu_cpu_beenfullyonline(int cpu);
#endif
#endif /* __LINUX_RCU_H */

View File

@ -522,89 +522,6 @@ rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
}
static void
rcu_scale_cleanup(void)
{
int i;
int j;
int ngps = 0;
u64 *wdp;
u64 *wdpp;
/*
* Would like warning at start, but everything is expedited
* during the mid-boot phase, so have to wait till the end.
*/
if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
if (rcu_gp_is_normal() && gp_exp)
SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
if (gp_exp && gp_async)
SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
if (torture_cleanup_begin())
return;
if (!cur_ops) {
torture_cleanup_end();
return;
}
if (reader_tasks) {
for (i = 0; i < nrealreaders; i++)
torture_stop_kthread(rcu_scale_reader,
reader_tasks[i]);
kfree(reader_tasks);
}
if (writer_tasks) {
for (i = 0; i < nrealwriters; i++) {
torture_stop_kthread(rcu_scale_writer,
writer_tasks[i]);
if (!writer_n_durations)
continue;
j = writer_n_durations[i];
pr_alert("%s%s writer %d gps: %d\n",
scale_type, SCALE_FLAG, i, j);
ngps += j;
}
pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
scale_type, SCALE_FLAG,
t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
t_rcu_scale_writer_finished -
t_rcu_scale_writer_started,
ngps,
rcuscale_seq_diff(b_rcu_gp_test_finished,
b_rcu_gp_test_started));
for (i = 0; i < nrealwriters; i++) {
if (!writer_durations)
break;
if (!writer_n_durations)
continue;
wdpp = writer_durations[i];
if (!wdpp)
continue;
for (j = 0; j < writer_n_durations[i]; j++) {
wdp = &wdpp[j];
pr_alert("%s%s %4d writer-duration: %5d %llu\n",
scale_type, SCALE_FLAG,
i, j, *wdp);
if (j % 100 == 0)
schedule_timeout_uninterruptible(1);
}
kfree(writer_durations[i]);
}
kfree(writer_tasks);
kfree(writer_durations);
kfree(writer_n_durations);
}
/* Do torture-type-specific cleanup operations. */
if (cur_ops->cleanup != NULL)
cur_ops->cleanup();
torture_cleanup_end();
}
/*
* Return the number if non-negative. If -1, the number of CPUs.
* If less than -1, that much less than the number of CPUs, but
@ -624,20 +541,6 @@ static int compute_real(int n)
return nr;
}
/*
* RCU scalability shutdown kthread. Just waits to be awakened, then shuts
* down system.
*/
static int
rcu_scale_shutdown(void *arg)
{
wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
smp_mb(); /* Wake before output. */
rcu_scale_cleanup();
kernel_power_off();
return -EINVAL;
}
/*
* kfree_rcu() scalability tests: Start a kfree_rcu() loop on all CPUs for number
* of iterations and measure total time and number of GP for all iterations to complete.
@ -874,6 +777,108 @@ unwind:
return firsterr;
}
static void
rcu_scale_cleanup(void)
{
int i;
int j;
int ngps = 0;
u64 *wdp;
u64 *wdpp;
/*
* Would like warning at start, but everything is expedited
* during the mid-boot phase, so have to wait till the end.
*/
if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
if (rcu_gp_is_normal() && gp_exp)
SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
if (gp_exp && gp_async)
SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
if (kfree_rcu_test) {
kfree_scale_cleanup();
return;
}
if (torture_cleanup_begin())
return;
if (!cur_ops) {
torture_cleanup_end();
return;
}
if (reader_tasks) {
for (i = 0; i < nrealreaders; i++)
torture_stop_kthread(rcu_scale_reader,
reader_tasks[i]);
kfree(reader_tasks);
}
if (writer_tasks) {
for (i = 0; i < nrealwriters; i++) {
torture_stop_kthread(rcu_scale_writer,
writer_tasks[i]);
if (!writer_n_durations)
continue;
j = writer_n_durations[i];
pr_alert("%s%s writer %d gps: %d\n",
scale_type, SCALE_FLAG, i, j);
ngps += j;
}
pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
scale_type, SCALE_FLAG,
t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
t_rcu_scale_writer_finished -
t_rcu_scale_writer_started,
ngps,
rcuscale_seq_diff(b_rcu_gp_test_finished,
b_rcu_gp_test_started));
for (i = 0; i < nrealwriters; i++) {
if (!writer_durations)
break;
if (!writer_n_durations)
continue;
wdpp = writer_durations[i];
if (!wdpp)
continue;
for (j = 0; j < writer_n_durations[i]; j++) {
wdp = &wdpp[j];
pr_alert("%s%s %4d writer-duration: %5d %llu\n",
scale_type, SCALE_FLAG,
i, j, *wdp);
if (j % 100 == 0)
schedule_timeout_uninterruptible(1);
}
kfree(writer_durations[i]);
}
kfree(writer_tasks);
kfree(writer_durations);
kfree(writer_n_durations);
}
/* Do torture-type-specific cleanup operations. */
if (cur_ops->cleanup != NULL)
cur_ops->cleanup();
torture_cleanup_end();
}
/*
* RCU scalability shutdown kthread. Just waits to be awakened, then shuts
* down system.
*/
static int
rcu_scale_shutdown(void *arg)
{
wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
smp_mb(); /* Wake before output. */
rcu_scale_cleanup();
kernel_power_off();
return -EINVAL;
}
static int __init
rcu_scale_init(void)
{

View File

@ -241,7 +241,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
if (rcu_task_enqueue_lim < 0) {
rcu_task_enqueue_lim = 1;
rcu_task_cb_adjust = true;
-pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
} else if (rcu_task_enqueue_lim == 0) {
rcu_task_enqueue_lim = 1;
}
@ -272,7 +271,9 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
}
raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
-pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
+pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n", rtp->name,
+data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
}
// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
@ -463,6 +464,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
{
int cpu;
int cpunext;
+int cpuwq;
unsigned long flags;
int len;
struct rcu_head *rhp;
@ -473,11 +475,13 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
cpunext = cpu * 2 + 1;
if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
-queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
+cpuwq = rcu_cpu_beenfullyonline(cpunext) ? cpunext : WORK_CPU_UNBOUND;
+queue_work_on(cpuwq, system_wq, &rtpcp_next->rtp_work);
cpunext++;
if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
-queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
+cpuwq = rcu_cpu_beenfullyonline(cpunext) ? cpunext : WORK_CPU_UNBOUND;
+queue_work_on(cpuwq, system_wq, &rtpcp_next->rtp_work);
}
}
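(Hedged sketch of the same fallback in isolation; my_queue_on_cpu() is invented, while rcu_cpu_beenfullyonline() is the RCU-internal helper declared in kernel/rcu/rcu.h earlier in this commit.) The idea is simply to prefer the target CPU once RCU has seen it fully online, and otherwise let the workqueue core pick a CPU:

    #include <linux/workqueue.h>

    static void my_queue_on_cpu(int cpu, struct work_struct *work)
    {
            int cpuwq = rcu_cpu_beenfullyonline(cpu) ? cpu : WORK_CPU_UNBOUND;

            queue_work_on(cpuwq, system_wq, work);
    }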

View File

@ -2046,19 +2046,35 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
rcu_report_qs_rdp(rdp);
}
/* Return true if callback-invocation time limit exceeded. */
static bool rcu_do_batch_check_time(long count, long tlimit,
bool jlimit_check, unsigned long jlimit)
{
// Invoke local_clock() only once per 32 consecutive callbacks.
return unlikely(tlimit) &&
(!likely(count & 31) ||
(IS_ENABLED(CONFIG_RCU_DOUBLE_CHECK_CB_TIME) &&
jlimit_check && time_after(jiffies, jlimit))) &&
local_clock() >= tlimit;
}
/*
* Invoke any RCU callbacks that have made it to the end of their grace
* period. Throttle as specified by rdp->blimit.
*/
static void rcu_do_batch(struct rcu_data *rdp)
{
+long bl;
+long count = 0;
int div;
bool __maybe_unused empty;
unsigned long flags;
-struct rcu_head *rhp;
+unsigned long jlimit;
+bool jlimit_check = false;
+long pending;
struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
-long bl, count = 0;
-long pending, tlimit = 0;
+struct rcu_head *rhp;
+long tlimit = 0;
/* If no callbacks are ready, just return. */
if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
@ -2082,11 +2098,15 @@ static void rcu_do_batch(struct rcu_data *rdp)
div = READ_ONCE(rcu_divisor);
div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
bl = max(rdp->blimit, pending >> div);
-if (in_serving_softirq() && unlikely(bl > 100)) {
+if ((in_serving_softirq() || rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING) &&
+(IS_ENABLED(CONFIG_RCU_DOUBLE_CHECK_CB_TIME) || unlikely(bl > 100))) {
+const long npj = NSEC_PER_SEC / HZ;
long rrn = READ_ONCE(rcu_resched_ns);
rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
tlimit = local_clock() + rrn;
+jlimit = jiffies + (rrn + npj + 1) / npj;
+jlimit_check = true;
}
trace_rcu_batch_start(rcu_state.name,
rcu_segcblist_n_cbs(&rdp->cblist), bl);
@ -2126,21 +2146,23 @@ static void rcu_do_batch(struct rcu_data *rdp)
* Make sure we don't spend too much time here and deprive other
* softirq vectors of CPU cycles.
*/
-if (unlikely(tlimit)) {
-/* only call local_clock() every 32 callbacks */
-if (likely((count & 31) || local_clock() < tlimit))
-continue;
-/* Exceeded the time limit, so leave. */
+if (rcu_do_batch_check_time(count, tlimit, jlimit_check, jlimit))
break;
-}
} else {
-// In rcuoc context, so no worries about depriving
-// other softirq vectors of CPU cycles.
+// In rcuc/rcuoc context, so no worries about
+// depriving other softirq vectors of CPU cycles.
local_bh_enable();
lockdep_assert_irqs_enabled();
cond_resched_tasks_rcu_qs();
lockdep_assert_irqs_enabled();
local_bh_disable();
// But rcuc kthreads can delay quiescent-state
// reporting, so check time limits for them.
if (rdp->rcu_cpu_kthread_status == RCU_KTHREAD_RUNNING &&
rcu_do_batch_check_time(count, tlimit, jlimit_check, jlimit)) {
rdp->rcu_cpu_has_work = 1;
break;
}
}
}
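(Worked check of the new backup limit, assuming the default rcutree.rcu_resched_ns of 3 ms and HZ=1000; both values are assumptions, not stated in this hunk.) With npj = NSEC_PER_SEC / HZ = 1,000,000, the code above computes jlimit = jiffies + (3,000,000 + 1,000,000 + 1) / 1,000,000 = jiffies + 4. So when CONFIG_RCU_DOUBLE_CHECK_CB_TIME=y, the callback loop gains a cheap time_after(jiffies, jlimit) escape hatch roughly one jiffy past the nanosecond budget, while local_clock() itself is still consulted only once per 32 callbacks.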
@ -2459,12 +2481,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
*statusp = RCU_KTHREAD_RUNNING;
local_irq_disable();
work = *workp;
-*workp = 0;
+WRITE_ONCE(*workp, 0);
local_irq_enable();
if (work)
rcu_core();
local_bh_enable();
-if (*workp == 0) {
+if (!READ_ONCE(*workp)) {
trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
*statusp = RCU_KTHREAD_WAITING;
return;
@ -2756,7 +2778,7 @@ EXPORT_SYMBOL_GPL(call_rcu);
*/
struct kvfree_rcu_bulk_data {
struct list_head list;
-unsigned long gp_snap;
+struct rcu_gp_oldstate gp_snap;
unsigned long nr_records;
void *records[];
};
@ -2773,6 +2795,7 @@ struct kvfree_rcu_bulk_data {
* struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
* @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
* @head_free: List of kfree_rcu() objects waiting for a grace period
+* @head_free_gp_snap: Grace-period snapshot to check for attempted premature frees.
* @bulk_head_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
* @krcp: Pointer to @kfree_rcu_cpu structure
*/
@ -2780,6 +2803,7 @@ struct kvfree_rcu_bulk_data {
struct kfree_rcu_cpu_work {
struct rcu_work rcu_work;
struct rcu_head *head_free;
+struct rcu_gp_oldstate head_free_gp_snap;
struct list_head bulk_head_free[FREE_N_CHANNELS];
struct kfree_rcu_cpu *krcp;
};
@ -2900,6 +2924,9 @@ drain_page_cache(struct kfree_rcu_cpu *krcp)
struct llist_node *page_list, *pos, *n;
int freed = 0;
+if (!rcu_min_cached_objs)
+return 0;
raw_spin_lock_irqsave(&krcp->lock, flags);
page_list = llist_del_all(&krcp->bkvcache);
WRITE_ONCE(krcp->nr_bkv_objs, 0);
@ -2920,24 +2947,25 @@ kvfree_rcu_bulk(struct kfree_rcu_cpu *krcp,
unsigned long flags;
int i;
-debug_rcu_bhead_unqueue(bnode);
-rcu_lock_acquire(&rcu_callback_map);
-if (idx == 0) { // kmalloc() / kfree().
-trace_rcu_invoke_kfree_bulk_callback(
-rcu_state.name, bnode->nr_records,
-bnode->records);
-kfree_bulk(bnode->nr_records, bnode->records);
-} else { // vmalloc() / vfree().
-for (i = 0; i < bnode->nr_records; i++) {
-trace_rcu_invoke_kvfree_callback(
-rcu_state.name, bnode->records[i], 0);
-vfree(bnode->records[i]);
-}
-}
-rcu_lock_release(&rcu_callback_map);
+if (!WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&bnode->gp_snap))) {
+debug_rcu_bhead_unqueue(bnode);
+rcu_lock_acquire(&rcu_callback_map);
+if (idx == 0) { // kmalloc() / kfree().
+trace_rcu_invoke_kfree_bulk_callback(
+rcu_state.name, bnode->nr_records,
+bnode->records);
+kfree_bulk(bnode->nr_records, bnode->records);
+} else { // vmalloc() / vfree().
+for (i = 0; i < bnode->nr_records; i++) {
+trace_rcu_invoke_kvfree_callback(
+rcu_state.name, bnode->records[i], 0);
+vfree(bnode->records[i]);
+}
+}
+rcu_lock_release(&rcu_callback_map);
+}
raw_spin_lock_irqsave(&krcp->lock, flags); raw_spin_lock_irqsave(&krcp->lock, flags);
if (put_cached_bnode(krcp, bnode)) if (put_cached_bnode(krcp, bnode))
@ -2984,6 +3012,7 @@ static void kfree_rcu_work(struct work_struct *work)
struct rcu_head *head;
struct kfree_rcu_cpu *krcp;
struct kfree_rcu_cpu_work *krwp;
+struct rcu_gp_oldstate head_gp_snap;
int i;
krwp = container_of(to_rcu_work(work),
@ -2998,6 +3027,7 @@ static void kfree_rcu_work(struct work_struct *work)
// Channel 3.
head = krwp->head_free;
krwp->head_free = NULL;
+head_gp_snap = krwp->head_free_gp_snap;
raw_spin_unlock_irqrestore(&krcp->lock, flags);
// Handle the first two channels.
@ -3014,7 +3044,8 @@ static void kfree_rcu_work(struct work_struct *work)
* queued on a linked list through their rcu_head structures.
* This list is named "Channel 3".
*/
-kvfree_rcu_list(head);
+if (head && !WARN_ON_ONCE(!poll_state_synchronize_rcu_full(&head_gp_snap)))
+kvfree_rcu_list(head);
}
static bool
@ -3081,7 +3112,7 @@ kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
INIT_LIST_HEAD(&bulk_ready[i]);
list_for_each_entry_safe_reverse(bnode, n, &krcp->bulk_head[i], list) {
-if (!poll_state_synchronize_rcu(bnode->gp_snap))
+if (!poll_state_synchronize_rcu_full(&bnode->gp_snap))
break;
atomic_sub(bnode->nr_records, &krcp->bulk_count[i]);
@ -3146,6 +3177,7 @@ static void kfree_rcu_monitor(struct work_struct *work)
// objects queued on the linked list.
if (!krwp->head_free) {
krwp->head_free = krcp->head;
+get_state_synchronize_rcu_full(&krwp->head_free_gp_snap);
atomic_set(&krcp->head_count, 0);
WRITE_ONCE(krcp->head, NULL);
}
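(Hedged, self-contained sketch of the snapshot/poll pattern used above; struct my_obj and the freeing policy are illustrative, not this file's code.)

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct my_obj {
            struct rcu_gp_oldstate gp_snap;
            /* ... payload ... */
    };

    static void my_retire(struct my_obj *p)
    {
            /* Record the grace-period state when the object is retired. */
            get_state_synchronize_rcu_full(&p->gp_snap);
    }

    static bool my_try_free(struct my_obj *p)
    {
            /* Free only if a full grace period has elapsed since my_retire(). */
            if (!poll_state_synchronize_rcu_full(&p->gp_snap))
                    return false;
            kfree(p);
            return true;
    }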
@ -3194,7 +3226,7 @@ static void fill_page_cache_func(struct work_struct *work)
nr_pages = atomic_read(&krcp->backoff_page_cache_fill) ?
1 : rcu_min_cached_objs;
-for (i = 0; i < nr_pages; i++) {
+for (i = READ_ONCE(krcp->nr_bkv_objs); i < nr_pages; i++) {
bnode = (struct kvfree_rcu_bulk_data *)
__get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
@ -3218,6 +3250,10 @@ static void fill_page_cache_func(struct work_struct *work)
static void
run_page_cache_worker(struct kfree_rcu_cpu *krcp)
{
+// If cache disabled, bail out.
+if (!rcu_min_cached_objs)
+return;
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
!atomic_xchg(&krcp->work_in_progress, 1)) {
if (atomic_read(&krcp->backoff_page_cache_fill)) {
@ -3272,7 +3308,7 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
// scenarios.
bnode = (struct kvfree_rcu_bulk_data *)
__get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
-*krcp = krc_this_cpu_lock(flags);
+raw_spin_lock_irqsave(&(*krcp)->lock, *flags);
}
if (!bnode)
@ -3285,7 +3321,7 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
// Finally insert and update the GP for this page.
bnode->records[bnode->nr_records++] = ptr;
-bnode->gp_snap = get_state_synchronize_rcu();
+get_state_synchronize_rcu_full(&bnode->gp_snap);
atomic_inc(&(*krcp)->bulk_count[idx]);
return true;
@ -4283,7 +4319,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
*/
rnp = rdp->mynode;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
-rdp->beenonline = true; /* We have now been online. */
rdp->gp_seq = READ_ONCE(rnp->gp_seq);
rdp->gp_seq_needed = rdp->gp_seq;
rdp->cpu_no_qs.b.norm = true;
@ -4310,6 +4345,16 @@ static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
}
/*
* Has the specified (known valid) CPU ever been fully online?
*/
bool rcu_cpu_beenfullyonline(int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
return smp_load_acquire(&rdp->beenonline);
}
/*
* Near the end of the CPU-online process. Pretty much all services
* enabled, and the CPU is now very much alive.
@ -4368,15 +4413,16 @@ int rcutree_offline_cpu(unsigned int cpu)
* Note that this function is special in that it is invoked directly
* from the incoming CPU rather than from the cpuhp_step mechanism.
* This is because this function must be invoked at a precise location.
+* This incoming CPU must not have enabled interrupts yet.
*/
void rcu_cpu_starting(unsigned int cpu)
{
-unsigned long flags;
unsigned long mask;
struct rcu_data *rdp;
struct rcu_node *rnp;
bool newcpu;
+lockdep_assert_irqs_disabled();
rdp = per_cpu_ptr(&rcu_data, cpu);
if (rdp->cpu_started)
return;
@ -4384,7 +4430,6 @@ void rcu_cpu_starting(unsigned int cpu)
rnp = rdp->mynode;
mask = rdp->grpmask;
-local_irq_save(flags);
arch_spin_lock(&rcu_state.ofl_lock);
rcu_dynticks_eqs_online();
raw_spin_lock(&rcu_state.barrier_lock);
@ -4403,17 +4448,17 @@ void rcu_cpu_starting(unsigned int cpu)
/* An incoming CPU should never be blocking a grace period. */
if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
/* rcu_report_qs_rnp() *really* wants some flags to restore */
-unsigned long flags2;
-local_irq_save(flags2);
+unsigned long flags;
+local_irq_save(flags);
rcu_disable_urgency_upon_qs(rdp);
/* Report QS -after- changing ->qsmaskinitnext! */
-rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2);
+rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
} else {
raw_spin_unlock_rcu_node(rnp);
}
arch_spin_unlock(&rcu_state.ofl_lock);
-local_irq_restore(flags);
+smp_store_release(&rdp->beenonline, true);
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}
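(Hedged generic illustration; my_state and my_ready are made-up names, not RCU code.) The smp_store_release() of ->beenonline above pairs with the smp_load_acquire() in the new rcu_cpu_beenfullyonline() helper, the usual publish/observe handshake:

    #include <asm/barrier.h>

    static int my_state;
    static bool my_ready;

    static void my_publish(void)
    {
            my_state = 42;                          /* initialize everything first */
            smp_store_release(&my_ready, true);     /* then publish the flag */
    }

    static bool my_consume(int *out)
    {
            if (!smp_load_acquire(&my_ready))       /* pairs with the release above */
                    return false;
            *out = my_state;                        /* guaranteed to observe the init */
            return true;
    }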

View File

@ -643,7 +643,7 @@ static void synchronize_rcu_expedited_wait(void)
"O."[!!cpu_online(cpu)], "O."[!!cpu_online(cpu)],
"o."[!!(rdp->grpmask & rnp->expmaskinit)], "o."[!!(rdp->grpmask & rnp->expmaskinit)],
"N."[!!(rdp->grpmask & rnp->expmaskinitnext)], "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
"D."[!!(rdp->cpu_no_qs.b.exp)]); "D."[!!data_race(rdp->cpu_no_qs.b.exp)]);
} }
} }
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",

View File

@ -1319,13 +1319,22 @@ lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
int cpu;
unsigned long count = 0;
if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask)))
return 0;
/* Protect rcu_nocb_mask against concurrent (de-)offloading. */
if (!mutex_trylock(&rcu_state.barrier_mutex))
return 0;
/* Snapshot count of all CPUs */
-for_each_possible_cpu(cpu) {
+for_each_cpu(cpu, rcu_nocb_mask) {
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
count += READ_ONCE(rdp->lazy_len);
}
+mutex_unlock(&rcu_state.barrier_mutex);
return count ? count : SHRINK_EMPTY;
}
@ -1336,15 +1345,45 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
unsigned long flags;
unsigned long count = 0;
+if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask)))
+return 0;
+/*
+* Protect against concurrent (de-)offloading. Otherwise nocb locking
+* may be ignored or imbalanced.
+*/
+if (!mutex_trylock(&rcu_state.barrier_mutex)) {
+/*
+* But really don't insist if barrier_mutex is contended since we
+* can't guarantee that it will never engage in a dependency
+* chain involving memory allocation. The lock is seldom contended
+* anyway.
+*/
+return 0;
+}
/* Snapshot count of all CPUs */
-for_each_possible_cpu(cpu) {
-struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
-int _count = READ_ONCE(rdp->lazy_len);
-if (_count == 0)
+for_each_cpu(cpu, rcu_nocb_mask) {
+struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+int _count;
+if (WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)))
continue;
+if (!READ_ONCE(rdp->lazy_len))
+continue;
rcu_nocb_lock_irqsave(rdp, flags);
-WRITE_ONCE(rdp->lazy_len, 0);
+/*
+* Recheck under the nocb lock. Since we are not holding the bypass
+* lock we may still race with increments from the enqueuer but still
+* we know for sure if there is at least one lazy callback.
+*/
+_count = READ_ONCE(rdp->lazy_len);
+if (!_count) {
+rcu_nocb_unlock_irqrestore(rdp, flags);
+continue;
+}
+WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp(rdp, false);
sc->nr_to_scan -= _count;
@ -1352,6 +1391,9 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
if (sc->nr_to_scan <= 0)
break;
}
+mutex_unlock(&rcu_state.barrier_mutex);
return count ? count : SHRINK_STOP;
}

View File

@ -257,6 +257,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* GP should not be able to end until we report, so there should be
* no need to check for a subsequent expedited GP. (Though we are
* still in a quiescent state in any case.)
+*
+* Interrupts are disabled, so ->cpu_no_qs.b.exp cannot change.
*/
if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
rcu_report_exp_rdp(rdp);
@ -941,7 +943,7 @@ notrace void rcu_preempt_deferred_qs(struct task_struct *t)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-if (rdp->cpu_no_qs.b.exp)
+if (READ_ONCE(rdp->cpu_no_qs.b.exp))
rcu_report_exp_rdp(rdp);
}
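(Hedged illustration of the annotation pattern; my_flag and the helpers are stand-ins.) This change and the tree_exp.h change above mark reads of ->cpu_no_qs.b.exp that can race with the CPU clearing it: data_race() for a diagnostic-only read, READ_ONCE() when the value is acted upon:

    #include <linux/compiler.h>

    static int my_flag;

    static void my_set_flag(void)
    {
            WRITE_ONCE(my_flag, 1);         /* marked store on the writer side */
    }

    static int my_peek_flag(void)
    {
            return data_race(my_flag);      /* diagnostics only: race is intentional */
    }

    static bool my_flag_set(void)
    {
            return READ_ONCE(my_flag) != 0; /* value is used, so use READ_ONCE() */
    }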

View File

@ -250,7 +250,7 @@ identify_qemu_args () {
echo -machine virt,gic-version=host -cpu host
;;
qemu-system-ppc64)
-echo -enable-kvm -M pseries -nodefaults
+echo -M pseries -nodefaults
echo -device spapr-vscsi
if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
then

View File

@ -5,4 +5,4 @@ rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
threadirqs
-tree.use_softirq=0
+rcutree.use_softirq=0

View File

@ -4,4 +4,4 @@ rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
threadirqs
-tree.use_softirq=0
+rcutree.use_softirq=0