rcutorture: Don't count CPU-stalled time against priority boosting

It will frequently be the case that rcu_torture_boost() will get a ->start_gp_poll() cookie that needs almost all of the current grace period plus an additional grace period to elapse before ->poll_gp_state() will return true.

It is quite possible that the current grace period will have (say) two seconds of stall by a CPU failing to pass through a quiescent state, followed by 300 milliseconds of delay due to a preempted reader. The next grace period might suffer only one second of stall by a CPU, followed by another 300 milliseconds of delay due to a preempted reader. This is an example of RCU priority boosting doing its job, but the full elapsed time of 3.6 seconds exceeds the 3.5-second limit. In addition, there is no CPU stall in force at the 3.5-second mark, so this would nevertheless currently be counted as an RCU priority boosting failure.

This commit therefore avoids this sort of false positive by resetting the gp_state_time timestamp any time that the current grace period is being blocked by a CPU. This results in extremely frequent calls to the ->check_boost_failed() function, so this commit provides a lockless fastpath that is selected by supplying a NULL CPU-number pointer.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
2024-12-23 04:34:11 +08:00 · 2021-04-14 13:00:10 -07:00 · 2021-04-14 13:00:10 -07:00 · 063f5a4df9
commit 063f5a4df9
parent 0260b92e1c
2 changed files with 18 additions and 5 deletions
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -918,17 +918,18 @@ static void rcu_torture_enable_rt_throttle(void)
 	old_rt_runtime = -1;
 }

-static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long start, unsigned long end)
+static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long *start)
 {
 	int cpu;
 	static int dbg_done;
+	unsigned long end = jiffies;
 	bool gp_done;
 	unsigned long j;
 	static unsigned long last_persist;
 	unsigned long lp;
 	unsigned long mininterval = test_boost_duration * HZ - HZ / 2;

-	if (end - start > mininterval) {
+	if (end - *start > mininterval) {
 		// Recheck after checking time to avoid false positives.
 		smp_mb(); // Time check before grace-period check.
 		if (cur_ops->poll_gp_state(gp_state))
@@ -945,7 +946,7 @@ static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long start
 		n_rcu_torture_boost_failure++;
 		if (!xchg(&dbg_done, 1) && cur_ops->gp_kthread_dbg) {
 			pr_info("Boost inversion thread ->rt_priority %u gp_state %lu jiffies %lu\n",
-				current->rt_priority, gp_state, end - start);
+				current->rt_priority, gp_state, end - *start);
 			cur_ops->gp_kthread_dbg();
 			// Recheck after print to flag grace period ending during splat.
 			gp_done = cur_ops->poll_gp_state(gp_state);
@@ -955,6 +956,8 @@ static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long start
 		}

 		return true; // failed
+	} else if (cur_ops->check_boost_failed && !cur_ops->check_boost_failed(gp_state, NULL)) {
+		*start = jiffies;
 	}

 	return false; // passed
@@ -995,7 +998,7 @@ static int rcu_torture_boost(void *arg)
 		while (time_before(jiffies, endtime)) {
 			// Has current GP gone too long?
 			if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state))
-				failed = rcu_torture_boost_failed(gp_state, gp_state_time, jiffies);
+				failed = rcu_torture_boost_failed(gp_state, &gp_state_time);
 			// If we don't have a grace period in flight, start one.
 			if (!gp_initiated || cur_ops->poll_gp_state(gp_state)) {
 				gp_state = cur_ops->start_gp_poll();
@@ -1016,7 +1019,7 @@ static int rcu_torture_boost(void *arg)

 		// In case the grace period extended beyond the end of the loop.
 		if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state))
-			rcu_torture_boost_failed(gp_state, gp_state_time, jiffies);
+			rcu_torture_boost_failed(gp_state, &gp_state_time);

 		/*
 		 * Set the start time of the next test interval.
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -723,6 +723,10 @@ static void check_cpu_stall(struct rcu_data *rdp)
 * count this as an RCU priority boosting failure.  A return of true says
 * RCU priority boosting is to blame, and false says otherwise.  If false
 * is returned, the first of the CPUs to blame is stored through cpup.
+ *
+ * If cpup is NULL, then a lockless quick check is carried out, suitable
+ * for high-rate usage.  On the other hand, if cpup is non-NULL, each
+ * rcu_node structure's ->lock is acquired, ruling out high-rate usage.
 */
 bool rcu_check_boost_fail(unsigned long gp_state, int *cpup)
 {
@@ -731,6 +735,12 @@ bool rcu_check_boost_fail(unsigned long gp_state, int *cpup)
 	struct rcu_node *rnp;

 	rcu_for_each_leaf_node(rnp) {
+		if (!cpup) {
+			if (READ_ONCE(rnp->qsmask))
+				return false;
+			else
+				continue;
+		}
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		if (!rnp->qsmask) {
 			// No CPUs without quiescent states for this rnp.