torture: Maintain torture-specific set of CPUs-online books

The TREE01 rcutorture scenario intentionally creates confusion as to the
number of available CPUs by specifying the "maxcpus=8 nr_cpus=43" kernel
boot parameters.  This can disable rcutorture's load shedding, which
currently uses num_online_cpus(), which would count the extra 35 CPUs.
However, the rcutorture guest OS will be provisioned with only 8 CPUs,
which means that rcutorture will present full load even when all but one
of the original 8 CPUs are offline.  This can result in spurious errors
due to extreme overloading of that single remaining CPU.

This commit therefore keeps a separate set of books on the number of
usable online CPUs, so that torture_num_online_cpus() is used for load
shedding instead of num_online_cpus().  Note that initial sizing must
use num_online_cpus() because torture_num_online_cpus() will return
NR_CPUS until shortly after torture_onoff_init() is invoked.

Reported-by: Frederic Weisbecker <frederic@kernel.org>
[ paulmck: Apply feedback from kernel test robot. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
This commit is contained in:
Paul E. McKenney 2020-12-19 07:34:35 -08:00
parent 0b962c8fe0
commit 1afb95fee0
3 changed files with 23 additions and 2 deletions

View File

@ -48,6 +48,11 @@ do { \
void verbose_torout_sleep(void);
/* Definitions for online/offline exerciser. */
#ifdef CONFIG_HOTPLUG_CPU
/* Torture-test view of the number of online CPUs; defined out of line. */
int torture_num_online_cpus(void);
#else /* #ifdef CONFIG_HOTPLUG_CPU */
/*
 * Stub used when the kernel is built without CONFIG_HOTPLUG_CPU: it
 * unconditionally reports a single CPU.
 * NOTE(review): this ignores how many CPUs are really online -- confirm
 * that callers relying on it for load decisions expect that.
 */
static inline int torture_num_online_cpus(void) { return 1; }
#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
typedef void torture_ofl_func(void);
/*
 * Attempt to take the specified CPU offline.  The pointer arguments are
 * caller-owned statistics for offline operations that the callee updates.
 */
bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
		     unsigned long *sum_offl, int *min_offl, int *max_offl);

View File

@ -1338,7 +1338,7 @@ static void rcu_torture_reader_do_mbchk(long myid, struct rcu_torture *rtp,
struct torture_random_state *trsp)
{
unsigned long loops;
int noc = num_online_cpus();
int noc = torture_num_online_cpus();
int rdrchked;
int rdrchker;
struct rcu_torture_reader_check *rtrcp; // Me.
@ -1658,7 +1658,7 @@ rcu_torture_reader(void *arg)
torture_hrtimeout_us(500, 1000, &rand);
lastsleep = jiffies + 10;
}
while (num_online_cpus() < mynumonline && !torture_must_stop())
while (torture_num_online_cpus() < mynumonline && !torture_must_stop())
schedule_timeout_interruptible(HZ / 5);
stutter_wait("rcu_torture_reader");
} while (!torture_must_stop());

View File

@ -175,6 +175,19 @@ static unsigned long sum_online;
static int min_online = -1;
static int max_online;
static int torture_online_cpus = NR_CPUS;
/*
 * Report how many CPUs the torture-test framework currently considers
 * online.  Some torture scenarios deliberately create confusion about
 * the number of online CPUs, so torture tests consult this separately
 * maintained count when deciding how much load to apply.  The count
 * starts out at NR_CPUS and is adjusted by torture_offline() and
 * torture_online().
 */
int torture_num_online_cpus(void)
{
	int ncpus = READ_ONCE(torture_online_cpus);

	return ncpus;
}
EXPORT_SYMBOL_GPL(torture_num_online_cpus);
/*
* Attempt to take a CPU offline. Return false if the CPU is already
* offline or if it is not subject to CPU-hotplug operations. The
@ -229,6 +242,8 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
*min_offl = delta;
if (*max_offl < delta)
*max_offl = delta;
WRITE_ONCE(torture_online_cpus, torture_online_cpus - 1);
WARN_ON_ONCE(torture_online_cpus <= 0);
}
return true;
@ -285,6 +300,7 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
*min_onl = delta;
if (*max_onl < delta)
*max_onl = delta;
WRITE_ONCE(torture_online_cpus, torture_online_cpus + 1);
}
return true;