Merge branch 'strictgp.2020.08.24a' into HEAD

strictgp.2020.08.24a: Strict grace periods for KASAN testing.
Paul E. McKenney 2020-09-03 09:47:42 -07:00
commit 7fbe67e46a
7 changed files with 129 additions and 16 deletions

Documentation/admin-guide/kernel-parameters.txt

@@ -4152,6 +4152,15 @@
 		This wake_up() will be accompanied by a
 		WARN_ONCE() splat and an ftrace_dump().

+	rcutree.rcu_unlock_delay= [KNL]
+		In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
+		this specifies an rcu_read_unlock()-time delay
+		in microseconds.  This defaults to zero.
+		Larger delays increase the probability of
+		catching RCU pointer leaks, that is, buggy use
+		of RCU-protected pointers after the relevant
+		rcu_read_unlock() has completed.
+
 	rcutree.sysrq_rcu= [KNL]
 		Commandeer a sysrq key to dump out Tree RCU's
 		rcu_node tree with an eye towards determining
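
A minimal sketch of the "RCU pointer leak" bug class that this delay is meant to help KASAN catch; struct foo, gp, and do_something() are illustrative names only and are not part of this patch:

struct foo {
	int a;
};
static struct foo __rcu *gp;	/* hypothetical RCU-protected pointer */

static void buggy_reader(void)
{
	struct foo *p;

	rcu_read_lock();
	p = rcu_dereference(gp);
	rcu_read_unlock();
	/*
	 * BUG: "p" is used after rcu_read_unlock(), so the grace period
	 * may already have elapsed and the object may have been freed.
	 * Booting with, say, rcutree.rcu_unlock_delay=100 makes the
	 * unlock above spin for 100 microseconds, widening the window in
	 * which a strict grace period completes and KASAN can flag this
	 * access as a use-after-free.
	 */
	do_something(p->a);
}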

include/linux/rcupdate.h

@@ -55,6 +55,12 @@ void __rcu_read_unlock(void);
 #else /* #ifdef CONFIG_PREEMPT_RCU */

+#ifdef CONFIG_TINY_RCU
+#define rcu_read_unlock_strict() do { } while (0)
+#else
+void rcu_read_unlock_strict(void);
+#endif
+
 static inline void __rcu_read_lock(void)
 {
 	preempt_disable();
@@ -63,6 +69,7 @@ static inline void __rcu_read_lock(void)
 static inline void __rcu_read_unlock(void)
 {
 	preempt_enable();
+	rcu_read_unlock_strict();
 }

 static inline int rcu_preempt_depth(void)

kernel/rcu/Kconfig

@@ -135,10 +135,12 @@ config RCU_FANOUT
 config RCU_FANOUT_LEAF
 	int "Tree-based hierarchical RCU leaf-level fanout value"
-	range 2 64 if 64BIT
-	range 2 32 if !64BIT
+	range 2 64 if 64BIT && !RCU_STRICT_GRACE_PERIOD
+	range 2 32 if !64BIT && !RCU_STRICT_GRACE_PERIOD
+	range 2 3 if RCU_STRICT_GRACE_PERIOD
 	depends on TREE_RCU && RCU_EXPERT
-	default 16
+	default 16 if !RCU_STRICT_GRACE_PERIOD
+	default 2 if RCU_STRICT_GRACE_PERIOD
 	help
 	  This option controls the leaf-level fanout of hierarchical
 	  implementations of RCU, and allows trading off cache misses
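
For a rough sense of what the tighter range does to the tree geometry, the sketch below mirrors the DIV_ROUND_UP() sizing of the leaf level; leaf_nodes_needed() is a hypothetical helper for illustration, not a kernel function:

/*
 * Each leaf rcu_node covers at most RCU_FANOUT_LEAF CPUs, so on a
 * 16-CPU KASAN test box:
 *   leaf_nodes_needed(16, 16) == 1   (default RCU_FANOUT_LEAF)
 *   leaf_nodes_needed(16,  2) == 8   (RCU_STRICT_GRACE_PERIOD default)
 * leaving far fewer CPUs contending on each leaf rcu_node ->lock while
 * strict grace periods repeatedly drive quiescent-state reporting.
 */
static unsigned int leaf_nodes_needed(unsigned int ncpus, unsigned int fanout_leaf)
{
	return DIV_ROUND_UP(ncpus, fanout_leaf);
}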

kernel/rcu/Kconfig.debug

@@ -114,4 +114,19 @@ config RCU_EQS_DEBUG
 	  Say N here if you need ultimate kernel/user switch latencies
 	  Say Y if you are unsure

+config RCU_STRICT_GRACE_PERIOD
+	bool "Provide debug RCU implementation with short grace periods"
+	depends on DEBUG_KERNEL && RCU_EXPERT
+	default n
+	select PREEMPT_COUNT if PREEMPT=n
+	help
+	  Select this option to build an RCU variant that is strict about
+	  grace periods, making them as short as it can.  This limits
+	  scalability, destroys real-time response, degrades battery
+	  lifetime and kills performance.  Don't try this on large
+	  machines, as in systems with more than about 10 or 20 CPUs.
+	  But in conjunction with tools like KASAN, it can be helpful
+	  when looking for certain types of RCU usage bugs, for example,
+	  too-short RCU read-side critical sections.
+
 endmenu # "RCU Debugging"

kernel/rcu/tree.c

@@ -165,6 +165,12 @@ module_param(gp_init_delay, int, 0444);
 static int gp_cleanup_delay;
 module_param(gp_cleanup_delay, int, 0444);

+// Add delay to rcu_read_unlock() for strict grace periods.
+static int rcu_unlock_delay;
+#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
+module_param(rcu_unlock_delay, int, 0444);
+#endif
+
 /*
  * This rcu parameter is runtime-read-only.  It reflects
  * a minimum allowed number of objects which can be cached
@@ -455,24 +461,25 @@ static int rcu_is_cpu_rrupt_from_idle(void)
 	return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
 }

-#define DEFAULT_RCU_BLIMIT 10     /* Maximum callbacks per rcu_do_batch ... */
-#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
+#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
+				// Maximum callbacks per rcu_do_batch ...
+#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
 static long blimit = DEFAULT_RCU_BLIMIT;
-#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
+#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
 static long qhimark = DEFAULT_RCU_QHIMARK;
-#define DEFAULT_RCU_QLOMARK 100   /* Once only this many pending, use blimit. */
+#define DEFAULT_RCU_QLOMARK 100   // Once only this many pending, use blimit.
 static long qlowmark = DEFAULT_RCU_QLOMARK;
 #define DEFAULT_RCU_QOVLD_MULT 2
 #define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
-static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */
-static long qovld_calc = -1;	  /* No pre-initialization lock acquisitions! */
+static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
+static long qovld_calc = -1;	  // No pre-initialization lock acquisitions!

 module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
 module_param(qlowmark, long, 0444);
 module_param(qovld, long, 0444);

-static ulong jiffies_till_first_fqs = ULONG_MAX;
+static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
 static ulong jiffies_till_next_fqs = ULONG_MAX;
 static bool rcu_kick_kthreads;
 static int rcu_divisor = 7;
@@ -1571,6 +1578,19 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
 	raw_spin_unlock_rcu_node(rnp);
 }

+/*
+ * In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a
+ * quiescent state.  This is intended to be invoked when the CPU notices
+ * a new grace period.
+ */
+static void rcu_strict_gp_check_qs(void)
+{
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+		rcu_read_lock();
+		rcu_read_unlock();
+	}
+}
+
 /*
  * Update CPU-local rcu_data state to record the beginnings and ends of
  * grace periods.  The caller must hold the ->lock of the leaf rcu_node
@@ -1641,6 +1661,7 @@ static void note_gp_changes(struct rcu_data *rdp)
 	}
 	needwake = __note_gp_changes(rnp, rdp);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	rcu_strict_gp_check_qs();
 	if (needwake)
 		rcu_gp_kthread_wake();
 }
@@ -1678,6 +1699,15 @@ static void rcu_gp_torture_wait(void)
 	}
 }

+/*
+ * Handler for on_each_cpu() to invoke the target CPU's RCU core
+ * processing.
+ */
+static void rcu_strict_gp_boundary(void *unused)
+{
+	invoke_rcu_core();
+}
+
 /*
  * Initialize a new grace period.  Return false if no grace period required.
  */
@@ -1809,6 +1839,10 @@ static bool rcu_gp_init(void)
 		WRITE_ONCE(rcu_state.gp_activity, jiffies);
 	}

+	// If strict, make all CPUs aware of new grace period.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
+
 	return true;
 }
@@ -2025,6 +2059,10 @@ static void rcu_gp_cleanup(void)
 				  rcu_state.gp_flags & RCU_GP_FLAG_INIT);
 	}
 	raw_spin_unlock_irq_rcu_node(rnp);
+
+	// If strict, make all CPUs aware of the end of the old grace period.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
 }

 /*
@@ -2203,7 +2241,7 @@ rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
  * structure.  This must be called from the specified CPU.
  */
 static void
-rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
+rcu_report_qs_rdp(struct rcu_data *rdp)
 {
 	unsigned long flags;
 	unsigned long mask;
@@ -2212,6 +2250,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 			       rcu_segcblist_is_offloaded(&rdp->cblist);
 	struct rcu_node *rnp;

+	WARN_ON_ONCE(rdp->cpu != smp_processor_id());
 	rnp = rdp->mynode;
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
@@ -2228,8 +2267,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 		return;
 	}
 	mask = rdp->grpmask;
-	if (rdp->cpu == smp_processor_id())
-		rdp->core_needs_qs = false;
+	rdp->core_needs_qs = false;
 	if ((rnp->qsmask & mask) == 0) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	} else {
@@ -2278,7 +2316,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
 	 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
 	 * judge of that).
 	 */
-	rcu_report_qs_rdp(rdp->cpu, rdp);
+	rcu_report_qs_rdp(rdp);
 }

 /*
@@ -2621,6 +2659,14 @@ void rcu_force_quiescent_state(void)
 }
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);

+// Workqueue handler for an RCU reader for kernels enforcing struct RCU
+// grace periods.
+static void strict_work_handler(struct work_struct *work)
+{
+	rcu_read_lock();
+	rcu_read_unlock();
+}
+
 /* Perform RCU core processing work for the current CPU. */
 static __latent_entropy void rcu_core(void)
 {
@@ -2665,6 +2711,10 @@ static __latent_entropy void rcu_core(void)
 	/* Do any needed deferred wakeups of rcuo kthreads. */
 	do_nocb_deferred_wakeup(rdp);
 	trace_rcu_utilization(TPS("End RCU core"));
+
+	// If strict GPs, schedule an RCU reader in a clean environment.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);
 }

 static void rcu_core_si(struct softirq_action *h)
@@ -3862,6 +3912,7 @@ rcu_boot_init_percpu_data(int cpu)
 	/* Set up local state, ensuring consistent view of global state. */
 	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
+	INIT_WORK(&rdp->strict_work, strict_work_handler);
 	WARN_ON_ONCE(rdp->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;

kernel/rcu/tree.h

@@ -165,6 +165,7 @@ struct rcu_data {
 					/*  period it is aware of. */
 	struct irq_work defer_qs_iw;	/* Obtain later scheduler attention. */
 	bool defer_qs_iw_pending;	/* Scheduler attention pending? */
+	struct work_struct strict_work;	/* Schedule readers for strict GPs. */

 	/* 2) batch handling */
 	struct rcu_segcblist cblist;	/* Segmented callback list, with */

kernel/rcu/tree_plugin.h

@@ -36,6 +36,8 @@ static void __init rcu_bootup_announce_oddness(void)
 		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
 	if (IS_ENABLED(CONFIG_PROVE_RCU))
 		pr_info("\tRCU lockdep checking is enabled.\n");
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
 	if (RCU_NUM_LVLS >= 4)
 		pr_info("\tFour(or more)-level hierarchy is enabled.\n");
 	if (RCU_FANOUT_LEAF != 16)
@@ -374,6 +376,8 @@ void __rcu_read_lock(void)
 	rcu_preempt_read_enter();
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
 		WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
+		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
 	barrier();  /* critical section after entry code. */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_lock);
@@ -455,8 +459,14 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 		return;
 	}
 	t->rcu_read_unlock_special.s = 0;
-	if (special.b.need_qs)
-		rcu_qs();
+	if (special.b.need_qs) {
+		if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+			rcu_report_qs_rdp(rdp);
+			udelay(rcu_unlock_delay);
+		} else {
+			rcu_qs();
+		}
+	}

 	/*
 	 * Respond to a request by an expedited grace period for a
@@ -768,6 +778,24 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)

 #else /* #ifdef CONFIG_PREEMPT_RCU */

+/*
+ * If strict grace periods are enabled, and if the calling
+ * __rcu_read_unlock() marks the beginning of a quiescent state, immediately
+ * report that quiescent state and, if requested, spin for a bit.
+ */
+void rcu_read_unlock_strict(void)
+{
+	struct rcu_data *rdp;
+
+	if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
+	    irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
+		return;
+	rdp = this_cpu_ptr(&rcu_data);
+	rcu_report_qs_rdp(rdp);
+	udelay(rcu_unlock_delay);
+}
+EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);
+
 /*
  * Tell them what RCU they are running.
  */