diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index f3778f8952da..b8c3c813ea57 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -396,14 +396,14 @@ o Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node The output of "cat rcu/rcu_sched/rcu_pending" looks as follows: - 0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 - 1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 - 2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 - 3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 - 4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 - 5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 - 6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 - 7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 + 0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 ndw=0 + 1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 ndw=0 + 2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 ndw=0 + 3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 ndw=0 + 4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 ndw=0 + 5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 ndw=0 + 6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 ndw=0 + 7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 ndw=0 The fields are as follows: @@ -432,6 +432,10 @@ o "gpc" is the number of times that an old grace period had o "gps" is the number of times that a new grace period had started, but this CPU was not yet aware of it. +o "ndw" is the number of times that a wakeup of an rcuo + callback-offload kthread had to be deferred in order to avoid + deadlock. + o "nn" is the number of times that this CPU needed nothing. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index abef9c358d47..264f0284c0bd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -369,6 +369,9 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, bool user) { + struct rcu_state *rsp; + struct rcu_data *rdp; + trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); if (!user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = @@ -380,6 +383,10 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ } + for_each_rcu_flavor(rsp) { + rdp = this_cpu_ptr(rsp->rda); + do_nocb_deferred_wakeup(rdp); + } rcu_prepare_for_idle(smp_processor_id()); /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ smp_mb__before_atomic_inc(); /* See above. */ @@ -1928,13 +1935,13 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, * Adopt the RCU callbacks from the specified rcu_state structure's * orphanage. The caller must hold the ->orphan_lock. */ -static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) +static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) { int i; struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); /* No-CBs CPUs are handled specially. */ - if (rcu_nocb_adopt_orphan_cbs(rsp, rdp)) + if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) return; /* Do the accounting first. */ @@ -2013,7 +2020,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); - rcu_adopt_orphan_cbs(rsp); + rcu_adopt_orphan_cbs(rsp, flags); /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ mask = rdp->grpmask; /* rnp->grplo is constant. */ @@ -2330,6 +2337,9 @@ __rcu_process_callbacks(struct rcu_state *rsp) /* If there are callbacks ready, invoke them. */ if (cpu_has_callbacks_ready_to_invoke(rdp)) invoke_rcu_callbacks(rsp, rdp); + + /* Do any needed deferred wakeups of rcuo kthreads. */ + do_nocb_deferred_wakeup(rdp); } /* @@ -2464,7 +2474,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), if (cpu != -1) rdp = per_cpu_ptr(rsp->rda, cpu); - offline = !__call_rcu_nocb(rdp, head, lazy); + offline = !__call_rcu_nocb(rdp, head, lazy, flags); WARN_ON_ONCE(offline); /* _call_rcu() is illegal on offline CPU; leak the callback. */ local_irq_restore(flags); @@ -2817,6 +2827,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) return 1; } + /* Does this CPU need a deferred NOCB wakeup? */ + if (rcu_nocb_need_deferred_wakeup(rdp)) { + rdp->n_rp_nocb_defer_wakeup++; + return 1; + } + /* nothing to do */ rdp->n_rp_need_nothing++; return 0; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 8e34d8674a4e..a87adfc2916b 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -317,6 +317,7 @@ struct rcu_data { unsigned long n_rp_cpu_needs_gp; unsigned long n_rp_gp_completed; unsigned long n_rp_gp_started; + unsigned long n_rp_nocb_defer_wakeup; unsigned long n_rp_need_nothing; /* 6) _rcu_barrier() and OOM callbacks. */ @@ -335,6 +336,7 @@ struct rcu_data { int nocb_p_count_lazy; /* (approximate). */ wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ struct task_struct *nocb_kthread; + bool nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ /* 8) RCU CPU stall data. */ @@ -550,9 +552,12 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); static void rcu_init_one_nocb(struct rcu_node *rnp); static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, - bool lazy); + bool lazy, unsigned long flags); static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, - struct rcu_data *rdp); + struct rcu_data *rdp, + unsigned long flags); +static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); +static void do_nocb_deferred_wakeup(struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); static void rcu_kick_nohz_cpu(int cpu); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index b023e5407111..752ffaa0d681 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2104,7 +2104,8 @@ bool rcu_is_nocb_cpu(int cpu) static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, struct rcu_head *rhp, struct rcu_head **rhtp, - int rhcount, int rhcount_lazy) + int rhcount, int rhcount_lazy, + unsigned long flags) { int len; struct rcu_head **old_rhpp; @@ -2125,9 +2126,16 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, } len = atomic_long_read(&rdp->nocb_q_count); if (old_rhpp == &rdp->nocb_head) { - wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */ + if (!irqs_disabled_flags(flags)) { + wake_up(&rdp->nocb_wq); /* ... if queue was empty ... */ + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeEmpty")); + } else { + rdp->nocb_defer_wakeup = true; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeEmptyIsDeferred")); + } rdp->qlen_last_fqs_check = 0; - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty")); } else if (len > rdp->qlen_last_fqs_check + qhimark) { wake_up_process(t); /* ... or if many callbacks queued. */ rdp->qlen_last_fqs_check = LONG_MAX / 2; @@ -2148,12 +2156,12 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, * "rcuo" kthread can find it. */ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, - bool lazy) + bool lazy, unsigned long flags) { if (!rcu_is_nocb_cpu(rdp->cpu)) return 0; - __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); + __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags); if (__is_kfree_rcu_offset((unsigned long)rhp->func)) trace_rcu_kfree_callback(rdp->rsp->name, rhp, (unsigned long)rhp->func, @@ -2171,7 +2179,8 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, * not a no-CBs CPU. */ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, - struct rcu_data *rdp) + struct rcu_data *rdp, + unsigned long flags) { long ql = rsp->qlen; long qll = rsp->qlen_lazy; @@ -2185,14 +2194,14 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, /* First, enqueue the donelist, if any. This preserves CB ordering. */ if (rsp->orphan_donelist != NULL) { __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist, - rsp->orphan_donetail, ql, qll); + rsp->orphan_donetail, ql, qll, flags); ql = qll = 0; rsp->orphan_donelist = NULL; rsp->orphan_donetail = &rsp->orphan_donelist; } if (rsp->orphan_nxtlist != NULL) { __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist, - rsp->orphan_nxttail, ql, qll); + rsp->orphan_nxttail, ql, qll, flags); ql = qll = 0; rsp->orphan_nxtlist = NULL; rsp->orphan_nxttail = &rsp->orphan_nxtlist; @@ -2314,6 +2323,22 @@ static int rcu_nocb_kthread(void *arg) return 0; } +/* Is a deferred wakeup of rcu_nocb_kthread() required? */ +static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) +{ + return ACCESS_ONCE(rdp->nocb_defer_wakeup); +} + +/* Do a deferred wakeup of rcu_nocb_kthread(). */ +static void do_nocb_deferred_wakeup(struct rcu_data *rdp) +{ + if (!rcu_nocb_need_deferred_wakeup(rdp)) + return; + ACCESS_ONCE(rdp->nocb_defer_wakeup) = false; + wake_up(&rdp->nocb_wq); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty")); +} + /* Initialize per-rcu_data variables for no-CBs CPUs. */ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) { @@ -2369,13 +2394,14 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) } static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, - bool lazy) + bool lazy, unsigned long flags) { return 0; } static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, - struct rcu_data *rdp) + struct rcu_data *rdp, + unsigned long flags) { return 0; } @@ -2384,6 +2410,15 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) { } +static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) +{ + return false; +} + +static void do_nocb_deferred_wakeup(struct rcu_data *rdp) +{ +} + static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) { } diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 3596797b7e46..4def475336d4 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -364,9 +364,10 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) rdp->n_rp_report_qs, rdp->n_rp_cb_ready, rdp->n_rp_cpu_needs_gp); - seq_printf(m, "gpc=%ld gps=%ld nn=%ld\n", + seq_printf(m, "gpc=%ld gps=%ld nn=%ld ndw%ld\n", rdp->n_rp_gp_completed, rdp->n_rp_gp_started, + rdp->n_rp_nocb_defer_wakeup, rdp->n_rp_need_nothing); }