mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 12:28:41 +08:00
rcu: add support for dynamic ticks and preempt rcu
The PREEMPT-RCU can get stuck if a CPU goes idle and NO_HZ is set. The idle CPU will not progress the RCU through its grace period and a synchronize_rcu my get stuck. Without this patch I have a box that will not boot when PREEMPT_RCU and NO_HZ are set. That same box boots fine with this patch. This patch comes from the -rt kernel where it has been tested for several months. Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
c0f4133b8f
commit
2232c2d8e0
@ -109,6 +109,14 @@ static inline void account_system_vtime(struct task_struct *tsk)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ)
|
||||
extern void rcu_irq_enter(void);
|
||||
extern void rcu_irq_exit(void);
|
||||
#else
|
||||
# define rcu_irq_enter() do { } while (0)
|
||||
# define rcu_irq_exit() do { } while (0)
|
||||
#endif /* CONFIG_PREEMPT_RCU */
|
||||
|
||||
/*
|
||||
* It is safe to do non-atomic ops on ->hardirq_context,
|
||||
* because NMI handlers may not preempt and the ops are
|
||||
@ -117,6 +125,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
|
||||
*/
|
||||
#define __irq_enter() \
|
||||
do { \
|
||||
rcu_irq_enter(); \
|
||||
account_system_vtime(current); \
|
||||
add_preempt_count(HARDIRQ_OFFSET); \
|
||||
trace_hardirq_enter(); \
|
||||
@ -135,6 +144,7 @@ extern void irq_enter(void);
|
||||
trace_hardirq_exit(); \
|
||||
account_system_vtime(current); \
|
||||
sub_preempt_count(HARDIRQ_OFFSET); \
|
||||
rcu_irq_exit(); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
|
@ -160,5 +160,8 @@ extern void rcu_restart_cpu(int cpu);
|
||||
extern long rcu_batches_completed(void);
|
||||
extern long rcu_batches_completed_bh(void);
|
||||
|
||||
#define rcu_enter_nohz() do { } while (0)
|
||||
#define rcu_exit_nohz() do { } while (0)
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* __LINUX_RCUCLASSIC_H */
|
||||
|
@ -82,5 +82,27 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
|
||||
|
||||
struct softirq_action;
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
DECLARE_PER_CPU(long, dynticks_progress_counter);
|
||||
|
||||
static inline void rcu_enter_nohz(void)
|
||||
{
|
||||
__get_cpu_var(dynticks_progress_counter)++;
|
||||
WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1);
|
||||
mb();
|
||||
}
|
||||
|
||||
static inline void rcu_exit_nohz(void)
|
||||
{
|
||||
mb();
|
||||
__get_cpu_var(dynticks_progress_counter)++;
|
||||
WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1));
|
||||
}
|
||||
|
||||
#else /* CONFIG_NO_HZ */
|
||||
#define rcu_enter_nohz() do { } while (0)
|
||||
#define rcu_exit_nohz() do { } while (0)
|
||||
#endif /* CONFIG_NO_HZ */
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* __LINUX_RCUPREEMPT_H */
|
||||
|
@ -23,6 +23,10 @@
|
||||
* to Suparna Bhattacharya for pushing me completely away
|
||||
* from atomic instructions on the read side.
|
||||
*
|
||||
* - Added handling of Dynamic Ticks
|
||||
* Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com>
|
||||
* - Steven Rostedt <srostedt@redhat.com>
|
||||
*
|
||||
* Papers: http://www.rdrop.com/users/paulmck/RCU
|
||||
*
|
||||
* Design Document: http://lwn.net/Articles/253651/
|
||||
@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
|
||||
DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
|
||||
static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
|
||||
static DEFINE_PER_CPU(int, rcu_update_flag);
|
||||
|
||||
/**
|
||||
* rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
|
||||
*
|
||||
* If the CPU was idle with dynamic ticks active, this updates the
|
||||
* dynticks_progress_counter to let the RCU handling know that the
|
||||
* CPU is active.
|
||||
*/
|
||||
void rcu_irq_enter(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
if (per_cpu(rcu_update_flag, cpu))
|
||||
per_cpu(rcu_update_flag, cpu)++;
|
||||
|
||||
/*
|
||||
* Only update if we are coming from a stopped ticks mode
|
||||
* (dynticks_progress_counter is even).
|
||||
*/
|
||||
if (!in_interrupt() &&
|
||||
(per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
|
||||
/*
|
||||
* The following might seem like we could have a race
|
||||
* with NMI/SMIs. But this really isn't a problem.
|
||||
* Here we do a read/modify/write, and the race happens
|
||||
* when an NMI/SMI comes in after the read and before
|
||||
* the write. But NMI/SMIs will increment this counter
|
||||
* twice before returning, so the zero bit will not
|
||||
* be corrupted by the NMI/SMI which is the most important
|
||||
* part.
|
||||
*
|
||||
* The only thing is that we would bring back the counter
|
||||
* to a postion that it was in during the NMI/SMI.
|
||||
* But the zero bit would be set, so the rest of the
|
||||
* counter would again be ignored.
|
||||
*
|
||||
* On return from the IRQ, the counter may have the zero
|
||||
* bit be 0 and the counter the same as the return from
|
||||
* the NMI/SMI. If the state machine was so unlucky to
|
||||
* see that, it still doesn't matter, since all
|
||||
* RCU read-side critical sections on this CPU would
|
||||
* have already completed.
|
||||
*/
|
||||
per_cpu(dynticks_progress_counter, cpu)++;
|
||||
/*
|
||||
* The following memory barrier ensures that any
|
||||
* rcu_read_lock() primitives in the irq handler
|
||||
* are seen by other CPUs to follow the above
|
||||
* increment to dynticks_progress_counter. This is
|
||||
* required in order for other CPUs to correctly
|
||||
* determine when it is safe to advance the RCU
|
||||
* grace-period state machine.
|
||||
*/
|
||||
smp_mb(); /* see above block comment. */
|
||||
/*
|
||||
* Since we can't determine the dynamic tick mode from
|
||||
* the dynticks_progress_counter after this routine,
|
||||
* we use a second flag to acknowledge that we came
|
||||
* from an idle state with ticks stopped.
|
||||
*/
|
||||
per_cpu(rcu_update_flag, cpu)++;
|
||||
/*
|
||||
* If we take an NMI/SMI now, they will also increment
|
||||
* the rcu_update_flag, and will not update the
|
||||
* dynticks_progress_counter on exit. That is for
|
||||
* this IRQ to do.
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_irq_exit - Called from exiting Hard irq context.
|
||||
*
|
||||
* If the CPU was idle with dynamic ticks active, update the
|
||||
* dynticks_progress_counter to put let the RCU handling be
|
||||
* aware that the CPU is going back to idle with no ticks.
|
||||
*/
|
||||
void rcu_irq_exit(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
/*
|
||||
* rcu_update_flag is set if we interrupted the CPU
|
||||
* when it was idle with ticks stopped.
|
||||
* Once this occurs, we keep track of interrupt nesting
|
||||
* because a NMI/SMI could also come in, and we still
|
||||
* only want the IRQ that started the increment of the
|
||||
* dynticks_progress_counter to be the one that modifies
|
||||
* it on exit.
|
||||
*/
|
||||
if (per_cpu(rcu_update_flag, cpu)) {
|
||||
if (--per_cpu(rcu_update_flag, cpu))
|
||||
return;
|
||||
|
||||
/* This must match the interrupt nesting */
|
||||
WARN_ON(in_interrupt());
|
||||
|
||||
/*
|
||||
* If an NMI/SMI happens now we are still
|
||||
* protected by the dynticks_progress_counter being odd.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The following memory barrier ensures that any
|
||||
* rcu_read_unlock() primitives in the irq handler
|
||||
* are seen by other CPUs to preceed the following
|
||||
* increment to dynticks_progress_counter. This
|
||||
* is required in order for other CPUs to determine
|
||||
* when it is safe to advance the RCU grace-period
|
||||
* state machine.
|
||||
*/
|
||||
smp_mb(); /* see above block comment. */
|
||||
per_cpu(dynticks_progress_counter, cpu)++;
|
||||
WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
|
||||
}
|
||||
}
|
||||
|
||||
static void dyntick_save_progress_counter(int cpu)
|
||||
{
|
||||
per_cpu(rcu_dyntick_snapshot, cpu) =
|
||||
per_cpu(dynticks_progress_counter, cpu);
|
||||
}
|
||||
|
||||
static inline int
|
||||
rcu_try_flip_waitack_needed(int cpu)
|
||||
{
|
||||
long curr;
|
||||
long snap;
|
||||
|
||||
curr = per_cpu(dynticks_progress_counter, cpu);
|
||||
snap = per_cpu(rcu_dyntick_snapshot, cpu);
|
||||
smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
|
||||
|
||||
/*
|
||||
* If the CPU remained in dynticks mode for the entire time
|
||||
* and didn't take any interrupts, NMIs, SMIs, or whatever,
|
||||
* then it cannot be in the middle of an rcu_read_lock(), so
|
||||
* the next rcu_read_lock() it executes must use the new value
|
||||
* of the counter. So we can safely pretend that this CPU
|
||||
* already acknowledged the counter.
|
||||
*/
|
||||
|
||||
if ((curr == snap) && ((curr & 0x1) == 0))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If the CPU passed through or entered a dynticks idle phase with
|
||||
* no active irq handlers, then, as above, we can safely pretend
|
||||
* that this CPU already acknowledged the counter.
|
||||
*/
|
||||
|
||||
if ((curr - snap) > 2 || (snap & 0x1) == 0)
|
||||
return 0;
|
||||
|
||||
/* We need this CPU to explicitly acknowledge the counter flip. */
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int
|
||||
rcu_try_flip_waitmb_needed(int cpu)
|
||||
{
|
||||
long curr;
|
||||
long snap;
|
||||
|
||||
curr = per_cpu(dynticks_progress_counter, cpu);
|
||||
snap = per_cpu(rcu_dyntick_snapshot, cpu);
|
||||
smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
|
||||
|
||||
/*
|
||||
* If the CPU remained in dynticks mode for the entire time
|
||||
* and didn't take any interrupts, NMIs, SMIs, or whatever,
|
||||
* then it cannot have executed an RCU read-side critical section
|
||||
* during that time, so there is no need for it to execute a
|
||||
* memory barrier.
|
||||
*/
|
||||
|
||||
if ((curr == snap) && ((curr & 0x1) == 0))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If the CPU either entered or exited an outermost interrupt,
|
||||
* SMI, NMI, or whatever handler, then we know that it executed
|
||||
* a memory barrier when doing so. So we don't need another one.
|
||||
*/
|
||||
if (curr != snap)
|
||||
return 0;
|
||||
|
||||
/* We need the CPU to execute a memory barrier. */
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#else /* !CONFIG_NO_HZ */
|
||||
|
||||
# define dyntick_save_progress_counter(cpu) do { } while (0)
|
||||
# define rcu_try_flip_waitack_needed(cpu) (1)
|
||||
# define rcu_try_flip_waitmb_needed(cpu) (1)
|
||||
|
||||
#endif /* CONFIG_NO_HZ */
|
||||
|
||||
/*
|
||||
* Get here when RCU is idle. Decide whether we need to
|
||||
* move out of idle state, and return non-zero if so.
|
||||
@ -447,8 +657,10 @@ rcu_try_flip_idle(void)
|
||||
|
||||
/* Now ask each CPU for acknowledgement of the flip. */
|
||||
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map) {
|
||||
per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
|
||||
dyntick_save_progress_counter(cpu);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -464,7 +676,8 @@ rcu_try_flip_waitack(void)
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
|
||||
if (rcu_try_flip_waitack_needed(cpu) &&
|
||||
per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
|
||||
return 0;
|
||||
}
|
||||
@ -509,8 +722,10 @@ rcu_try_flip_waitzero(void)
|
||||
smp_mb(); /* ^^^^^^^^^^^^ */
|
||||
|
||||
/* Call for a memory barrier from each CPU. */
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map) {
|
||||
per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
|
||||
dyntick_save_progress_counter(cpu);
|
||||
}
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
|
||||
return 1;
|
||||
@ -528,7 +743,8 @@ rcu_try_flip_waitmb(void)
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
|
||||
if (rcu_try_flip_waitmb_needed(cpu) &&
|
||||
per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
|
||||
return 0;
|
||||
}
|
||||
|
@ -313,6 +313,7 @@ void irq_exit(void)
|
||||
/* Make sure that timer wheel updates are propagated */
|
||||
if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
|
||||
tick_nohz_stop_sched_tick();
|
||||
rcu_irq_exit();
|
||||
#endif
|
||||
preempt_enable_no_resched();
|
||||
}
|
||||
|
@ -282,6 +282,7 @@ void tick_nohz_stop_sched_tick(void)
|
||||
ts->idle_tick = ts->sched_timer.expires;
|
||||
ts->tick_stopped = 1;
|
||||
ts->idle_jiffies = last_jiffies;
|
||||
rcu_enter_nohz();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -375,6 +376,8 @@ void tick_nohz_restart_sched_tick(void)
|
||||
return;
|
||||
}
|
||||
|
||||
rcu_exit_nohz();
|
||||
|
||||
/* Update jiffies first */
|
||||
select_nohz_load_balancer(0);
|
||||
now = ktime_get();
|
||||
|
Loading…
Reference in New Issue
Block a user