watchdog: Use hotplug thread infrastructure

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/20120716103948.563736676@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
Thomas Gleixner 2012-07-16 10:42:38 +00:00
parent 3e339b5dae
commit bcd951cf10

View File

@ -22,6 +22,7 @@
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <asm/irq_regs.h> #include <asm/irq_regs.h>
#include <linux/kvm_para.h> #include <linux/kvm_para.h>
@ -29,16 +30,18 @@
int watchdog_enabled = 1; int watchdog_enabled = 1;
int __read_mostly watchdog_thresh = 10; int __read_mostly watchdog_thresh = 10;
static int __read_mostly watchdog_disabled;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn); static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
#ifdef CONFIG_HARDLOCKUP_DETECTOR #ifdef CONFIG_HARDLOCKUP_DETECTOR
static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif #endif
@ -248,13 +251,15 @@ static void watchdog_overflow_callback(struct perf_event *event,
__this_cpu_write(hard_watchdog_warn, false); __this_cpu_write(hard_watchdog_warn, false);
return; return;
} }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
static void watchdog_interrupt_count(void) static void watchdog_interrupt_count(void)
{ {
__this_cpu_inc(hrtimer_interrupts); __this_cpu_inc(hrtimer_interrupts);
} }
#else
static inline void watchdog_interrupt_count(void) { return; } static int watchdog_nmi_enable(unsigned int cpu);
#endif /* CONFIG_HARDLOCKUP_DETECTOR */ static void watchdog_nmi_disable(unsigned int cpu);
/* watchdog kicker functions */ /* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
@ -327,50 +332,69 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
return HRTIMER_RESTART; return HRTIMER_RESTART;
} }
static void watchdog_set_prio(unsigned int policy, unsigned int prio)
/* {
* The watchdog thread - touches the timestamp. struct sched_param param = { .sched_priority = prio };
*/
static int watchdog(void *unused) sched_setscheduler(current, policy, &param);
}
static void watchdog_enable(unsigned int cpu)
{ {
struct sched_param param = { .sched_priority = 0 };
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
/* initialize timestamp */ if (!watchdog_enabled) {
__touch_watchdog(); kthread_park(current);
return;
}
/* Enable the perf event */
watchdog_nmi_enable(cpu);
/* kick off the timer for the hardlockup detector */ /* kick off the timer for the hardlockup detector */
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
/* done here because hrtimer_start can only pin to smp_processor_id() */ /* done here because hrtimer_start can only pin to smp_processor_id() */
hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()), hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
HRTIMER_MODE_REL_PINNED); HRTIMER_MODE_REL_PINNED);
set_current_state(TASK_INTERRUPTIBLE); /* initialize timestamp */
/* watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
* Run briefly (kicked by the hrtimer callback function) once every
* get_sample_period() seconds (4 seconds by default) to reset the
* softlockup timestamp. If this gets delayed for more than
* 2*watchdog_thresh seconds then the debug-printout triggers in
* watchdog_timer_fn().
*/
while (!kthread_should_stop()) {
__touch_watchdog(); __touch_watchdog();
schedule();
if (kthread_should_stop())
break;
set_current_state(TASK_INTERRUPTIBLE);
} }
static void watchdog_disable(unsigned int cpu)
{
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
watchdog_set_prio(SCHED_NORMAL, 0);
hrtimer_cancel(hrtimer);
/* disable the perf event */
watchdog_nmi_disable(cpu);
}
static int watchdog_should_run(unsigned int cpu)
{
return __this_cpu_read(hrtimer_interrupts) !=
__this_cpu_read(soft_lockup_hrtimer_cnt);
}
/* /*
* Drop the policy/priority elevation during thread exit to avoid a * The watchdog thread function - touches the timestamp.
* scheduling latency spike. *
* It only runs once every get_sample_period() seconds (4 seconds by
* default) to reset the softlockup timestamp. If this gets delayed
* for more than 2*watchdog_thresh seconds then the debug-printout
* triggers in watchdog_timer_fn().
*/ */
__set_current_state(TASK_RUNNING); static void watchdog(unsigned int cpu)
sched_setscheduler(current, SCHED_NORMAL, &param); {
return 0; __this_cpu_write(soft_lockup_hrtimer_cnt,
__this_cpu_read(hrtimer_interrupts));
__touch_watchdog();
} }
#ifdef CONFIG_HARDLOCKUP_DETECTOR #ifdef CONFIG_HARDLOCKUP_DETECTOR
/* /*
* People like the simple clean cpu node info on boot. * People like the simple clean cpu node info on boot.
@ -379,7 +403,7 @@ static int watchdog(void *unused)
*/ */
static unsigned long cpu0_err; static unsigned long cpu0_err;
static int watchdog_nmi_enable(int cpu) static int watchdog_nmi_enable(unsigned int cpu)
{ {
struct perf_event_attr *wd_attr; struct perf_event_attr *wd_attr;
struct perf_event *event = per_cpu(watchdog_ev, cpu); struct perf_event *event = per_cpu(watchdog_ev, cpu);
@ -433,7 +457,7 @@ out:
return 0; return 0;
} }
static void watchdog_nmi_disable(int cpu) static void watchdog_nmi_disable(unsigned int cpu)
{ {
struct perf_event *event = per_cpu(watchdog_ev, cpu); struct perf_event *event = per_cpu(watchdog_ev, cpu);
@ -447,106 +471,34 @@ static void watchdog_nmi_disable(int cpu)
return; return;
} }
#else #else
static int watchdog_nmi_enable(int cpu) { return 0; } static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(int cpu) { return; } static void watchdog_nmi_disable(unsigned int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */ #endif /* CONFIG_HARDLOCKUP_DETECTOR */
/* prepare/enable/disable routines */ /* prepare/enable/disable routines */
static void watchdog_prepare_cpu(int cpu)
{
struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
WARN_ON(per_cpu(softlockup_watchdog, cpu));
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
}
static int watchdog_enable(int cpu)
{
struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
int err = 0;
/* enable the perf event */
err = watchdog_nmi_enable(cpu);
/* Regardless of err above, fall through and start softlockup */
/* create the watchdog thread */
if (!p) {
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
if (IS_ERR(p)) {
pr_err("softlockup watchdog for %i failed\n", cpu);
if (!err) {
/* if hardlockup hasn't already set this */
err = PTR_ERR(p);
/* and disable the perf event */
watchdog_nmi_disable(cpu);
}
goto out;
}
sched_setscheduler(p, SCHED_FIFO, &param);
kthread_bind(p, cpu);
per_cpu(watchdog_touch_ts, cpu) = 0;
per_cpu(softlockup_watchdog, cpu) = p;
wake_up_process(p);
}
out:
return err;
}
static void watchdog_disable(int cpu)
{
struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
/*
* cancel the timer first to stop incrementing the stats
* and waking up the kthread
*/
hrtimer_cancel(hrtimer);
/* disable the perf event */
watchdog_nmi_disable(cpu);
/* stop the watchdog thread */
if (p) {
per_cpu(softlockup_watchdog, cpu) = NULL;
kthread_stop(p);
}
}
/* sysctl functions */ /* sysctl functions */
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
static void watchdog_enable_all_cpus(void) static void watchdog_enable_all_cpus(void)
{ {
int cpu; unsigned int cpu;
watchdog_enabled = 0;
if (watchdog_disabled) {
watchdog_disabled = 0;
for_each_online_cpu(cpu) for_each_online_cpu(cpu)
if (!watchdog_enable(cpu)) kthread_unpark(per_cpu(softlockup_watchdog, cpu));
/* if any cpu succeeds, watchdog is considered }
enabled for the system */
watchdog_enabled = 1;
if (!watchdog_enabled)
pr_err("failed to be enabled on some cpus\n");
} }
static void watchdog_disable_all_cpus(void) static void watchdog_disable_all_cpus(void)
{ {
int cpu; unsigned int cpu;
if (!watchdog_disabled) {
watchdog_disabled = 1;
for_each_online_cpu(cpu) for_each_online_cpu(cpu)
watchdog_disable(cpu); kthread_park(per_cpu(softlockup_watchdog, cpu));
}
/* if all watchdogs are disabled, then they are disabled for the system */
watchdog_enabled = 0;
} }
/* /*
* proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
@ -557,73 +509,36 @@ int proc_dowatchdog(struct ctl_table *table, int write,
{ {
int ret; int ret;
if (watchdog_disabled < 0)
return -ENODEV;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write) if (ret || !write)
goto out; return ret;
if (watchdog_enabled && watchdog_thresh) if (watchdog_enabled && watchdog_thresh)
watchdog_enable_all_cpus(); watchdog_enable_all_cpus();
else else
watchdog_disable_all_cpus(); watchdog_disable_all_cpus();
out:
return ret; return ret;
} }
#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_SYSCTL */
static struct smp_hotplug_thread watchdog_threads = {
/* .store = &softlockup_watchdog,
* Create/destroy watchdog threads as CPUs come and go: .thread_should_run = watchdog_should_run,
*/ .thread_fn = watchdog,
static int __cpuinit .thread_comm = "watchdog/%u",
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) .setup = watchdog_enable,
{ .park = watchdog_disable,
int hotcpu = (unsigned long)hcpu; .unpark = watchdog_enable,
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
watchdog_prepare_cpu(hotcpu);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
if (watchdog_enabled)
watchdog_enable(hotcpu);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
watchdog_disable(hotcpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
watchdog_disable(hotcpu);
break;
#endif /* CONFIG_HOTPLUG_CPU */
}
/*
* hardlockup and softlockup are not important enough
* to block cpu bring up. Just always succeed and
* rely on printk output to flag problems.
*/
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cpu_nfb = {
.notifier_call = cpu_callback
}; };
void __init lockup_detector_init(void) void __init lockup_detector_init(void)
{ {
void *cpu = (void *)(long)smp_processor_id(); if (smpboot_register_percpu_thread(&watchdog_threads)) {
int err; pr_err("Failed to create watchdog threads, disabled\n");
watchdog_disabled = -ENODEV;
err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); }
WARN_ON(notifier_to_errno(err));
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
register_cpu_notifier(&cpu_nfb);
return;
} }