diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 53589000fd07..8615d7cf7e01 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -42,6 +42,7 @@ */ #include +#include #include "ehca_classes.h" #include "ehca_irq.h" @@ -652,7 +653,7 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool *pool) +static int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; @@ -662,17 +663,20 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool) ehca_dmp(cpu_online_mask, cpumask_size(), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = cpumask_next(pool->last_cpu, cpu_online_mask); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(cpu_online_mask); - pool->last_cpu = cpu; + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); spin_unlock_irqrestore(&pool->last_cpu_lock, flags); return cpu; } static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct) + struct ehca_cpu_comp_task *cct, + struct task_struct *thread) { unsigned long flags; @@ -683,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq, __cq->nr_callbacks++; list_add_tail(&__cq->entry, &cct->cq_list); cct->cq_jobs++; - wake_up(&cct->wait_queue); + wake_up_process(thread); } else __cq->nr_callbacks++; @@ -695,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq) { int cpu_id; struct ehca_cpu_comp_task *cct; + struct task_struct *thread; int cq_jobs; unsigned long flags; @@ -702,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq) BUG_ON(!cpu_online(cpu_id)); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); spin_lock_irqsave(&cct->task_lock, flags); cq_jobs = cct->cq_jobs; @@ -710,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq) if (cq_jobs > 0) { cpu_id = find_next_online_cpu(pool); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); } - - __queue_comp_task(__cq, cct); + __queue_comp_task(__cq, cct, thread); } static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irqrestore(&cct->task_lock, flags); + spin_unlock_irq(&cct->task_lock); comp_event_callback(cq); if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_lock_irqsave(&cct->task_lock, flags); + spin_lock_irq(&cct->task_lock); spin_lock(&cq->task_lock); cq->nr_callbacks--; if (!cq->nr_callbacks) { @@ -740,159 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct) } spin_unlock(&cq->task_lock); } - - spin_unlock_irqrestore(&cct->task_lock, flags); } -static int comp_task(void *__cct) -{ - struct ehca_cpu_comp_task *cct = __cct; - int cql_empty; - DECLARE_WAITQUEUE(wait, current); - - set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { - add_wait_queue(&cct->wait_queue, &wait); - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - 
spin_unlock_irq(&cct->task_lock); - if (cql_empty) - schedule(); - else - __set_current_state(TASK_RUNNING); - - remove_wait_queue(&cct->wait_queue, &wait); - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (!cql_empty) - run_comp_task(__cct); - - set_current_state(TASK_INTERRUPTIBLE); - } - __set_current_state(TASK_RUNNING); - - return 0; -} - -static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, - int cpu) -{ - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - init_waitqueue_head(&cct->wait_queue); - cct->task = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu), - "ehca_comp/%d", cpu); - - return cct->task; -} - -static void destroy_comp_task(struct ehca_comp_pool *pool, - int cpu) -{ - struct ehca_cpu_comp_task *cct; - struct task_struct *task; - unsigned long flags_cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irqsave(&cct->task_lock, flags_cct); - - task = cct->task; - cct->task = NULL; - cct->cq_jobs = 0; - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); - - if (task) - kthread_stop(task); -} - -static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu) +static void comp_task_park(unsigned int cpu) { struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; LIST_HEAD(list); - struct ehca_cq *cq; - unsigned long flags_cct; - - spin_lock_irqsave(&cct->task_lock, flags_cct); + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); - while (!list_empty(&list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { list_del(&cq->entry); - __queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks)); + __queue_comp_task(cq, target, thread); } - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); - + spin_unlock_irq(&target->task_lock); } -static int __cpuinit comp_pool_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static void comp_task_stop(unsigned int cpu, bool online) { - unsigned int cpu = (unsigned long)hcpu; - struct ehca_cpu_comp_task *cct; + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if (!create_comp_task(pool, cpu)) { - ehca_gen_err("Can't create comp_task for cpu: %x", cpu); - return notifier_from_errno(-ENOMEM); - } - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpumask_any(cpu_online_mask)); - destroy_comp_task(pool, cpu); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpu); - wake_up_process(cct->task); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); - break; - case CPU_DOWN_FAILED: - case 
CPU_DOWN_FAILED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); - destroy_comp_task(pool, cpu); - take_over_work(pool, cpu); - break; - } - - return NOTIFY_OK; + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); } -static struct notifier_block comp_pool_callback_nb __cpuinitdata = { - .notifier_call = comp_pool_callback, - .priority = 0, +static int comp_task_should_run(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + return cct->cq_jobs; +} + +static void comp_task(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; + + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); + } + spin_unlock_irq(&cct->task_lock); +} + +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = comp_task, + .thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, }; int ehca_create_comp_pool(void) { - int cpu; - struct task_struct *task; + int cpu, ret = -ENOMEM; if (!ehca_scaling_code) return 0; @@ -905,38 +825,46 @@ int ehca_create_comp_pool(void) pool->last_cpu = cpumask_any(cpu_online_mask); pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (pool->cpu_comp_tasks == NULL) { - kfree(pool); - return -EINVAL; + if (!pool->cpu_comp_tasks) + goto out_pool; + + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); } - for_each_online_cpu(cpu) { - task = create_comp_task(pool, cpu); - if (task) { - kthread_bind(task, cpu); - wake_up_process(task); - } - } + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; - register_hotcpu_notifier(&comp_pool_callback_nb); + pr_info("eHCA scaling code enabled\n"); + return ret; - printk(KERN_INFO "eHCA scaling code enabled\n"); - - return 0; +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; } void ehca_destroy_comp_pool(void) { - int i; - if (!ehca_scaling_code) return; - unregister_hotcpu_notifier(&comp_pool_callback_nb); - - for_each_online_cpu(i) - destroy_comp_task(pool, i); + smpboot_unregister_percpu_thread(&comp_pool_threads); + free_percpu(pool->cpu_comp_threads); free_percpu(pool->cpu_comp_tasks); kfree(pool); } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index 3346cb06cea6..5370199f08c7 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data); void ehca_process_eq(struct ehca_shca *shca, int is_irq); struct ehca_cpu_comp_task { - wait_queue_head_t wait_queue; struct list_head cq_list; - struct task_struct *task; spinlock_t task_lock; int cq_jobs; + int active; }; struct ehca_comp_pool { - struct ehca_cpu_comp_task *cpu_comp_tasks; + struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; + struct task_struct * 
__percpu *cpu_comp_threads;
 	int last_cpu;
 	spinlock_t last_cpu_lock;
 };
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 22ccf9dee177..8d816646f766 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -14,6 +14,11 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 	kthread_create_on_node(threadfn, data, -1, namefmt, ##arg)
 
+struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+					  void *data,
+					  unsigned int cpu,
+					  const char *namefmt);
+
 /**
  * kthread_run - create and wake a thread.
  * @threadfn: the function to run until signal_pending(current).
@@ -34,9 +39,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 void kthread_bind(struct task_struct *k, unsigned int cpu);
 int kthread_stop(struct task_struct *k);
-int kthread_should_stop(void);
+bool kthread_should_stop(void);
+bool kthread_should_park(void);
 bool kthread_freezable_should_stop(bool *was_frozen);
 void *kthread_data(struct task_struct *k);
+int kthread_park(struct task_struct *k);
+void kthread_unpark(struct task_struct *k);
+void kthread_parkme(void);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
new file mode 100644
index 000000000000..e0106d8581d3
--- /dev/null
+++ b/include/linux/smpboot.h
@@ -0,0 +1,43 @@
+#ifndef _LINUX_SMPBOOT_H
+#define _LINUX_SMPBOOT_H
+
+#include <linux/types.h>
+
+struct task_struct;
+/* Cookie handed to the thread_fn */
+struct smpboot_thread_data;
+
+/**
+ * struct smp_hotplug_thread - CPU hotplug related thread descriptor
+ * @store:			Pointer to per cpu storage for the task pointers
+ * @list:			List head for core management
+ * @thread_should_run:		Check whether the thread should run or not. Called with
+ *				preemption disabled.
+ * @thread_fn: The associated thread function + * @setup: Optional setup function, called when the thread gets + * operational the first time + * @cleanup: Optional cleanup function, called when the thread + * should stop (module exit) + * @park: Optional park function, called when the thread is + * parked (cpu offline) + * @unpark: Optional unpark function, called when the thread is + * unparked (cpu online) + * @thread_comm: The base name of the thread + */ +struct smp_hotplug_thread { + struct task_struct __percpu **store; + struct list_head list; + int (*thread_should_run)(unsigned int cpu); + void (*thread_fn)(unsigned int cpu); + void (*setup)(unsigned int cpu); + void (*cleanup)(unsigned int cpu, bool online); + void (*park)(unsigned int cpu); + void (*unpark)(unsigned int cpu); + const char *thread_comm; +}; + +int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread); +void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); +int smpboot_thread_schedule(void); + +#endif diff --git a/kernel/Makefile b/kernel/Makefile index c0cc67ad764c..e5602d32acb3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o lglock.o + async.o range.o groups.o lglock.o smpboot.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files @@ -46,7 +46,6 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_SMP) += smpboot.o ifneq ($(CONFIG_SMP),y) obj-y += up.o endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 14d32588cccd..e615dfbcf794 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -280,12 +280,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) __func__, cpu); goto out_release; } + smpboot_park_threads(cpu); err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ + smpboot_unpark_threads(cpu); cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); - goto out_release; } BUG_ON(cpu_online(cpu)); @@ -354,6 +355,10 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) goto out; } + ret = smpboot_create_threads(cpu); + if (ret) + goto out; + ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); if (ret) { nr_calls--; @@ -368,6 +373,9 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; BUG_ON(!cpu_online(cpu)); + /* Wake the per cpu threads */ + smpboot_unpark_threads(cpu); + /* Now call notifier in preparation. */ cpu_notify(CPU_ONLINE | mod, hcpu); diff --git a/kernel/kthread.c b/kernel/kthread.c index b579af57ea10..146a6fa96825 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -37,11 +37,20 @@ struct kthread_create_info }; struct kthread { - int should_stop; + unsigned long flags; + unsigned int cpu; void *data; + struct completion parked; struct completion exited; }; +enum KTHREAD_BITS { + KTHREAD_IS_PER_CPU = 0, + KTHREAD_SHOULD_STOP, + KTHREAD_SHOULD_PARK, + KTHREAD_IS_PARKED, +}; + #define to_kthread(tsk) \ container_of((tsk)->vfork_done, struct kthread, exited) @@ -52,12 +61,28 @@ struct kthread { * and this will return true. You should then return, and your return * value will be passed through to kthread_stop(). 
*/ -int kthread_should_stop(void) +bool kthread_should_stop(void) { - return to_kthread(current)->should_stop; + return test_bit(KTHREAD_SHOULD_STOP, &to_kthread(current)->flags); } EXPORT_SYMBOL(kthread_should_stop); +/** + * kthread_should_park - should this kthread park now? + * + * When someone calls kthread_park() on your kthread, it will be woken + * and this will return true. You should then do the necessary + * cleanup and call kthread_parkme() + * + * Similar to kthread_should_stop(), but this keeps the thread alive + * and in a park position. kthread_unpark() "restarts" the thread and + * calls the thread function again. + */ +bool kthread_should_park(void) +{ + return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags); +} + /** * kthread_freezable_should_stop - should this freezable kthread return now? * @was_frozen: optional out parameter, indicates whether %current was frozen @@ -96,6 +121,24 @@ void *kthread_data(struct task_struct *task) return to_kthread(task)->data; } +static void __kthread_parkme(struct kthread *self) +{ + __set_current_state(TASK_INTERRUPTIBLE); + while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) { + if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) + complete(&self->parked); + schedule(); + __set_current_state(TASK_INTERRUPTIBLE); + } + clear_bit(KTHREAD_IS_PARKED, &self->flags); + __set_current_state(TASK_RUNNING); +} + +void kthread_parkme(void) +{ + __kthread_parkme(to_kthread(current)); +} + static int kthread(void *_create) { /* Copy data: it's on kthread's stack */ @@ -105,9 +148,10 @@ static int kthread(void *_create) struct kthread self; int ret; - self.should_stop = 0; + self.flags = 0; self.data = data; init_completion(&self.exited); + init_completion(&self.parked); current->vfork_done = &self.exited; /* OK, tell user we're spawned, wait for stop or wakeup */ @@ -117,9 +161,11 @@ static int kthread(void *_create) schedule(); ret = -EINTR; - if (!self.should_stop) - ret = threadfn(data); + if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) { + __kthread_parkme(&self); + ret = threadfn(data); + } /* we can't just return, we must preserve "self" on stack */ do_exit(ret); } @@ -172,8 +218,7 @@ static void create_kthread(struct kthread_create_info *create) * Returns a task_struct or ERR_PTR(-ENOMEM). */ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), - void *data, - int node, + void *data, int node, const char namefmt[], ...) { @@ -210,6 +255,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), } EXPORT_SYMBOL(kthread_create_on_node); +static void __kthread_bind(struct task_struct *p, unsigned int cpu) +{ + /* It's safe because the task is inactive. */ + do_set_cpus_allowed(p, cpumask_of(cpu)); + p->flags |= PF_THREAD_BOUND; +} + /** * kthread_bind - bind a just-created kthread to a cpu. * @p: thread created by kthread_create(). @@ -226,13 +278,111 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) WARN_ON(1); return; } - - /* It's safe because the task is inactive. */ - do_set_cpus_allowed(p, cpumask_of(cpu)); - p->flags |= PF_THREAD_BOUND; + __kthread_bind(p, cpu); } EXPORT_SYMBOL(kthread_bind); +/** + * kthread_create_on_cpu - Create a cpu bound kthread + * @threadfn: the function to run until signal_pending(current). + * @data: data ptr for @threadfn. + * @cpu: The cpu on which the thread should be bound, + * @namefmt: printf-style name for the thread. Format is restricted + * to "name.*%u". Code fills in cpu number. 
+ *
+ * Description: This helper function creates and names a kernel thread.
+ * The thread will be woken and put into park mode.
+ */
+struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+					  void *data, unsigned int cpu,
+					  const char *namefmt)
+{
+	struct task_struct *p;
+
+	p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt,
+				   cpu);
+	if (IS_ERR(p))
+		return p;
+	set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags);
+	to_kthread(p)->cpu = cpu;
+	/* Park the thread to get it out of TASK_UNINTERRUPTIBLE state */
+	kthread_park(p);
+	return p;
+}
+
+static struct kthread *task_get_live_kthread(struct task_struct *k)
+{
+	struct kthread *kthread;
+
+	get_task_struct(k);
+	kthread = to_kthread(k);
+	/* It might have exited */
+	barrier();
+	if (k->vfork_done != NULL)
+		return kthread;
+	return NULL;
+}
+
+/**
+ * kthread_unpark - unpark a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ *
+ * Sets kthread_should_park() for @k to return false and, if the thread
+ * was parked, wakes it up. If the thread is marked percpu then it is
+ * bound to the cpu again before being woken.
+ */
+void kthread_unpark(struct task_struct *k)
+{
+	struct kthread *kthread = task_get_live_kthread(k);
+
+	if (kthread) {
+		clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+		/*
+		 * We clear the IS_PARKED bit here as we don't wait
+		 * until the task has left the park code. So if we'd
+		 * park before that happens we'd see the IS_PARKED bit
+		 * which might be about to be cleared.
+		 */
+		if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
+			if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+				__kthread_bind(k, kthread->cpu);
+			wake_up_process(k);
+		}
+	}
+	put_task_struct(k);
+}
+
+/**
+ * kthread_park - park a thread created by kthread_create().
+ * @k: thread created by kthread_create().
+ *
+ * Sets kthread_should_park() for @k to return true, wakes it, and
+ * waits for it to park. This can also be called after kthread_create()
+ * instead of calling wake_up_process(): the thread will park without
+ * calling threadfn().
+ *
+ * Returns 0 if the thread is parked, -ENOSYS if the thread exited.
+ * If called by the kthread itself just the park bit is set.
+ */
+int kthread_park(struct task_struct *k)
+{
+	struct kthread *kthread = task_get_live_kthread(k);
+	int ret = -ENOSYS;
+
+	if (kthread) {
+		if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
+			set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+			if (k != current) {
+				wake_up_process(k);
+				wait_for_completion(&kthread->parked);
+			}
+		}
+		ret = 0;
+	}
+	put_task_struct(k);
+	return ret;
+}
+
 /**
  * kthread_stop - stop a thread created by kthread_create().
  * @k: thread created by kthread_create().
@@ -250,16 +400,13 @@ EXPORT_SYMBOL(kthread_bind); */ int kthread_stop(struct task_struct *k) { - struct kthread *kthread; + struct kthread *kthread = task_get_live_kthread(k); int ret; trace_sched_kthread_stop(k); - get_task_struct(k); - - kthread = to_kthread(k); - barrier(); /* it might have exited */ - if (k->vfork_done != NULL) { - kthread->should_stop = 1; + if (kthread) { + set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); wake_up_process(k); wait_for_completion(&kthread->exited); } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 80fd02e5d115..7387e46009d9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -134,13 +134,12 @@ static int rcu_scheduler_fully_active __read_mostly; */ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); -DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); #endif /* #ifdef CONFIG_RCU_BOOST */ -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); +static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static void invoke_rcu_core(void); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); @@ -1543,8 +1542,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ /* Adjust any no-longer-needed kthreads. */ - rcu_stop_cpu_kthread(cpu); - rcu_node_kthread_setaffinity(rnp, -1); + rcu_boost_kthread_setaffinity(rnp, -1); /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ @@ -2572,12 +2570,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, break; case CPU_ONLINE: case CPU_DOWN_FAILED: - rcu_node_kthread_setaffinity(rnp, -1); - rcu_cpu_kthread_setrt(cpu, 1); + rcu_boost_kthread_setaffinity(rnp, -1); break; case CPU_DOWN_PREPARE: - rcu_node_kthread_setaffinity(rnp, cpu); - rcu_cpu_kthread_setrt(cpu, 0); + rcu_boost_kthread_setaffinity(rnp, cpu); break; case CPU_DYING: case CPU_DYING_FROZEN: diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 3cf71ce206e0..7576fd4d8ce6 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -196,12 +196,6 @@ struct rcu_node { /* Refused to boost: not sure why, though. */ /* This can happen due to race conditions. */ #endif /* #ifdef CONFIG_RCU_BOOST */ - struct task_struct *node_kthread_task; - /* kthread that takes care of this rcu_node */ - /* structure, for example, awakening the */ - /* per-CPU kthreads as needed. */ - unsigned int node_kthread_status; - /* State of node_kthread_task for tracing. 
*/ raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp; @@ -465,7 +459,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags); -static void rcu_stop_cpu_kthread(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); @@ -488,15 +481,9 @@ static void invoke_rcu_callbacks_kthread(void); static bool rcu_is_callbacks_kthread(void); #ifdef CONFIG_RCU_BOOST static void rcu_preempt_do_callbacks(void); -static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, - cpumask_var_t cm); static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, - struct rcu_node *rnp, - int rnp_index); -static void invoke_rcu_node_kthread(struct rcu_node *rnp); -static void rcu_yield(void (*f)(unsigned long), unsigned long arg); + struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_BOOST */ -static void rcu_cpu_kthread_setrt(int cpu, int to_rt); static void __cpuinit rcu_prepare_kthreads(int cpu); static void rcu_prepare_for_idle_init(int cpu); static void rcu_cleanup_after_idle(int cpu); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 46d7d6cf16db..9c71c1b18e03 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -26,6 +26,7 @@ #include #include +#include #define RCU_KTHREAD_PRIO 1 @@ -1090,6 +1091,16 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) #endif /* #else #ifdef CONFIG_RCU_TRACE */ +static void rcu_wake_cond(struct task_struct *t, int status) +{ + /* + * If the thread is yielding, only wake it when this + * is invoked from idle + */ + if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) + wake_up_process(t); +} + /* * Carry out RCU priority boosting on the task indicated by ->exp_tasks * or ->boost_tasks, advancing the pointer to the next task in the @@ -1161,17 +1172,6 @@ static int rcu_boost(struct rcu_node *rnp) ACCESS_ONCE(rnp->boost_tasks) != NULL; } -/* - * Timer handler to initiate waking up of boost kthreads that - * have yielded the CPU due to excessive numbers of tasks to - * boost. We wake up the per-rcu_node kthread, which in turn - * will wake up the booster kthread. - */ -static void rcu_boost_kthread_timer(unsigned long arg) -{ - invoke_rcu_node_kthread((struct rcu_node *)arg); -} - /* * Priority-boosting kthread. One per leaf rcu_node and one for the * root rcu_node. 
@@ -1195,8 +1195,9 @@ static int rcu_boost_kthread(void *arg) else spincnt = 0; if (spincnt > 10) { + rnp->boost_kthread_status = RCU_KTHREAD_YIELDING; trace_rcu_utilization("End boost kthread@rcu_yield"); - rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); + schedule_timeout_interruptible(2); trace_rcu_utilization("Start boost kthread@rcu_yield"); spincnt = 0; } @@ -1234,8 +1235,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) rnp->boost_tasks = rnp->gp_tasks; raw_spin_unlock_irqrestore(&rnp->lock, flags); t = rnp->boost_kthread_task; - if (t != NULL) - wake_up_process(t); + if (t) + rcu_wake_cond(t, rnp->boost_kthread_status); } else { rcu_initiate_boost_trace(rnp); raw_spin_unlock_irqrestore(&rnp->lock, flags); @@ -1252,8 +1253,10 @@ static void invoke_rcu_callbacks_kthread(void) local_irq_save(flags); __this_cpu_write(rcu_cpu_has_work, 1); if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && - current != __this_cpu_read(rcu_cpu_kthread_task)) - wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); + current != __this_cpu_read(rcu_cpu_kthread_task)) { + rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), + __this_cpu_read(rcu_cpu_kthread_status)); + } local_irq_restore(flags); } @@ -1266,21 +1269,6 @@ static bool rcu_is_callbacks_kthread(void) return __get_cpu_var(rcu_cpu_kthread_task) == current; } -/* - * Set the affinity of the boost kthread. The CPU-hotplug locks are - * held, so no one should be messing with the existence of the boost - * kthread. - */ -static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, - cpumask_var_t cm) -{ - struct task_struct *t; - - t = rnp->boost_kthread_task; - if (t != NULL) - set_cpus_allowed_ptr(rnp->boost_kthread_task, cm); -} - #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) /* @@ -1297,15 +1285,19 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) * Returns zero if all is well, a negated errno otherwise. */ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, - struct rcu_node *rnp, - int rnp_index) + struct rcu_node *rnp) { + int rnp_index = rnp - &rsp->node[0]; unsigned long flags; struct sched_param sp; struct task_struct *t; if (&rcu_preempt_state != rsp) return 0; + + if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0) + return 0; + rsp->boost = 1; if (rnp->boost_kthread_task != NULL) return 0; @@ -1322,25 +1314,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } -#ifdef CONFIG_HOTPLUG_CPU - -/* - * Stop the RCU's per-CPU kthread when its CPU goes offline,. - */ -static void rcu_stop_cpu_kthread(int cpu) -{ - struct task_struct *t; - - /* Stop the CPU's kthread. */ - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t != NULL) { - per_cpu(rcu_cpu_kthread_task, cpu) = NULL; - kthread_stop(t); - } -} - -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - static void rcu_kthread_do_work(void) { rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1348,112 +1321,22 @@ static void rcu_kthread_do_work(void) rcu_preempt_do_callbacks(); } -/* - * Wake up the specified per-rcu_node-structure kthread. - * Because the per-rcu_node kthreads are immortal, we don't need - * to do anything to keep them alive. - */ -static void invoke_rcu_node_kthread(struct rcu_node *rnp) -{ - struct task_struct *t; - - t = rnp->node_kthread_task; - if (t != NULL) - wake_up_process(t); -} - -/* - * Set the specified CPU's kthread to run RT or not, as specified by - * the to_rt argument. 
The CPU-hotplug locks are held, so the task - * is not going away. - */ -static void rcu_cpu_kthread_setrt(int cpu, int to_rt) -{ - int policy; - struct sched_param sp; - struct task_struct *t; - - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t == NULL) - return; - if (to_rt) { - policy = SCHED_FIFO; - sp.sched_priority = RCU_KTHREAD_PRIO; - } else { - policy = SCHED_NORMAL; - sp.sched_priority = 0; - } - sched_setscheduler_nocheck(t, policy, &sp); -} - -/* - * Timer handler to initiate the waking up of per-CPU kthreads that - * have yielded the CPU due to excess numbers of RCU callbacks. - * We wake up the per-rcu_node kthread, which in turn will wake up - * the booster kthread. - */ -static void rcu_cpu_kthread_timer(unsigned long arg) -{ - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); - struct rcu_node *rnp = rdp->mynode; - - atomic_or(rdp->grpmask, &rnp->wakemask); - invoke_rcu_node_kthread(rnp); -} - -/* - * Drop to non-real-time priority and yield, but only after posting a - * timer that will cause us to regain our real-time priority if we - * remain preempted. Either way, we restore our real-time priority - * before returning. - */ -static void rcu_yield(void (*f)(unsigned long), unsigned long arg) +static void rcu_cpu_kthread_setup(unsigned int cpu) { struct sched_param sp; - struct timer_list yield_timer; - int prio = current->rt_priority; - setup_timer_on_stack(&yield_timer, f, arg); - mod_timer(&yield_timer, jiffies + 2); - sp.sched_priority = 0; - sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); - set_user_nice(current, 19); - schedule(); - set_user_nice(current, 0); - sp.sched_priority = prio; + sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); - del_timer(&yield_timer); } -/* - * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. - * This can happen while the corresponding CPU is either coming online - * or going offline. We cannot wait until the CPU is fully online - * before starting the kthread, because the various notifier functions - * can wait for RCU grace periods. So we park rcu_cpu_kthread() until - * the corresponding CPU is online. - * - * Return 1 if the kthread needs to stop, 0 otherwise. - * - * Caller must disable bh. This function can momentarily enable it. - */ -static int rcu_cpu_kthread_should_stop(int cpu) +static void rcu_cpu_kthread_park(unsigned int cpu) { - while (cpu_is_offline(cpu) || - !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || - smp_processor_id() != cpu) { - if (kthread_should_stop()) - return 1; - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; - per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); - local_bh_enable(); - schedule_timeout_uninterruptible(1); - if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - local_bh_disable(); - } - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; - return 0; + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; +} + +static int rcu_cpu_kthread_should_run(unsigned int cpu) +{ + return __get_cpu_var(rcu_cpu_has_work); } /* @@ -1461,138 +1344,35 @@ static int rcu_cpu_kthread_should_stop(int cpu) * RCU softirq used in flavors and configurations of RCU that do not * support RCU priority boosting. 
*/ -static int rcu_cpu_kthread(void *arg) +static void rcu_cpu_kthread(unsigned int cpu) { - int cpu = (int)(long)arg; - unsigned long flags; - int spincnt = 0; - unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); - char work; - char *workp = &per_cpu(rcu_cpu_has_work, cpu); + unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status); + char work, *workp = &__get_cpu_var(rcu_cpu_has_work); + int spincnt; - trace_rcu_utilization("Start CPU kthread@init"); - for (;;) { - *statusp = RCU_KTHREAD_WAITING; - trace_rcu_utilization("End CPU kthread@rcu_wait"); - rcu_wait(*workp != 0 || kthread_should_stop()); + for (spincnt = 0; spincnt < 10; spincnt++) { trace_rcu_utilization("Start CPU kthread@rcu_wait"); local_bh_disable(); - if (rcu_cpu_kthread_should_stop(cpu)) { - local_bh_enable(); - break; - } *statusp = RCU_KTHREAD_RUNNING; - per_cpu(rcu_cpu_kthread_loops, cpu)++; - local_irq_save(flags); + this_cpu_inc(rcu_cpu_kthread_loops); + local_irq_disable(); work = *workp; *workp = 0; - local_irq_restore(flags); + local_irq_enable(); if (work) rcu_kthread_do_work(); local_bh_enable(); - if (*workp != 0) - spincnt++; - else - spincnt = 0; - if (spincnt > 10) { - *statusp = RCU_KTHREAD_YIELDING; - trace_rcu_utilization("End CPU kthread@rcu_yield"); - rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); - trace_rcu_utilization("Start CPU kthread@rcu_yield"); - spincnt = 0; + if (*workp == 0) { + trace_rcu_utilization("End CPU kthread@rcu_wait"); + *statusp = RCU_KTHREAD_WAITING; + return; } } - *statusp = RCU_KTHREAD_STOPPED; - trace_rcu_utilization("End CPU kthread@term"); - return 0; -} - -/* - * Spawn a per-CPU kthread, setting up affinity and priority. - * Because the CPU hotplug lock is held, no other CPU will be attempting - * to manipulate rcu_cpu_kthread_task. There might be another CPU - * attempting to access it during boot, but the locking in kthread_bind() - * will enforce sufficient ordering. - * - * Please note that we cannot simply refuse to wake up the per-CPU - * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, - * which can result in softlockup complaints if the task ends up being - * idle for more than a couple of minutes. - * - * However, please note also that we cannot bind the per-CPU kthread to its - * CPU until that CPU is fully online. We also cannot wait until the - * CPU is fully online before we create its per-CPU kthread, as this would - * deadlock the system when CPU notifiers tried waiting for grace - * periods. So we bind the per-CPU kthread to its CPU only if the CPU - * is online. If its CPU is not yet fully online, then the code in - * rcu_cpu_kthread() will wait until it is fully online, and then do - * the binding. - */ -static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) -{ - struct sched_param sp; - struct task_struct *t; - - if (!rcu_scheduler_fully_active || - per_cpu(rcu_cpu_kthread_task, cpu) != NULL) - return 0; - t = kthread_create_on_node(rcu_cpu_kthread, - (void *)(long)cpu, - cpu_to_node(cpu), - "rcuc/%d", cpu); - if (IS_ERR(t)) - return PTR_ERR(t); - if (cpu_online(cpu)) - kthread_bind(t, cpu); - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; - WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - per_cpu(rcu_cpu_kthread_task, cpu) = t; - wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ - return 0; -} - -/* - * Per-rcu_node kthread, which is in charge of waking up the per-CPU - * kthreads when needed. 
We ignore requests to wake up kthreads - * for offline CPUs, which is OK because force_quiescent_state() - * takes care of this case. - */ -static int rcu_node_kthread(void *arg) -{ - int cpu; - unsigned long flags; - unsigned long mask; - struct rcu_node *rnp = (struct rcu_node *)arg; - struct sched_param sp; - struct task_struct *t; - - for (;;) { - rnp->node_kthread_status = RCU_KTHREAD_WAITING; - rcu_wait(atomic_read(&rnp->wakemask) != 0); - rnp->node_kthread_status = RCU_KTHREAD_RUNNING; - raw_spin_lock_irqsave(&rnp->lock, flags); - mask = atomic_xchg(&rnp->wakemask, 0); - rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { - if ((mask & 0x1) == 0) - continue; - preempt_disable(); - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (!cpu_online(cpu) || t == NULL) { - preempt_enable(); - continue; - } - per_cpu(rcu_cpu_has_work, cpu) = 1; - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - preempt_enable(); - } - } - /* NOTREACHED */ - rnp->node_kthread_status = RCU_KTHREAD_STOPPED; - return 0; + *statusp = RCU_KTHREAD_YIELDING; + trace_rcu_utilization("Start CPU kthread@rcu_yield"); + schedule_timeout_interruptible(2); + trace_rcu_utilization("End CPU kthread@rcu_yield"); + *statusp = RCU_KTHREAD_WAITING; } /* @@ -1604,17 +1384,17 @@ static int rcu_node_kthread(void *arg) * no outgoing CPU. If there are no CPUs left in the affinity set, * this function allows the kthread to execute on any CPU. */ -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) { + struct task_struct *t = rnp->boost_kthread_task; + unsigned long mask = rnp->qsmaskinit; cpumask_var_t cm; int cpu; - unsigned long mask = rnp->qsmaskinit; - if (rnp->node_kthread_task == NULL) + if (!t) return; - if (!alloc_cpumask_var(&cm, GFP_KERNEL)) + if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) return; - cpumask_clear(cm); for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) if ((mask & 0x1) && cpu != outgoingcpu) cpumask_set_cpu(cpu, cm); @@ -1624,62 +1404,36 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) cpumask_clear_cpu(cpu, cm); WARN_ON_ONCE(cpumask_weight(cm) == 0); } - set_cpus_allowed_ptr(rnp->node_kthread_task, cm); - rcu_boost_kthread_setaffinity(rnp, cm); + set_cpus_allowed_ptr(t, cm); free_cpumask_var(cm); } -/* - * Spawn a per-rcu_node kthread, setting priority and affinity. - * Called during boot before online/offline can happen, or, if - * during runtime, with the main CPU-hotplug locks held. So only - * one of these can be executing at a time. - */ -static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, - struct rcu_node *rnp) -{ - unsigned long flags; - int rnp_index = rnp - &rsp->node[0]; - struct sched_param sp; - struct task_struct *t; - - if (!rcu_scheduler_fully_active || - rnp->qsmaskinit == 0) - return 0; - if (rnp->node_kthread_task == NULL) { - t = kthread_create(rcu_node_kthread, (void *)rnp, - "rcun/%d", rnp_index); - if (IS_ERR(t)) - return PTR_ERR(t); - raw_spin_lock_irqsave(&rnp->lock, flags); - rnp->node_kthread_task = t; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - sp.sched_priority = 99; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. 
*/ - } - return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); -} +static struct smp_hotplug_thread rcu_cpu_thread_spec = { + .store = &rcu_cpu_kthread_task, + .thread_should_run = rcu_cpu_kthread_should_run, + .thread_fn = rcu_cpu_kthread, + .thread_comm = "rcuc/%u", + .setup = rcu_cpu_kthread_setup, + .park = rcu_cpu_kthread_park, +}; /* * Spawn all kthreads -- called as soon as the scheduler is running. */ static int __init rcu_spawn_kthreads(void) { - int cpu; struct rcu_node *rnp; + int cpu; rcu_scheduler_fully_active = 1; - for_each_possible_cpu(cpu) { + for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) - (void)rcu_spawn_one_cpu_kthread(cpu); - } + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); rnp = rcu_get_root(rcu_state); - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); if (NUM_RCU_NODES > 1) { rcu_for_each_leaf_node(rcu_state, rnp) - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); } return 0; } @@ -1691,11 +1445,8 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) struct rcu_node *rnp = rdp->mynode; /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ - if (rcu_scheduler_fully_active) { - (void)rcu_spawn_one_cpu_kthread(cpu); - if (rnp->node_kthread_task == NULL) - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - } + if (rcu_scheduler_fully_active) + (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); } #else /* #ifdef CONFIG_RCU_BOOST */ @@ -1719,19 +1470,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) { } -#ifdef CONFIG_HOTPLUG_CPU - -static void rcu_stop_cpu_kthread(int cpu) -{ -} - -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) -{ -} - -static void rcu_cpu_kthread_setrt(int cpu, int to_rt) +static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) { } diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 7340efdcd324..693513bc50e6 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -107,11 +107,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->nxttail[RCU_WAIT_TAIL]], ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); #ifdef CONFIG_RCU_BOOST - seq_printf(m, " kt=%d/%c/%d ktl=%x", + seq_printf(m, " kt=%d/%c ktl=%x", per_cpu(rcu_cpu_has_work, rdp->cpu), convert_kthread_status(per_cpu(rcu_cpu_kthread_status, rdp->cpu)), - per_cpu(rcu_cpu_kthread_cpu, rdp->cpu), per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff); #endif /* #ifdef CONFIG_RCU_BOOST */ seq_printf(m, " b=%ld", rdp->blimit); diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 98f60c5caa1b..d6c5fc054242 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -1,14 +1,22 @@ /* * Common SMP CPU bringup/teardown functions */ +#include #include #include #include +#include +#include #include +#include #include +#include +#include #include "smpboot.h" +#ifdef CONFIG_SMP + #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD /* * For the hotplug case we keep the task structs around and reuse @@ -65,3 +73,228 @@ void __init idle_threads_init(void) } } #endif + +#endif /* #ifdef CONFIG_SMP */ + +static LIST_HEAD(hotplug_threads); +static DEFINE_MUTEX(smpboot_threads_lock); + +struct smpboot_thread_data { + unsigned int cpu; + unsigned int status; + struct smp_hotplug_thread *ht; +}; + +enum { + HP_THREAD_NONE = 0, + HP_THREAD_ACTIVE, + HP_THREAD_PARKED, +}; + +/** + * 
smpboot_thread_fn - percpu hotplug thread loop function + * @data: thread data pointer + * + * Checks for thread stop and park conditions. Calls the necessary + * setup, cleanup, park and unpark functions for the registered + * thread. + * + * Returns 1 when the thread should exit, 0 otherwise. + */ +static int smpboot_thread_fn(void *data) +{ + struct smpboot_thread_data *td = data; + struct smp_hotplug_thread *ht = td->ht; + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + preempt_disable(); + if (kthread_should_stop()) { + set_current_state(TASK_RUNNING); + preempt_enable(); + if (ht->cleanup) + ht->cleanup(td->cpu, cpu_online(td->cpu)); + kfree(td); + return 0; + } + + if (kthread_should_park()) { + __set_current_state(TASK_RUNNING); + preempt_enable(); + if (ht->park && td->status == HP_THREAD_ACTIVE) { + BUG_ON(td->cpu != smp_processor_id()); + ht->park(td->cpu); + td->status = HP_THREAD_PARKED; + } + kthread_parkme(); + /* We might have been woken for stop */ + continue; + } + + BUG_ON(td->cpu != smp_processor_id()); + + /* Check for state change setup */ + switch (td->status) { + case HP_THREAD_NONE: + preempt_enable(); + if (ht->setup) + ht->setup(td->cpu); + td->status = HP_THREAD_ACTIVE; + preempt_disable(); + break; + case HP_THREAD_PARKED: + preempt_enable(); + if (ht->unpark) + ht->unpark(td->cpu); + td->status = HP_THREAD_ACTIVE; + preempt_disable(); + break; + } + + if (!ht->thread_should_run(td->cpu)) { + preempt_enable(); + schedule(); + } else { + set_current_state(TASK_RUNNING); + preempt_enable(); + ht->thread_fn(td->cpu); + } + } +} + +static int +__smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) +{ + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); + struct smpboot_thread_data *td; + + if (tsk) + return 0; + + td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu)); + if (!td) + return -ENOMEM; + td->cpu = cpu; + td->ht = ht; + + tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu, + ht->thread_comm); + if (IS_ERR(tsk)) { + kfree(td); + return PTR_ERR(tsk); + } + + get_task_struct(tsk); + *per_cpu_ptr(ht->store, cpu) = tsk; + return 0; +} + +int smpboot_create_threads(unsigned int cpu) +{ + struct smp_hotplug_thread *cur; + int ret = 0; + + mutex_lock(&smpboot_threads_lock); + list_for_each_entry(cur, &hotplug_threads, list) { + ret = __smpboot_create_thread(cur, cpu); + if (ret) + break; + } + mutex_unlock(&smpboot_threads_lock); + return ret; +} + +static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cpu) +{ + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); + + kthread_unpark(tsk); +} + +void smpboot_unpark_threads(unsigned int cpu) +{ + struct smp_hotplug_thread *cur; + + mutex_lock(&smpboot_threads_lock); + list_for_each_entry(cur, &hotplug_threads, list) + smpboot_unpark_thread(cur, cpu); + mutex_unlock(&smpboot_threads_lock); +} + +static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu) +{ + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); + + if (tsk) + kthread_park(tsk); +} + +void smpboot_park_threads(unsigned int cpu) +{ + struct smp_hotplug_thread *cur; + + mutex_lock(&smpboot_threads_lock); + list_for_each_entry_reverse(cur, &hotplug_threads, list) + smpboot_park_thread(cur, cpu); + mutex_unlock(&smpboot_threads_lock); +} + +static void smpboot_destroy_threads(struct smp_hotplug_thread *ht) +{ + unsigned int cpu; + + /* We need to destroy also the parked threads of offline cpus */ + for_each_possible_cpu(cpu) { + struct task_struct *tsk = 
*per_cpu_ptr(ht->store, cpu); + + if (tsk) { + kthread_stop(tsk); + put_task_struct(tsk); + *per_cpu_ptr(ht->store, cpu) = NULL; + } + } +} + +/** + * smpboot_register_percpu_thread - Register a per_cpu thread related to hotplug + * @plug_thread: Hotplug thread descriptor + * + * Creates and starts the threads on all online cpus. + */ +int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) +{ + unsigned int cpu; + int ret = 0; + + mutex_lock(&smpboot_threads_lock); + for_each_online_cpu(cpu) { + ret = __smpboot_create_thread(plug_thread, cpu); + if (ret) { + smpboot_destroy_threads(plug_thread); + goto out; + } + smpboot_unpark_thread(plug_thread, cpu); + } + list_add(&plug_thread->list, &hotplug_threads); +out: + mutex_unlock(&smpboot_threads_lock); + return ret; +} +EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread); + +/** + * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug + * @plug_thread: Hotplug thread descriptor + * + * Stops all threads on all possible cpus. + */ +void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread) +{ + get_online_cpus(); + mutex_lock(&smpboot_threads_lock); + list_del(&plug_thread->list); + smpboot_destroy_threads(plug_thread); + mutex_unlock(&smpboot_threads_lock); + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); diff --git a/kernel/smpboot.h b/kernel/smpboot.h index 6ef9433e1c70..72415a0eb955 100644 --- a/kernel/smpboot.h +++ b/kernel/smpboot.h @@ -13,4 +13,8 @@ static inline void idle_thread_set_boot_cpu(void) { } static inline void idle_threads_init(void) { } #endif +int smpboot_create_threads(unsigned int cpu); +void smpboot_park_threads(unsigned int cpu); +void smpboot_unpark_threads(unsigned int cpu); + #endif diff --git a/kernel/softirq.c b/kernel/softirq.c index b73e681df09e..5c6a5bd8462f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -742,49 +743,22 @@ void __init softirq_init(void) open_softirq(HI_SOFTIRQ, tasklet_hi_action); } -static int run_ksoftirqd(void * __bind_cpu) +static int ksoftirqd_should_run(unsigned int cpu) { - set_current_state(TASK_INTERRUPTIBLE); + return local_softirq_pending(); +} - while (!kthread_should_stop()) { - preempt_disable(); - if (!local_softirq_pending()) { - schedule_preempt_disabled(); - } - - __set_current_state(TASK_RUNNING); - - while (local_softirq_pending()) { - /* Preempt disable stops cpu going offline. 
- If already offline, we'll be on wrong CPU: - don't process */ - if (cpu_is_offline((long)__bind_cpu)) - goto wait_to_die; - local_irq_disable(); - if (local_softirq_pending()) - __do_softirq(); - local_irq_enable(); - sched_preempt_enable_no_resched(); - cond_resched(); - preempt_disable(); - rcu_note_context_switch((long)__bind_cpu); - } - preempt_enable(); - set_current_state(TASK_INTERRUPTIBLE); +static void run_ksoftirqd(unsigned int cpu) +{ + local_irq_disable(); + if (local_softirq_pending()) { + __do_softirq(); + rcu_note_context_switch(cpu); + local_irq_enable(); + cond_resched(); + return; } - __set_current_state(TASK_RUNNING); - return 0; - -wait_to_die: - preempt_enable(); - /* Wait for kthread_stop */ - set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { - schedule(); - set_current_state(TASK_INTERRUPTIBLE); - } - __set_current_state(TASK_RUNNING); - return 0; + local_irq_enable(); } #ifdef CONFIG_HOTPLUG_CPU @@ -850,50 +824,14 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { - int hotcpu = (unsigned long)hcpu; - struct task_struct *p; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - p = kthread_create_on_node(run_ksoftirqd, - hcpu, - cpu_to_node(hotcpu), - "ksoftirqd/%d", hotcpu); - if (IS_ERR(p)) { - printk("ksoftirqd for %i failed\n", hotcpu); - return notifier_from_errno(PTR_ERR(p)); - } - kthread_bind(p, hotcpu); - per_cpu(ksoftirqd, hotcpu) = p; - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - wake_up_process(per_cpu(ksoftirqd, hotcpu)); - break; #ifdef CONFIG_HOTPLUG_CPU - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - if (!per_cpu(ksoftirqd, hotcpu)) - break; - /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(ksoftirqd, hotcpu), - cpumask_any(cpu_online_mask)); case CPU_DEAD: - case CPU_DEAD_FROZEN: { - static const struct sched_param param = { - .sched_priority = MAX_RT_PRIO-1 - }; - - p = per_cpu(ksoftirqd, hotcpu); - per_cpu(ksoftirqd, hotcpu) = NULL; - sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); - kthread_stop(p); - takeover_tasklets(hotcpu); + case CPU_DEAD_FROZEN: + takeover_tasklets((unsigned long)hcpu); break; - } #endif /* CONFIG_HOTPLUG_CPU */ - } + } return NOTIFY_OK; } @@ -901,14 +839,19 @@ static struct notifier_block __cpuinitdata cpu_nfb = { .notifier_call = cpu_callback }; +static struct smp_hotplug_thread softirq_threads = { + .store = &ksoftirqd, + .thread_should_run = ksoftirqd_should_run, + .thread_fn = run_ksoftirqd, + .thread_comm = "ksoftirqd/%u", +}; + static __init int spawn_ksoftirqd(void) { - void *cpu = (void *)(long)smp_processor_id(); - int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); - - BUG_ON(err != NOTIFY_OK); - cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); + + BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); + return 0; } early_initcall(spawn_ksoftirqd); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 4b1dfba70f7c..9d4c8d5a1f53 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -29,16 +30,18 @@ int watchdog_enabled = 1; int __read_mostly watchdog_thresh = 10; +static int __read_mostly watchdog_disabled; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, 
soft_watchdog_warn); +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); +static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); #ifdef CONFIG_HARDLOCKUP_DETECTOR static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif @@ -248,13 +251,15 @@ static void watchdog_overflow_callback(struct perf_event *event, __this_cpu_write(hard_watchdog_warn, false); return; } +#endif /* CONFIG_HARDLOCKUP_DETECTOR */ + static void watchdog_interrupt_count(void) { __this_cpu_inc(hrtimer_interrupts); } -#else -static inline void watchdog_interrupt_count(void) { return; } -#endif /* CONFIG_HARDLOCKUP_DETECTOR */ + +static int watchdog_nmi_enable(unsigned int cpu); +static void watchdog_nmi_disable(unsigned int cpu); /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) @@ -327,49 +332,68 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) return HRTIMER_RESTART; } - -/* - * The watchdog thread - touches the timestamp. - */ -static int watchdog(void *unused) +static void watchdog_set_prio(unsigned int policy, unsigned int prio) +{ + struct sched_param param = { .sched_priority = prio }; + + sched_setscheduler(current, policy, ¶m); +} + +static void watchdog_enable(unsigned int cpu) { - struct sched_param param = { .sched_priority = 0 }; struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); - /* initialize timestamp */ - __touch_watchdog(); + if (!watchdog_enabled) { + kthread_park(current); + return; + } + + /* Enable the perf event */ + watchdog_nmi_enable(cpu); /* kick off the timer for the hardlockup detector */ + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = watchdog_timer_fn; + /* done here because hrtimer_start can only pin to smp_processor_id() */ hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()), HRTIMER_MODE_REL_PINNED); - set_current_state(TASK_INTERRUPTIBLE); - /* - * Run briefly (kicked by the hrtimer callback function) once every - * get_sample_period() seconds (4 seconds by default) to reset the - * softlockup timestamp. If this gets delayed for more than - * 2*watchdog_thresh seconds then the debug-printout triggers in - * watchdog_timer_fn(). - */ - while (!kthread_should_stop()) { - __touch_watchdog(); - schedule(); - - if (kthread_should_stop()) - break; - - set_current_state(TASK_INTERRUPTIBLE); - } - /* - * Drop the policy/priority elevation during thread exit to avoid a - * scheduling latency spike. - */ - __set_current_state(TASK_RUNNING); - sched_setscheduler(current, SCHED_NORMAL, ¶m); - return 0; + /* initialize timestamp */ + watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); + __touch_watchdog(); } +static void watchdog_disable(unsigned int cpu) +{ + struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); + + watchdog_set_prio(SCHED_NORMAL, 0); + hrtimer_cancel(hrtimer); + /* disable the perf event */ + watchdog_nmi_disable(cpu); +} + +static int watchdog_should_run(unsigned int cpu) +{ + return __this_cpu_read(hrtimer_interrupts) != + __this_cpu_read(soft_lockup_hrtimer_cnt); +} + +/* + * The watchdog thread function - touches the timestamp. + * + * It only runs once every get_sample_period() seconds (4 seconds by + * default) to reset the softlockup timestamp. 
If this gets delayed + * for more than 2*watchdog_thresh seconds then the debug-printout + * triggers in watchdog_timer_fn(). + */ +static void watchdog(unsigned int cpu) +{ + __this_cpu_write(soft_lockup_hrtimer_cnt, + __this_cpu_read(hrtimer_interrupts)); + __touch_watchdog(); +} #ifdef CONFIG_HARDLOCKUP_DETECTOR /* @@ -379,7 +403,7 @@ static int watchdog(void *unused) */ static unsigned long cpu0_err; -static int watchdog_nmi_enable(int cpu) +static int watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -433,7 +457,7 @@ out: return 0; } -static void watchdog_nmi_disable(int cpu) +static void watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -447,107 +471,35 @@ static void watchdog_nmi_disable(int cpu) return; } #else -static int watchdog_nmi_enable(int cpu) { return 0; } -static void watchdog_nmi_disable(int cpu) { return; } +static int watchdog_nmi_enable(unsigned int cpu) { return 0; } +static void watchdog_nmi_disable(unsigned int cpu) { return; } #endif /* CONFIG_HARDLOCKUP_DETECTOR */ /* prepare/enable/disable routines */ -static void watchdog_prepare_cpu(int cpu) -{ - struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); - - WARN_ON(per_cpu(softlockup_watchdog, cpu)); - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer->function = watchdog_timer_fn; -} - -static int watchdog_enable(int cpu) -{ - struct task_struct *p = per_cpu(softlockup_watchdog, cpu); - int err = 0; - - /* enable the perf event */ - err = watchdog_nmi_enable(cpu); - - /* Regardless of err above, fall through and start softlockup */ - - /* create the watchdog thread */ - if (!p) { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; - p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); - if (IS_ERR(p)) { - pr_err("softlockup watchdog for %i failed\n", cpu); - if (!err) { - /* if hardlockup hasn't already set this */ - err = PTR_ERR(p); - /* and disable the perf event */ - watchdog_nmi_disable(cpu); - } - goto out; - } - sched_setscheduler(p, SCHED_FIFO, ¶m); - kthread_bind(p, cpu); - per_cpu(watchdog_touch_ts, cpu) = 0; - per_cpu(softlockup_watchdog, cpu) = p; - wake_up_process(p); - } - -out: - return err; -} - -static void watchdog_disable(int cpu) -{ - struct task_struct *p = per_cpu(softlockup_watchdog, cpu); - struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); - - /* - * cancel the timer first to stop incrementing the stats - * and waking up the kthread - */ - hrtimer_cancel(hrtimer); - - /* disable the perf event */ - watchdog_nmi_disable(cpu); - - /* stop the watchdog thread */ - if (p) { - per_cpu(softlockup_watchdog, cpu) = NULL; - kthread_stop(p); - } -} - /* sysctl functions */ #ifdef CONFIG_SYSCTL static void watchdog_enable_all_cpus(void) { - int cpu; - - watchdog_enabled = 0; - - for_each_online_cpu(cpu) - if (!watchdog_enable(cpu)) - /* if any cpu succeeds, watchdog is considered - enabled for the system */ - watchdog_enabled = 1; - - if (!watchdog_enabled) - pr_err("failed to be enabled on some cpus\n"); + unsigned int cpu; + if (watchdog_disabled) { + watchdog_disabled = 0; + for_each_online_cpu(cpu) + kthread_unpark(per_cpu(softlockup_watchdog, cpu)); + } } static void watchdog_disable_all_cpus(void) { - int cpu; + unsigned int cpu; - for_each_online_cpu(cpu) - watchdog_disable(cpu); - - /* if all watchdogs are disabled, then they are disabled for the system */ - watchdog_enabled = 0; + if 
(!watchdog_disabled) {
+		watchdog_disabled = 1;
+		for_each_online_cpu(cpu)
+			kthread_park(per_cpu(softlockup_watchdog, cpu));
+	}
 }
-
 /*
  * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
  */
@@ -557,73 +509,36 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 {
 	int ret;
 
+	if (watchdog_disabled < 0)
+		return -ENODEV;
+
 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (ret || !write)
-		goto out;
+		return ret;
 
 	if (watchdog_enabled && watchdog_thresh)
 		watchdog_enable_all_cpus();
 	else
 		watchdog_disable_all_cpus();
 
-out:
 	return ret;
 }
 #endif /* CONFIG_SYSCTL */
-
-/*
- * Create/destroy watchdog threads as CPUs come and go:
- */
-static int __cpuinit
-cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	int hotcpu = (unsigned long)hcpu;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		watchdog_prepare_cpu(hotcpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		if (watchdog_enabled)
-			watchdog_enable(hotcpu);
-		break;
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		watchdog_disable(hotcpu);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		watchdog_disable(hotcpu);
-		break;
-#endif /* CONFIG_HOTPLUG_CPU */
-	}
-
-	/*
-	 * hardlockup and softlockup are not important enough
-	 * to block cpu bring up. Just always succeed and
-	 * rely on printk output to flag problems.
-	 */
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cpu_nfb = {
-	.notifier_call = cpu_callback
+static struct smp_hotplug_thread watchdog_threads = {
+	.store			= &softlockup_watchdog,
+	.thread_should_run	= watchdog_should_run,
+	.thread_fn		= watchdog,
+	.thread_comm		= "watchdog/%u",
+	.setup			= watchdog_enable,
+	.park			= watchdog_disable,
+	.unpark			= watchdog_enable,
 };
 
 void __init lockup_detector_init(void)
 {
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-	WARN_ON(notifier_to_errno(err));
-
-	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
-	register_cpu_notifier(&cpu_nfb);
-
-	return;
+	if (smpboot_register_percpu_thread(&watchdog_threads)) {
+		pr_err("Failed to create watchdog threads, disabled\n");
+		watchdog_disabled = -ENODEV;
+	}
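
For review purposes, here is a condensed usage sketch of the interface this series adds. The ksoftirqd, RCU and watchdog conversions above all follow this pattern; everything named demo_* below is hypothetical and not part of the patch, while the struct fields and smpboot_register_percpu_thread() are the ones introduced in include/linux/smpboot.h and kernel/smpboot.c.

#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/smpboot.h>

static DEFINE_PER_CPU(struct task_struct *, demo_task);
static DEFINE_PER_CPU(unsigned int, demo_pending);

/* Called with preemption disabled; decides whether thread_fn runs. */
static int demo_should_run(unsigned int cpu)
{
	return __this_cpu_read(demo_pending);
}

/* Runs in the per-cpu thread whenever demo_should_run() returned true. */
static void demo_thread_fn(unsigned int cpu)
{
	__this_cpu_write(demo_pending, 0);
	/* ... process this cpu's work here ... */
}

static struct smp_hotplug_thread demo_threads = {
	.store			= &demo_task,
	.thread_should_run	= demo_should_run,
	.thread_fn		= demo_thread_fn,
	.thread_comm		= "demo/%u",
};

static int __init demo_init(void)
{
	/* Creates a bound thread for each online cpu and unparks it. */
	return smpboot_register_percpu_thread(&demo_threads);
}
early_initcall(demo_init);

CPU hotplug is then handled by the core: _cpu_up() creates and unparks the thread via smpboot_create_threads()/smpboot_unpark_threads(), and _cpu_down() parks it via smpboot_park_threads(), so the client no longer needs a hotplug notifier for its thread.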
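
The kthread_should_park()/kthread_parkme() contract added in kernel/kthread.c can also be used by a hand-rolled kthread outside the smpboot helper. Below is a minimal sketch under hypothetical my_* names, assuming whoever queues work also wakes the thread; smpboot_thread_fn() above is the in-tree user of these calls.

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static struct task_struct *my_task;

static int my_work_pending(void)
{
	return 0;			/* placeholder for a real condition */
}

static int my_thread_fn(void *data)
{
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);

		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			break;
		}
		if (kthread_should_park()) {
			__set_current_state(TASK_RUNNING);
			kthread_parkme();	/* sleeps until unpark or stop */
			continue;
		}
		if (!my_work_pending()) {
			schedule();
			continue;
		}

		__set_current_state(TASK_RUNNING);
		/* ... process the pending work ... */
	}
	return 0;
}

static int my_start_on(unsigned int cpu)
{
	/* Created bound to @cpu and left parked, not TASK_UNINTERRUPTIBLE. */
	my_task = kthread_create_on_cpu(my_thread_fn, NULL, cpu, "my/%u");
	if (IS_ERR(my_task))
		return PTR_ERR(my_task);
	kthread_unpark(my_task);	/* rebind if necessary and let it run */
	return 0;
}

static void my_cpu_going_down(void)
{
	kthread_park(my_task);		/* returns once the thread is parked */
}

static void my_teardown(void)
{
	kthread_stop(my_task);		/* works on a running or parked thread */
}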
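
Per-cpu threads that carry state usually also want the optional hooks. As a purely illustrative extension of the hypothetical demo_* sketch above, a setup hook and a park hook mirroring rcu_cpu_kthread_setup() and comp_task_park() would be wired up like this, replacing the minimal descriptor shown earlier.

/* Hypothetical extension of the demo_* sketch above. */
static void demo_setup(unsigned int cpu)
{
	/* Runs in the thread the first time it becomes active on @cpu,
	 * e.g. to raise its priority as rcu_cpu_kthread_setup() does. */
}

static void demo_park(unsigned int cpu)
{
	/* Called in the thread right before it parks for cpu offline:
	 * drop or hand over @cpu's pending work, like comp_task_park(). */
	__this_cpu_write(demo_pending, 0);
}

static struct smp_hotplug_thread demo_threads = {
	.store			= &demo_task,
	.thread_should_run	= demo_should_run,
	.thread_fn		= demo_thread_fn,
	.thread_comm		= "demo/%u",
	.setup			= demo_setup,
	.park			= demo_park,
};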