mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-24 04:34:08 +08:00
wq: handle VM suspension in stall detection
If a VCPU is suspended (VM suspend) inside wq_watchdog_timer_fn(), then once this VCPU resumes it will see the new jiffies value, while it may take a while before the IRQ path detects PVCLOCK_GUEST_STOPPED on this VCPU and updates all the watchdogs via pvclock_touch_watchdogs(). In the meantime there is a small chance of misreported WQ stalls, because the new jiffies value is time_after() the old 'ts + thresh':

    wq_watchdog_timer_fn()
    {
        for_each_pool(pool, pi) {
            if (time_after(jiffies, ts + thresh)) {
                pr_emerg("BUG: workqueue lockup - pool");
            }
        }
    }

Save jiffies at the beginning of this function and use that value for stall detection. If the VM gets suspended, we continue using the "old" jiffies value and the old WQ touch timestamps. If an IRQ at some point restarts the stall detection cycle (pvclock_touch_watchdogs()), then the old jiffies value will always be before the new 'ts + thresh'.

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
c3d0e3fd41
commit
940d71c646
@ -50,6 +50,7 @@
|
|||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
#include <linux/sched/isolation.h>
|
#include <linux/sched/isolation.h>
|
||||||
#include <linux/nmi.h>
|
#include <linux/nmi.h>
|
||||||
|
#include <linux/kvm_para.h>
|
||||||
|
|
||||||
#include "workqueue_internal.h"
|
#include "workqueue_internal.h"
|
||||||
|
|
||||||
@ -5772,6 +5773,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
|
|||||||
{
|
{
|
||||||
unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
|
unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
|
||||||
bool lockup_detected = false;
|
bool lockup_detected = false;
|
||||||
|
unsigned long now = jiffies;
|
||||||
struct worker_pool *pool;
|
struct worker_pool *pool;
|
||||||
int pi;
|
int pi;
|
||||||
|
|
||||||
@ -5786,6 +5788,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
|
|||||||
if (list_empty(&pool->worklist))
|
if (list_empty(&pool->worklist))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If a virtual machine is stopped by the host it can look to
|
||||||
|
* the watchdog like a stall.
|
||||||
|
*/
|
||||||
|
kvm_check_and_clear_guest_paused();
|
||||||
|
|
||||||
/* get the latest of pool and touched timestamps */
|
/* get the latest of pool and touched timestamps */
|
||||||
if (pool->cpu >= 0)
|
if (pool->cpu >= 0)
|
||||||
touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
|
touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
|
||||||
@ -5799,12 +5807,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
|
|||||||
ts = touched;
|
ts = touched;
|
||||||
|
|
||||||
/* did we stall? */
|
/* did we stall? */
|
||||||
if (time_after(jiffies, ts + thresh)) {
|
if (time_after(now, ts + thresh)) {
|
||||||
lockup_detected = true;
|
lockup_detected = true;
|
||||||
pr_emerg("BUG: workqueue lockup - pool");
|
pr_emerg("BUG: workqueue lockup - pool");
|
||||||
pr_cont_pool_info(pool);
|
pr_cont_pool_info(pool);
|
||||||
pr_cont(" stuck for %us!\n",
|
pr_cont(" stuck for %us!\n",
|
||||||
jiffies_to_msecs(jiffies - pool_ts) / 1000);
|
jiffies_to_msecs(now - pool_ts) / 1000);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user