mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-22 05:44:31 +08:00
xen/balloon: use a kernel thread instead a workqueue
Today the Xen ballooning is done via delayed work in a workqueue. This might result in workqueue hangups being reported in case of large amounts of memory are being ballooned in one go (here 16GB): BUG: workqueue lockup - pool cpus=6 node=0 flags=0x0 nice=0 stuck for 64s! Showing busy workqueues and worker pools: workqueue events: flags=0x0 pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=2/256 refcnt=3 in-flight: 229:balloon_process pending: cache_reap workqueue events_freezable_power_: flags=0x84 pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=1/256 refcnt=2 pending: disk_events_workfn workqueue mm_percpu_wq: flags=0x8 pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=1/256 refcnt=2 pending: vmstat_update pool 12: cpus=6 node=0 flags=0x0 nice=0 hung=64s workers=3 idle: 2222 43 This can easily be avoided by using a dedicated kernel thread for doing the ballooning work. Reported-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: Juergen Gross <jgross@suse.com> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> Link: https://lore.kernel.org/r/20210827123206.15429-1-jgross@suse.com Signed-off-by: Juergen Gross <jgross@suse.com>
This commit is contained in:
parent
58e636039b
commit
8480ed9c2b
@ -43,6 +43,8 @@
|
|||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/cred.h>
|
#include <linux/cred.h>
|
||||||
#include <linux/errno.h>
|
#include <linux/errno.h>
|
||||||
|
#include <linux/freezer.h>
|
||||||
|
#include <linux/kthread.h>
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/memblock.h>
|
#include <linux/memblock.h>
|
||||||
#include <linux/pagemap.h>
|
#include <linux/pagemap.h>
|
||||||
@ -115,7 +117,7 @@ static struct ctl_table xen_root[] = {
|
|||||||
#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
|
#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* balloon_process() state:
|
* balloon_thread() state:
|
||||||
*
|
*
|
||||||
* BP_DONE: done or nothing to do,
|
* BP_DONE: done or nothing to do,
|
||||||
* BP_WAIT: wait to be rescheduled,
|
* BP_WAIT: wait to be rescheduled,
|
||||||
@ -130,6 +132,8 @@ enum bp_state {
|
|||||||
BP_ECANCELED
|
BP_ECANCELED
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Main waiting point for xen-balloon thread. */
|
||||||
|
static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq);
|
||||||
|
|
||||||
static DEFINE_MUTEX(balloon_mutex);
|
static DEFINE_MUTEX(balloon_mutex);
|
||||||
|
|
||||||
@ -144,10 +148,6 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)];
|
|||||||
static LIST_HEAD(ballooned_pages);
|
static LIST_HEAD(ballooned_pages);
|
||||||
static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
|
static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
|
||||||
|
|
||||||
/* Main work function, always executed in process context. */
|
|
||||||
static void balloon_process(struct work_struct *work);
|
|
||||||
static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
|
|
||||||
|
|
||||||
/* When ballooning out (allocating memory to return to Xen) we don't really
|
/* When ballooning out (allocating memory to return to Xen) we don't really
|
||||||
want the kernel to try too hard since that can trigger the oom killer. */
|
want the kernel to try too hard since that can trigger the oom killer. */
|
||||||
#define GFP_BALLOON \
|
#define GFP_BALLOON \
|
||||||
@ -366,7 +366,7 @@ static void xen_online_page(struct page *page, unsigned int order)
|
|||||||
static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
|
static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
|
||||||
{
|
{
|
||||||
if (val == MEM_ONLINE)
|
if (val == MEM_ONLINE)
|
||||||
schedule_delayed_work(&balloon_worker, 0);
|
wake_up(&balloon_thread_wq);
|
||||||
|
|
||||||
return NOTIFY_OK;
|
return NOTIFY_OK;
|
||||||
}
|
}
|
||||||
@ -491,18 +491,43 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* As this is a work item it is guaranteed to run as a single instance only.
|
* Stop waiting if either state is not BP_EAGAIN and ballooning action is
|
||||||
|
* needed, or if the credit has changed while state is BP_EAGAIN.
|
||||||
|
*/
|
||||||
|
static bool balloon_thread_cond(enum bp_state state, long credit)
|
||||||
|
{
|
||||||
|
if (state != BP_EAGAIN)
|
||||||
|
credit = 0;
|
||||||
|
|
||||||
|
return current_credit() != credit || kthread_should_stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* As this is a kthread it is guaranteed to run as a single instance only.
|
||||||
* We may of course race updates of the target counts (which are protected
|
* We may of course race updates of the target counts (which are protected
|
||||||
* by the balloon lock), or with changes to the Xen hard limit, but we will
|
* by the balloon lock), or with changes to the Xen hard limit, but we will
|
||||||
* recover from these in time.
|
* recover from these in time.
|
||||||
*/
|
*/
|
||||||
static void balloon_process(struct work_struct *work)
|
static int balloon_thread(void *unused)
|
||||||
{
|
{
|
||||||
enum bp_state state = BP_DONE;
|
enum bp_state state = BP_DONE;
|
||||||
long credit;
|
long credit;
|
||||||
|
unsigned long timeout;
|
||||||
|
|
||||||
|
set_freezable();
|
||||||
|
for (;;) {
|
||||||
|
if (state == BP_EAGAIN)
|
||||||
|
timeout = balloon_stats.schedule_delay * HZ;
|
||||||
|
else
|
||||||
|
timeout = 3600 * HZ;
|
||||||
|
credit = current_credit();
|
||||||
|
|
||||||
|
wait_event_interruptible_timeout(balloon_thread_wq,
|
||||||
|
balloon_thread_cond(state, credit), timeout);
|
||||||
|
|
||||||
|
if (kthread_should_stop())
|
||||||
|
return 0;
|
||||||
|
|
||||||
do {
|
|
||||||
mutex_lock(&balloon_mutex);
|
mutex_lock(&balloon_mutex);
|
||||||
|
|
||||||
credit = current_credit();
|
credit = current_credit();
|
||||||
@ -529,12 +554,7 @@ static void balloon_process(struct work_struct *work)
|
|||||||
mutex_unlock(&balloon_mutex);
|
mutex_unlock(&balloon_mutex);
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
|
}
|
||||||
} while (credit && state == BP_DONE);
|
|
||||||
|
|
||||||
/* Schedule more work if there is some still to be done. */
|
|
||||||
if (state == BP_EAGAIN)
|
|
||||||
schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Resets the Xen limit, sets new target, and kicks off processing. */
|
/* Resets the Xen limit, sets new target, and kicks off processing. */
|
||||||
@ -542,7 +562,7 @@ void balloon_set_new_target(unsigned long target)
|
|||||||
{
|
{
|
||||||
/* No need for lock. Not read-modify-write updates. */
|
/* No need for lock. Not read-modify-write updates. */
|
||||||
balloon_stats.target_pages = target;
|
balloon_stats.target_pages = target;
|
||||||
schedule_delayed_work(&balloon_worker, 0);
|
wake_up(&balloon_thread_wq);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(balloon_set_new_target);
|
EXPORT_SYMBOL_GPL(balloon_set_new_target);
|
||||||
|
|
||||||
@ -647,7 +667,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
|
|||||||
|
|
||||||
/* The balloon may be too large now. Shrink it if needed. */
|
/* The balloon may be too large now. Shrink it if needed. */
|
||||||
if (current_credit())
|
if (current_credit())
|
||||||
schedule_delayed_work(&balloon_worker, 0);
|
wake_up(&balloon_thread_wq);
|
||||||
|
|
||||||
mutex_unlock(&balloon_mutex);
|
mutex_unlock(&balloon_mutex);
|
||||||
}
|
}
|
||||||
@ -679,6 +699,8 @@ static void __init balloon_add_region(unsigned long start_pfn,
|
|||||||
|
|
||||||
static int __init balloon_init(void)
|
static int __init balloon_init(void)
|
||||||
{
|
{
|
||||||
|
struct task_struct *task;
|
||||||
|
|
||||||
if (!xen_domain())
|
if (!xen_domain())
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
|
||||||
@ -722,6 +744,12 @@ static int __init balloon_init(void)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
task = kthread_run(balloon_thread, NULL, "xen-balloon");
|
||||||
|
if (IS_ERR(task)) {
|
||||||
|
pr_err("xen-balloon thread could not be started, ballooning will not work!\n");
|
||||||
|
return PTR_ERR(task);
|
||||||
|
}
|
||||||
|
|
||||||
/* Init the xen-balloon driver. */
|
/* Init the xen-balloon driver. */
|
||||||
xen_balloon_init();
|
xen_balloon_init();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user