mm: sched: numa: Control enabling and disabling of NUMA balancing

This patch adds Kconfig options and kernel parameters to allow the
enabling and disabling of automatic NUMA balancing. The existence of
such a switch was, and is, very important when debugging problems
related to transparent hugepages, and we should have the same for
automatic NUMA placement.

Signed-off-by: Mel Gorman <mgorman@suse.de>
commit 1a687c2e9a
parent b8593bfda1
Author: Mel Gorman
Date:   2012-11-22 11:16:36 +00:00
7 changed files with 101 additions and 17 deletions
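A quick usage sketch of the new switch (the numa_balancing= boot parameter and the NUMA sched feature bit are what this patch adds; the sysctl referenced by the new boot message in the mm/mempolicy.c hunk below is not part of this diff, and the debugfs path assumes CONFIG_SCHED_DEBUG with debugfs mounted at /sys/kernel/debug):

    numa_balancing=disable    (kernel command line: keep automatic balancing off at boot)
    numa_balancing=enable     (kernel command line: force it on regardless of the Kconfig default)
    echo NO_NUMA > /sys/kernel/debug/sched_features    (runtime: clear the NUMA feature bit)
    echo NUMA > /sys/kernel/debug/sched_features       (runtime: set it again)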

Documentation/kernel-parameters.txt

@@ -1996,6 +1996,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	nr_uarts=	[SERIAL] maximum number of UARTs to be registered.

+	numa_balancing=	[KNL,X86] Enable or disable automatic NUMA balancing.
+			Allowed values are enable and disable
+
 	numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
 			one of ['zone', 'node', 'default'] can be specified
 			This can be set from sysctl after boot.

include/linux/sched.h

@@ -1563,10 +1563,14 @@ struct task_struct {
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int node, int pages, bool migrated);
+extern void set_numabalancing_state(bool enabled);
 #else
 static inline void task_numa_fault(int node, int pages, bool migrated)
 {
 }
+static inline void set_numabalancing_state(bool enabled)
+{
+}
 #endif

 /*

init/Kconfig

@@ -720,6 +720,14 @@ config ARCH_USES_NUMA_PROT_NONE
 	depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
 	depends on NUMA_BALANCING

+config NUMA_BALANCING_DEFAULT_ENABLED
+	bool "Automatically enable NUMA aware memory/task placement"
+	default y
+	depends on NUMA_BALANCING
+	help
+	  If set, automatic NUMA balancing will be enabled if running on a NUMA
+	  machine.
+
 config NUMA_BALANCING
 	bool "Memory placement aware NUMA scheduler"
 	default y
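As an illustration of how the new option composes with the existing one (an assumed example, not part of the patch): a build that compiles the balancer in but leaves it disabled by default, so it is only activated via numa_balancing=enable or at runtime, would carry something like this in .config:

    CONFIG_NUMA_BALANCING=y
    # CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set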

kernel/sched/core.c

@@ -192,23 +192,10 @@ static void sched_feat_disable(int i) { };
 static void sched_feat_enable(int i) { };
 #endif /* HAVE_JUMP_LABEL */

-static ssize_t
-sched_feat_write(struct file *filp, const char __user *ubuf,
-		size_t cnt, loff_t *ppos)
+static int sched_feat_set(char *cmp)
 {
-	char buf[64];
-	char *cmp;
-	int neg = 0;
 	int i;
+	int neg = 0;

-	if (cnt > 63)
-		cnt = 63;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-	cmp = strstrip(buf);
-
 	if (strncmp(cmp, "NO_", 3) == 0) {
 		neg = 1;
@@ -228,6 +215,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 		}
 	}

+	return i;
+}
+
+static ssize_t
+sched_feat_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	char *cmp;
+	int i;
+
+	if (cnt > 63)
+		cnt = 63;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+	cmp = strstrip(buf);
+
+	i = sched_feat_set(cmp);
 	if (i == __SCHED_FEAT_NR)
 		return -EINVAL;
@@ -1549,6 +1557,16 @@ static void __sched_fork(struct task_struct *p)
 #endif /* CONFIG_NUMA_BALANCING */
 }

+#ifdef CONFIG_NUMA_BALANCING
+void set_numabalancing_state(bool enabled)
+{
+	if (enabled)
+		sched_feat_set("NUMA");
+	else
+		sched_feat_set("NO_NUMA");
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /*
  * fork()/clone()-time setup:
  */
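The refactor in this file splits the string parsing out of sched_feat_write() into sched_feat_set(), so the debugfs write path and the new set_numabalancing_state() helper share the same "NO_<feature>" convention. As a minimal sketch of that shared parsing (a userspace mock, not kernel code; the two-entry feature table is made up for illustration):

    #include <stdio.h>
    #include <string.h>
    #include <stdbool.h>

    /* Made-up two-entry table standing in for the real sched_feat_names[]. */
    static const char * const feat_names[] = { "NUMA", "LB_MIN" };
    static bool feat_enabled[] = { false, false };
    #define NR_FEATS (sizeof(feat_names) / sizeof(feat_names[0]))

    /* Mirrors sched_feat_set(): a "NO_" prefix negates, and a return value
     * equal to NR_FEATS means "no such feature". */
    static size_t feat_set(const char *cmp)
    {
            bool neg = false;
            size_t i;

            if (strncmp(cmp, "NO_", 3) == 0) {
                    neg = true;
                    cmp += 3;
            }

            for (i = 0; i < NR_FEATS; i++) {
                    if (strcmp(cmp, feat_names[i]) == 0) {
                            feat_enabled[i] = !neg;
                            break;
                    }
            }

            return i;
    }

    int main(void)
    {
            feat_set("NUMA");       /* roughly what set_numabalancing_state(true) does */
            feat_set("NO_NUMA");    /* roughly what set_numabalancing_state(false) does */
            printf("NUMA feature enabled: %d\n", feat_enabled[0]);
            return 0;
    }

In the kernel the table is sched_feat_names[], generated from kernel/sched/features.h, and the caller turns a no-match return into -EINVAL, as the second hunk above shows.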

kernel/sched/fair.c

@@ -811,6 +811,9 @@ void task_numa_fault(int node, int pages, bool migrated)
 {
 	struct task_struct *p = current;

+	if (!sched_feat_numa(NUMA))
+		return;
+
 	/* FIXME: Allocate task-specific structure for placement policy here */

 	/*

kernel/sched/features.h

@@ -63,8 +63,10 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)

 /*
- * Apply the automatic NUMA scheduling policy
+ * Apply the automatic NUMA scheduling policy. Enabled automatically
+ * at runtime if running on a NUMA machine. Can be controlled via
+ * numa_balancing=
  */
 #ifdef CONFIG_NUMA_BALANCING
-SCHED_FEAT(NUMA,	true)
+SCHED_FEAT(NUMA,	false)
 #endif

mm/mempolicy.c

@@ -2521,6 +2521,50 @@ void mpol_free_shared_policy(struct shared_policy *p)
 	mutex_unlock(&p->mutex);
 }

+#ifdef CONFIG_NUMA_BALANCING
+static bool __initdata numabalancing_override;
+
+static void __init check_numabalancing_enable(void)
+{
+	bool numabalancing_default = false;
+
+	if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED))
+		numabalancing_default = true;
+
+	if (nr_node_ids > 1 && !numabalancing_override) {
+		printk(KERN_INFO "Enabling automatic NUMA balancing. "
+			"Configure with numa_balancing= or sysctl");
+		set_numabalancing_state(numabalancing_default);
+	}
+}
+
+static int __init setup_numabalancing(char *str)
+{
+	int ret = 0;
+	if (!str)
+		goto out;
+	numabalancing_override = true;
+
+	if (!strcmp(str, "enable")) {
+		set_numabalancing_state(true);
+		ret = 1;
+	} else if (!strcmp(str, "disable")) {
+		set_numabalancing_state(false);
+		ret = 1;
+	}
+out:
+	if (!ret)
+		printk(KERN_WARNING "Unable to parse numa_balancing=\n");
+
+	return ret;
+}
+__setup("numa_balancing=", setup_numabalancing);
+#else
+static inline void __init check_numabalancing_enable(void)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /* assumes fs == KERNEL_DS */
 void __init numa_policy_init(void)
 {
@@ -2571,6 +2615,8 @@ void __init numa_policy_init(void)

 	if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
 		printk("numa_policy_init: interleaving failed\n");
+
+	check_numabalancing_enable();
 }

 /* Reset policy of current process to default */
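Putting the pieces together, the boot-time behaviour added here is: an explicit numa_balancing= on the command line sets numabalancing_override, so check_numabalancing_enable() leaves that explicit choice alone; otherwise, on a machine with more than one node, the CONFIG_NUMA_BALANCING_DEFAULT_ENABLED default is applied and the kernel logs the message added above, roughly:

    Enabling automatic NUMA balancing. Configure with numa_balancing= or sysctl

(The text is the KERN_INFO string from the mm/mempolicy.c hunk; note that it is printed whenever the default is applied, regardless of whether that default is enable or disable.)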