linux/arch/x86/kernel/itmt.c
Ricardo Neri 183b8ec38f x86/sched: Decrease further the priorities of SMT siblings
When scheduling, it is better to prefer a separate physical core rather
than the SMT sibling of a high priority core. The existing formula to
compute priorities takes such fact in consideration. There may exist,
however, combinations of priorities (i.e., maximum frequencies) in which
the priority of high-numbered SMT siblings of high-priority cores collides
with the priority of low-numbered SMT siblings of low-priority cores.

Consider for instance an SMT2 system with CPUs [0, 1] with priority 60 and
[2, 3] with priority 30(CPUs in brackets are SMT siblings. In such a case,
the resulting priorities would be [120, 60], [60, 30]. Thus, to ensure
that CPU2 has higher priority than CPU1, divide the raw priority by the
squared SMT iterator. The resulting priorities are [120, 30]. [60, 15].

Originally-by: Len Brown <len.brown@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210911011819.12184-2-ricardo.neri-calderon@linux.intel.com
2021-10-05 15:51:59 +02:00

206 lines
5.1 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* itmt.c: Support Intel Turbo Boost Max Technology 3.0
*
* (C) Copyright 2016 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
* the maximum turbo frequencies of some cores in a CPU package may be
* higher than for the other cores in the same package. In that case,
* better performance can be achieved by making the scheduler prefer
* to run tasks on the CPUs with higher max turbo frequencies.
*
* This file provides functions and data structures for enabling the
* scheduler to favor scheduling on cores can be boosted to a higher
* frequency under ITMT.
*/
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>
#include <linux/sysctl.h>
#include <linux/nodemask.h>
static DEFINE_MUTEX(itmt_update_mutex);
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
/* Boolean to track if system has ITMT capabilities */
static bool __read_mostly sched_itmt_capable;
/*
* Boolean to control whether we want to move processes to cpu capable
* of higher turbo frequency for cpus supporting Intel Turbo Boost Max
* Technology 3.0.
*
* It can be set via /proc/sys/kernel/sched_itmt_enabled
*/
unsigned int __read_mostly sysctl_sched_itmt_enabled;
static int sched_itmt_update_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
unsigned int old_sysctl;
int ret;
mutex_lock(&itmt_update_mutex);
if (!sched_itmt_capable) {
mutex_unlock(&itmt_update_mutex);
return -EINVAL;
}
old_sysctl = sysctl_sched_itmt_enabled;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
x86_topology_update = true;
rebuild_sched_domains();
}
mutex_unlock(&itmt_update_mutex);
return ret;
}
static struct ctl_table itmt_kern_table[] = {
{
.procname = "sched_itmt_enabled",
.data = &sysctl_sched_itmt_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_itmt_update_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{}
};
static struct ctl_table itmt_root_table[] = {
{
.procname = "kernel",
.mode = 0555,
.child = itmt_kern_table,
},
{}
};
static struct ctl_table_header *itmt_sysctl_header;
/**
* sched_set_itmt_support() - Indicate platform supports ITMT
*
* This function is used by the OS to indicate to scheduler that the platform
* is capable of supporting the ITMT feature.
*
* The current scheme has the pstate driver detects if the system
* is ITMT capable and call sched_set_itmt_support.
*
* This must be done only after sched_set_itmt_core_prio
* has been called to set the cpus' priorities.
* It must not be called with cpu hot plug lock
* held as we need to acquire the lock to rebuild sched domains
* later.
*
* Return: 0 on success
*/
int sched_set_itmt_support(void)
{
mutex_lock(&itmt_update_mutex);
if (sched_itmt_capable) {
mutex_unlock(&itmt_update_mutex);
return 0;
}
itmt_sysctl_header = register_sysctl_table(itmt_root_table);
if (!itmt_sysctl_header) {
mutex_unlock(&itmt_update_mutex);
return -ENOMEM;
}
sched_itmt_capable = true;
sysctl_sched_itmt_enabled = 1;
x86_topology_update = true;
rebuild_sched_domains();
mutex_unlock(&itmt_update_mutex);
return 0;
}
/**
* sched_clear_itmt_support() - Revoke platform's support of ITMT
*
* This function is used by the OS to indicate that it has
* revoked the platform's support of ITMT feature.
*
* It must not be called with cpu hot plug lock
* held as we need to acquire the lock to rebuild sched domains
* later.
*/
void sched_clear_itmt_support(void)
{
mutex_lock(&itmt_update_mutex);
if (!sched_itmt_capable) {
mutex_unlock(&itmt_update_mutex);
return;
}
sched_itmt_capable = false;
if (itmt_sysctl_header) {
unregister_sysctl_table(itmt_sysctl_header);
itmt_sysctl_header = NULL;
}
if (sysctl_sched_itmt_enabled) {
/* disable sched_itmt if we are no longer ITMT capable */
sysctl_sched_itmt_enabled = 0;
x86_topology_update = true;
rebuild_sched_domains();
}
mutex_unlock(&itmt_update_mutex);
}
int arch_asym_cpu_priority(int cpu)
{
return per_cpu(sched_core_priority, cpu);
}
/**
* sched_set_itmt_core_prio() - Set CPU priority based on ITMT
* @prio: Priority of cpu core
* @core_cpu: The cpu number associated with the core
*
* The pstate driver will find out the max boost frequency
* and call this function to set a priority proportional
* to the max boost frequency. CPU with higher boost
* frequency will receive higher priority.
*
* No need to rebuild sched domain after updating
* the CPU priorities. The sched domains have no
* dependency on CPU priorities.
*/
void sched_set_itmt_core_prio(int prio, int core_cpu)
{
int cpu, i = 1;
for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
int smt_prio;
/*
* Ensure that the siblings are moved to the end
* of the priority chain and only used when
* all other high priority cpus are out of capacity.
*/
smt_prio = prio * smp_num_siblings / (i * i);
per_cpu(sched_core_priority, cpu) = smt_prio;
i++;
}
}