mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-13 14:24:11 +08:00
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-devel
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-devel: (62 commits) sched: build fix sched: better rt-group documentation sched: features fix sched: /debug/sched_features sched: add SCHED_FEAT_DEADLINE sched: debug: show a weight tree sched: fair: weight calculations sched: fair-group: de-couple load-balancing from the rb-trees sched: fair-group scheduling vs latency sched: rt-group: optimize dequeue_rt_stack sched: debug: add some debug code to handle the full hierarchy sched: fair-group: SMP-nice for group scheduling sched, cpuset: customize sched domains, core sched, cpuset: customize sched domains, docs sched: prepatory code movement sched: rt: multi level group constraints sched: task_group hierarchy sched: fix the task_group hierarchy for UID grouping sched: allow the group scheduler to have multiple levels sched: mix tasks and groups ...
This commit is contained in:
commit
ec965350bb
@ -8,6 +8,7 @@ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
|
||||
Modified by Paul Jackson <pj@sgi.com>
|
||||
Modified by Christoph Lameter <clameter@sgi.com>
|
||||
Modified by Paul Menage <menage@google.com>
|
||||
Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
|
||||
|
||||
CONTENTS:
|
||||
=========
|
||||
@ -20,7 +21,8 @@ CONTENTS:
|
||||
1.5 What is memory_pressure ?
|
||||
1.6 What is memory spread ?
|
||||
1.7 What is sched_load_balance ?
|
||||
1.8 How do I use cpusets ?
|
||||
1.8 What is sched_relax_domain_level ?
|
||||
1.9 How do I use cpusets ?
|
||||
2. Usage Examples and Syntax
|
||||
2.1 Basic Usage
|
||||
2.2 Adding/removing cpus
|
||||
@ -497,7 +499,73 @@ the cpuset code to update these sched domains, it compares the new
|
||||
partition requested with the current, and updates its sched domains,
|
||||
removing the old and adding the new, for each change.
|
||||
|
||||
1.8 How do I use cpusets ?
|
||||
|
||||
1.8 What is sched_relax_domain_level ?
|
||||
--------------------------------------
|
||||
|
||||
In sched domain, the scheduler migrates tasks in 2 ways; periodic load
|
||||
balance on tick, and at time of some schedule events.
|
||||
|
||||
When a task is woken up, scheduler try to move the task on idle CPU.
|
||||
For example, if a task A running on CPU X activates another task B
|
||||
on the same CPU X, and if CPU Y is X's sibling and performing idle,
|
||||
then scheduler migrate task B to CPU Y so that task B can start on
|
||||
CPU Y without waiting task A on CPU X.
|
||||
|
||||
And if a CPU run out of tasks in its runqueue, the CPU try to pull
|
||||
extra tasks from other busy CPUs to help them before it is going to
|
||||
be idle.
|
||||
|
||||
Of course it takes some searching cost to find movable tasks and/or
|
||||
idle CPUs, the scheduler might not search all CPUs in the domain
|
||||
everytime. In fact, in some architectures, the searching ranges on
|
||||
events are limited in the same socket or node where the CPU locates,
|
||||
while the load balance on tick searchs all.
|
||||
|
||||
For example, assume CPU Z is relatively far from CPU X. Even if CPU Z
|
||||
is idle while CPU X and the siblings are busy, scheduler can't migrate
|
||||
woken task B from X to Z since it is out of its searching range.
|
||||
As the result, task B on CPU X need to wait task A or wait load balance
|
||||
on the next tick. For some applications in special situation, waiting
|
||||
1 tick may be too long.
|
||||
|
||||
The 'sched_relax_domain_level' file allows you to request changing
|
||||
this searching range as you like. This file takes int value which
|
||||
indicates size of searching range in levels ideally as follows,
|
||||
otherwise initial value -1 that indicates the cpuset has no request.
|
||||
|
||||
-1 : no request. use system default or follow request of others.
|
||||
0 : no search.
|
||||
1 : search siblings (hyperthreads in a core).
|
||||
2 : search cores in a package.
|
||||
3 : search cpus in a node [= system wide on non-NUMA system]
|
||||
( 4 : search nodes in a chunk of node [on NUMA system] )
|
||||
( 5~ : search system wide [on NUMA system])
|
||||
|
||||
This file is per-cpuset and affect the sched domain where the cpuset
|
||||
belongs to. Therefore if the flag 'sched_load_balance' of a cpuset
|
||||
is disabled, then 'sched_relax_domain_level' have no effect since
|
||||
there is no sched domain belonging the cpuset.
|
||||
|
||||
If multiple cpusets are overlapping and hence they form a single sched
|
||||
domain, the largest value among those is used. Be careful, if one
|
||||
requests 0 and others are -1 then 0 is used.
|
||||
|
||||
Note that modifying this file will have both good and bad effects,
|
||||
and whether it is acceptable or not will be depend on your situation.
|
||||
Don't modify this file if you are not sure.
|
||||
|
||||
If your situation is:
|
||||
- The migration costs between each cpu can be assumed considerably
|
||||
small(for you) due to your special application's behavior or
|
||||
special hardware support for CPU cache etc.
|
||||
- The searching cost doesn't have impact(for you) or you can make
|
||||
the searching cost enough small by managing cpuset to compact etc.
|
||||
- The latency is required even it sacrifices cache hit rate etc.
|
||||
then increasing 'sched_relax_domain_level' would benefit you.
|
||||
|
||||
|
||||
1.9 How do I use cpusets ?
|
||||
--------------------------
|
||||
|
||||
In order to minimize the impact of cpusets on critical kernel
|
||||
|
@ -1,59 +1,177 @@
|
||||
Real-Time group scheduling
|
||||
--------------------------
|
||||
|
||||
CONTENTS
|
||||
========
|
||||
|
||||
1. Overview
|
||||
1.1 The problem
|
||||
1.2 The solution
|
||||
2. The interface
|
||||
2.1 System-wide settings
|
||||
2.2 Default behaviour
|
||||
2.3 Basis for grouping tasks
|
||||
3. Future plans
|
||||
|
||||
|
||||
Real-Time group scheduling.
|
||||
|
||||
The problem space:
|
||||
|
||||
In order to schedule multiple groups of realtime tasks each group must
|
||||
be assigned a fixed portion of the CPU time available. Without a minimum
|
||||
guarantee a realtime group can obviously fall short. A fuzzy upper limit
|
||||
is of no use since it cannot be relied upon. Which leaves us with just
|
||||
the single fixed portion.
|
||||
|
||||
CPU time is divided by means of specifying how much time can be spent
|
||||
running in a given period. Say a frame fixed realtime renderer must
|
||||
deliver 25 frames a second, which yields a period of 0.04s. Now say
|
||||
it will also have to play some music and respond to input, leaving it
|
||||
with around 80% for the graphics. We can then give this group a runtime
|
||||
of 0.8 * 0.04s = 0.032s.
|
||||
|
||||
This way the graphics group will have a 0.04s period with a 0.032s runtime
|
||||
limit.
|
||||
|
||||
Now if the audio thread needs to refill the DMA buffer every 0.005s, but
|
||||
needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s
|
||||
= 0.00015s.
|
||||
1. Overview
|
||||
===========
|
||||
|
||||
|
||||
The Interface:
|
||||
1.1 The problem
|
||||
---------------
|
||||
|
||||
system wide:
|
||||
Realtime scheduling is all about determinism, a group has to be able to rely on
|
||||
the amount of bandwidth (eg. CPU time) being constant. In order to schedule
|
||||
multiple groups of realtime tasks, each group must be assigned a fixed portion
|
||||
of the CPU time available. Without a minimum guarantee a realtime group can
|
||||
obviously fall short. A fuzzy upper limit is of no use since it cannot be
|
||||
relied upon. Which leaves us with just the single fixed portion.
|
||||
|
||||
/proc/sys/kernel/sched_rt_period_ms
|
||||
/proc/sys/kernel/sched_rt_runtime_us
|
||||
1.2 The solution
|
||||
----------------
|
||||
|
||||
CONFIG_FAIR_USER_SCHED
|
||||
CPU time is divided by means of specifying how much time can be spent running
|
||||
in a given period. We allocate this "run time" for each realtime group which
|
||||
the other realtime groups will not be permitted to use.
|
||||
|
||||
/sys/kernel/uids/<uid>/cpu_rt_runtime_us
|
||||
Any time not allocated to a realtime group will be used to run normal priority
|
||||
tasks (SCHED_OTHER). Any allocated run time not used will also be picked up by
|
||||
SCHED_OTHER.
|
||||
|
||||
or
|
||||
Let's consider an example: a frame fixed realtime renderer must deliver 25
|
||||
frames a second, which yields a period of 0.04s per frame. Now say it will also
|
||||
have to play some music and respond to input, leaving it with around 80% CPU
|
||||
time dedicated for the graphics. We can then give this group a run time of 0.8
|
||||
* 0.04s = 0.032s.
|
||||
|
||||
CONFIG_FAIR_CGROUP_SCHED
|
||||
This way the graphics group will have a 0.04s period with a 0.032s run time
|
||||
limit. Now if the audio thread needs to refill the DMA buffer every 0.005s, but
|
||||
needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s =
|
||||
0.00015s. So this group can be scheduled with a period of 0.005s and a run time
|
||||
of 0.00015s.
|
||||
|
||||
/cgroup/<cgroup>/cpu.rt_runtime_us
|
||||
The remaining CPU time will be used for user input and other tass. Because
|
||||
realtime tasks have explicitly allocated the CPU time they need to perform
|
||||
their tasks, buffer underruns in the graphocs or audio can be eliminated.
|
||||
|
||||
[ time is specified in us because the interface is s32; this gives an
|
||||
operating range of ~35m to 1us ]
|
||||
NOTE: the above example is not fully implemented as of yet (2.6.25). We still
|
||||
lack an EDF scheduler to make non-uniform periods usable.
|
||||
|
||||
The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ].
|
||||
|
||||
A runtime of -1 specifies runtime == period, ie. no limit.
|
||||
2. The Interface
|
||||
================
|
||||
|
||||
New groups get the period from /proc/sys/kernel/sched_rt_period_us and
|
||||
a runtime of 0.
|
||||
|
||||
Settings are constrained to:
|
||||
2.1 System wide settings
|
||||
------------------------
|
||||
|
||||
The system wide settings are configured under the /proc virtual file system:
|
||||
|
||||
/proc/sys/kernel/sched_rt_period_us:
|
||||
The scheduling period that is equivalent to 100% CPU bandwidth
|
||||
|
||||
/proc/sys/kernel/sched_rt_runtime_us:
|
||||
A global limit on how much time realtime scheduling may use. Even without
|
||||
CONFIG_RT_GROUP_SCHED enabled, this will limit time reserved to realtime
|
||||
processes. With CONFIG_RT_GROUP_SCHED it signifies the total bandwidth
|
||||
available to all realtime groups.
|
||||
|
||||
* Time is specified in us because the interface is s32. This gives an
|
||||
operating range from 1us to about 35 minutes.
|
||||
* sched_rt_period_us takes values from 1 to INT_MAX.
|
||||
* sched_rt_runtime_us takes values from -1 to (INT_MAX - 1).
|
||||
* A run time of -1 specifies runtime == period, ie. no limit.
|
||||
|
||||
|
||||
2.2 Default behaviour
|
||||
---------------------
|
||||
|
||||
The default values for sched_rt_period_us (1000000 or 1s) and
|
||||
sched_rt_runtime_us (950000 or 0.95s). This gives 0.05s to be used by
|
||||
SCHED_OTHER (non-RT tasks). These defaults were chosen so that a run-away
|
||||
realtime tasks will not lock up the machine but leave a little time to recover
|
||||
it. By setting runtime to -1 you'd get the old behaviour back.
|
||||
|
||||
By default all bandwidth is assigned to the root group and new groups get the
|
||||
period from /proc/sys/kernel/sched_rt_period_us and a run time of 0. If you
|
||||
want to assign bandwidth to another group, reduce the root group's bandwidth
|
||||
and assign some or all of the difference to another group.
|
||||
|
||||
Realtime group scheduling means you have to assign a portion of total CPU
|
||||
bandwidth to the group before it will accept realtime tasks. Therefore you will
|
||||
not be able to run realtime tasks as any user other than root until you have
|
||||
done that, even if the user has the rights to run processes with realtime
|
||||
priority!
|
||||
|
||||
|
||||
2.3 Basis for grouping tasks
|
||||
----------------------------
|
||||
|
||||
There are two compile-time settings for allocating CPU bandwidth. These are
|
||||
configured using the "Basis for grouping tasks" multiple choice menu under
|
||||
General setup > Group CPU Scheduler:
|
||||
|
||||
a. CONFIG_USER_SCHED (aka "Basis for grouping tasks" = "user id")
|
||||
|
||||
This lets you use the virtual files under
|
||||
"/sys/kernel/uids/<uid>/cpu_rt_runtime_us" to control he CPU time reserved for
|
||||
each user .
|
||||
|
||||
The other option is:
|
||||
|
||||
.o CONFIG_CGROUP_SCHED (aka "Basis for grouping tasks" = "Control groups")
|
||||
|
||||
This uses the /cgroup virtual file system and "/cgroup/<cgroup>/cpu.rt_runtime_us"
|
||||
to control the CPU time reserved for each control group instead.
|
||||
|
||||
For more information on working with control groups, you should read
|
||||
Documentation/cgroups.txt as well.
|
||||
|
||||
Group settings are checked against the following limits in order to keep the configuration
|
||||
schedulable:
|
||||
|
||||
\Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
|
||||
|
||||
in order to keep the configuration schedulable.
|
||||
For now, this can be simplified to just the following (but see Future plans):
|
||||
|
||||
\Sum_{i} runtime_{i} <= global_runtime
|
||||
|
||||
|
||||
3. Future plans
|
||||
===============
|
||||
|
||||
There is work in progress to make the scheduling period for each group
|
||||
("/sys/kernel/uids/<uid>/cpu_rt_period_us" or
|
||||
"/cgroup/<cgroup>/cpu.rt_period_us" respectively) configurable as well.
|
||||
|
||||
The constraint on the period is that a subgroup must have a smaller or
|
||||
equal period to its parent. But realistically its not very useful _yet_
|
||||
as its prone to starvation without deadline scheduling.
|
||||
|
||||
Consider two sibling groups A and B; both have 50% bandwidth, but A's
|
||||
period is twice the length of B's.
|
||||
|
||||
* group A: period=100000us, runtime=10000us
|
||||
- this runs for 0.01s once every 0.1s
|
||||
|
||||
* group B: period= 50000us, runtime=10000us
|
||||
- this runs for 0.01s twice every 0.1s (or once every 0.05 sec).
|
||||
|
||||
This means that currently a while (1) loop in A will run for the full period of
|
||||
B and can starve B's tasks (assuming they are of lower priority) for a whole
|
||||
period.
|
||||
|
||||
The next project will be SCHED_EDF (Earliest Deadline First scheduling) to bring
|
||||
full deadline scheduling to the linux kernel. Deadline scheduling the above
|
||||
groups and treating end of the period as a deadline will ensure that they both
|
||||
get their allocated time.
|
||||
|
||||
Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
|
||||
the biggest challenge as the current linux PI infrastructure is geared towards
|
||||
the limited static priority levels 0-139. With deadline scheduling you need to
|
||||
do deadline inheritance (since priority is inversely proportional to the
|
||||
deadline delta (deadline - now).
|
||||
|
||||
This means the whole PI machinery will have to be reworked - and that is one of
|
||||
the most complex pieces of code we have.
|
||||
|
@ -117,6 +117,9 @@ config ARCH_HAS_CPU_RELAX
|
||||
config HAVE_SETUP_PER_CPU_AREA
|
||||
def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
|
||||
|
||||
config HAVE_CPUMASK_OF_CPU_MAP
|
||||
def_bool X86_64_SMP
|
||||
|
||||
config ARCH_HIBERNATION_POSSIBLE
|
||||
def_bool y
|
||||
depends on !SMP || !X86_VOYAGER
|
||||
|
@ -91,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
|
||||
|
||||
/* Make sure we are running on right CPU */
|
||||
saved_mask = current->cpus_allowed;
|
||||
retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
if (retval)
|
||||
return -1;
|
||||
|
||||
@ -128,7 +128,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
|
||||
cx->address);
|
||||
|
||||
out:
|
||||
set_cpus_allowed(current, saved_mask);
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
return retval;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
|
||||
|
@ -192,9 +192,9 @@ static void drv_read(struct drv_cmd *cmd)
|
||||
cpumask_t saved_mask = current->cpus_allowed;
|
||||
cmd->val = 0;
|
||||
|
||||
set_cpus_allowed(current, cmd->mask);
|
||||
set_cpus_allowed_ptr(current, &cmd->mask);
|
||||
do_drv_read(cmd);
|
||||
set_cpus_allowed(current, saved_mask);
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
}
|
||||
|
||||
static void drv_write(struct drv_cmd *cmd)
|
||||
@ -203,30 +203,30 @@ static void drv_write(struct drv_cmd *cmd)
|
||||
unsigned int i;
|
||||
|
||||
for_each_cpu_mask(i, cmd->mask) {
|
||||
set_cpus_allowed(current, cpumask_of_cpu(i));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
|
||||
do_drv_write(cmd);
|
||||
}
|
||||
|
||||
set_cpus_allowed(current, saved_mask);
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
return;
|
||||
}
|
||||
|
||||
static u32 get_cur_val(cpumask_t mask)
|
||||
static u32 get_cur_val(const cpumask_t *mask)
|
||||
{
|
||||
struct acpi_processor_performance *perf;
|
||||
struct drv_cmd cmd;
|
||||
|
||||
if (unlikely(cpus_empty(mask)))
|
||||
if (unlikely(cpus_empty(*mask)))
|
||||
return 0;
|
||||
|
||||
switch (per_cpu(drv_data, first_cpu(mask))->cpu_feature) {
|
||||
switch (per_cpu(drv_data, first_cpu(*mask))->cpu_feature) {
|
||||
case SYSTEM_INTEL_MSR_CAPABLE:
|
||||
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
|
||||
cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
|
||||
break;
|
||||
case SYSTEM_IO_CAPABLE:
|
||||
cmd.type = SYSTEM_IO_CAPABLE;
|
||||
perf = per_cpu(drv_data, first_cpu(mask))->acpi_data;
|
||||
perf = per_cpu(drv_data, first_cpu(*mask))->acpi_data;
|
||||
cmd.addr.io.port = perf->control_register.address;
|
||||
cmd.addr.io.bit_width = perf->control_register.bit_width;
|
||||
break;
|
||||
@ -234,7 +234,7 @@ static u32 get_cur_val(cpumask_t mask)
|
||||
return 0;
|
||||
}
|
||||
|
||||
cmd.mask = mask;
|
||||
cmd.mask = *mask;
|
||||
|
||||
drv_read(&cmd);
|
||||
|
||||
@ -271,7 +271,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
|
||||
unsigned int retval;
|
||||
|
||||
saved_mask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
if (get_cpu() != cpu) {
|
||||
/* We were not able to run on requested processor */
|
||||
put_cpu();
|
||||
@ -329,7 +329,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
|
||||
retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100;
|
||||
|
||||
put_cpu();
|
||||
set_cpus_allowed(current, saved_mask);
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
|
||||
dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
|
||||
return retval;
|
||||
@ -347,13 +347,13 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data);
|
||||
freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
|
||||
dprintk("cur freq = %u\n", freq);
|
||||
|
||||
return freq;
|
||||
}
|
||||
|
||||
static unsigned int check_freqs(cpumask_t mask, unsigned int freq,
|
||||
static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq,
|
||||
struct acpi_cpufreq_data *data)
|
||||
{
|
||||
unsigned int cur_freq;
|
||||
@ -449,7 +449,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
|
||||
drv_write(&cmd);
|
||||
|
||||
if (acpi_pstate_strict) {
|
||||
if (!check_freqs(cmd.mask, freqs.new, data)) {
|
||||
if (!check_freqs(&cmd.mask, freqs.new, data)) {
|
||||
dprintk("acpi_cpufreq_target failed (%d)\n",
|
||||
policy->cpu);
|
||||
return -EAGAIN;
|
||||
|
@ -478,12 +478,12 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
|
||||
|
||||
static int check_supported_cpu(unsigned int cpu)
|
||||
{
|
||||
cpumask_t oldmask = CPU_MASK_ALL;
|
||||
cpumask_t oldmask;
|
||||
u32 eax, ebx, ecx, edx;
|
||||
unsigned int rc = 0;
|
||||
|
||||
oldmask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
|
||||
if (smp_processor_id() != cpu) {
|
||||
printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
|
||||
@ -528,7 +528,7 @@ static int check_supported_cpu(unsigned int cpu)
|
||||
rc = 1;
|
||||
|
||||
out:
|
||||
set_cpus_allowed(current, oldmask);
|
||||
set_cpus_allowed_ptr(current, &oldmask);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -1015,7 +1015,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
|
||||
/* Driver entry point to switch to the target frequency */
|
||||
static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
|
||||
{
|
||||
cpumask_t oldmask = CPU_MASK_ALL;
|
||||
cpumask_t oldmask;
|
||||
struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
|
||||
u32 checkfid;
|
||||
u32 checkvid;
|
||||
@ -1030,7 +1030,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
|
||||
|
||||
/* only run on specific CPU from here on */
|
||||
oldmask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
|
||||
|
||||
if (smp_processor_id() != pol->cpu) {
|
||||
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
|
||||
@ -1085,7 +1085,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
|
||||
ret = 0;
|
||||
|
||||
err_out:
|
||||
set_cpus_allowed(current, oldmask);
|
||||
set_cpus_allowed_ptr(current, &oldmask);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1104,7 +1104,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
|
||||
static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
|
||||
{
|
||||
struct powernow_k8_data *data;
|
||||
cpumask_t oldmask = CPU_MASK_ALL;
|
||||
cpumask_t oldmask;
|
||||
int rc;
|
||||
|
||||
if (!cpu_online(pol->cpu))
|
||||
@ -1145,7 +1145,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
|
||||
|
||||
/* only run on specific CPU from here on */
|
||||
oldmask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
|
||||
|
||||
if (smp_processor_id() != pol->cpu) {
|
||||
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
|
||||
@ -1164,7 +1164,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
|
||||
fidvid_msr_init();
|
||||
|
||||
/* run on any CPU again */
|
||||
set_cpus_allowed(current, oldmask);
|
||||
set_cpus_allowed_ptr(current, &oldmask);
|
||||
|
||||
if (cpu_family == CPU_HW_PSTATE)
|
||||
pol->cpus = cpumask_of_cpu(pol->cpu);
|
||||
@ -1205,7 +1205,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
|
||||
return 0;
|
||||
|
||||
err_out:
|
||||
set_cpus_allowed(current, oldmask);
|
||||
set_cpus_allowed_ptr(current, &oldmask);
|
||||
powernow_k8_cpu_exit_acpi(data);
|
||||
|
||||
kfree(data);
|
||||
@ -1242,10 +1242,11 @@ static unsigned int powernowk8_get (unsigned int cpu)
|
||||
if (!data)
|
||||
return -EINVAL;
|
||||
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
if (smp_processor_id() != cpu) {
|
||||
printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu);
|
||||
set_cpus_allowed(current, oldmask);
|
||||
printk(KERN_ERR PFX
|
||||
"limiting to CPU %d failed in powernowk8_get\n", cpu);
|
||||
set_cpus_allowed_ptr(current, &oldmask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1253,13 +1254,14 @@ static unsigned int powernowk8_get (unsigned int cpu)
|
||||
goto out;
|
||||
|
||||
if (cpu_family == CPU_HW_PSTATE)
|
||||
khz = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
|
||||
khz = find_khz_freq_from_pstate(data->powernow_table,
|
||||
data->currpstate);
|
||||
else
|
||||
khz = find_khz_freq_from_fid(data->currfid);
|
||||
|
||||
|
||||
out:
|
||||
set_cpus_allowed(current, oldmask);
|
||||
set_cpus_allowed_ptr(current, &oldmask);
|
||||
return khz;
|
||||
}
|
||||
|
||||
|
@ -315,7 +315,7 @@ static unsigned int get_cur_freq(unsigned int cpu)
|
||||
cpumask_t saved_mask;
|
||||
|
||||
saved_mask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
if (smp_processor_id() != cpu)
|
||||
return 0;
|
||||
|
||||
@ -333,7 +333,7 @@ static unsigned int get_cur_freq(unsigned int cpu)
|
||||
clock_freq = extract_clock(l, cpu, 1);
|
||||
}
|
||||
|
||||
set_cpus_allowed(current, saved_mask);
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
return clock_freq;
|
||||
}
|
||||
|
||||
@ -487,7 +487,7 @@ static int centrino_target (struct cpufreq_policy *policy,
|
||||
else
|
||||
cpu_set(j, set_mask);
|
||||
|
||||
set_cpus_allowed(current, set_mask);
|
||||
set_cpus_allowed_ptr(current, &set_mask);
|
||||
preempt_disable();
|
||||
if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) {
|
||||
dprintk("couldn't limit to CPUs in this domain\n");
|
||||
@ -555,7 +555,8 @@ static int centrino_target (struct cpufreq_policy *policy,
|
||||
|
||||
if (!cpus_empty(covered_cpus)) {
|
||||
for_each_cpu_mask(j, covered_cpus) {
|
||||
set_cpus_allowed(current, cpumask_of_cpu(j));
|
||||
set_cpus_allowed_ptr(current,
|
||||
&cpumask_of_cpu(j));
|
||||
wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
|
||||
}
|
||||
}
|
||||
@ -569,12 +570,12 @@ static int centrino_target (struct cpufreq_policy *policy,
|
||||
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
|
||||
}
|
||||
}
|
||||
set_cpus_allowed(current, saved_mask);
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
return 0;
|
||||
|
||||
migrate_end:
|
||||
preempt_enable();
|
||||
set_cpus_allowed(current, saved_mask);
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -229,22 +229,22 @@ static unsigned int speedstep_detect_chipset (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int _speedstep_get(cpumask_t cpus)
|
||||
static unsigned int _speedstep_get(const cpumask_t *cpus)
|
||||
{
|
||||
unsigned int speed;
|
||||
cpumask_t cpus_allowed;
|
||||
|
||||
cpus_allowed = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpus);
|
||||
set_cpus_allowed_ptr(current, cpus);
|
||||
speed = speedstep_get_processor_frequency(speedstep_processor);
|
||||
set_cpus_allowed(current, cpus_allowed);
|
||||
set_cpus_allowed_ptr(current, &cpus_allowed);
|
||||
dprintk("detected %u kHz as current frequency\n", speed);
|
||||
return speed;
|
||||
}
|
||||
|
||||
static unsigned int speedstep_get(unsigned int cpu)
|
||||
{
|
||||
return _speedstep_get(cpumask_of_cpu(cpu));
|
||||
return _speedstep_get(&cpumask_of_cpu(cpu));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -267,7 +267,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
|
||||
if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
|
||||
return -EINVAL;
|
||||
|
||||
freqs.old = _speedstep_get(policy->cpus);
|
||||
freqs.old = _speedstep_get(&policy->cpus);
|
||||
freqs.new = speedstep_freqs[newstate].frequency;
|
||||
freqs.cpu = policy->cpu;
|
||||
|
||||
@ -285,12 +285,12 @@ static int speedstep_target (struct cpufreq_policy *policy,
|
||||
}
|
||||
|
||||
/* switch to physical CPU where state is to be changed */
|
||||
set_cpus_allowed(current, policy->cpus);
|
||||
set_cpus_allowed_ptr(current, &policy->cpus);
|
||||
|
||||
speedstep_set_state(newstate);
|
||||
|
||||
/* allow to be run on all CPUs */
|
||||
set_cpus_allowed(current, cpus_allowed);
|
||||
set_cpus_allowed_ptr(current, &cpus_allowed);
|
||||
|
||||
for_each_cpu_mask(i, policy->cpus) {
|
||||
freqs.cpu = i;
|
||||
@ -326,7 +326,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
|
||||
#endif
|
||||
|
||||
cpus_allowed = current->cpus_allowed;
|
||||
set_cpus_allowed(current, policy->cpus);
|
||||
set_cpus_allowed_ptr(current, &policy->cpus);
|
||||
|
||||
/* detect low and high frequency and transition latency */
|
||||
result = speedstep_get_freqs(speedstep_processor,
|
||||
@ -334,12 +334,12 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
|
||||
&speedstep_freqs[SPEEDSTEP_HIGH].frequency,
|
||||
&policy->cpuinfo.transition_latency,
|
||||
&speedstep_set_state);
|
||||
set_cpus_allowed(current, cpus_allowed);
|
||||
set_cpus_allowed_ptr(current, &cpus_allowed);
|
||||
if (result)
|
||||
return result;
|
||||
|
||||
/* get current speed setting */
|
||||
speed = _speedstep_get(policy->cpus);
|
||||
speed = _speedstep_get(&policy->cpus);
|
||||
if (!speed)
|
||||
return -EIO;
|
||||
|
||||
|
@ -129,7 +129,7 @@ struct _cpuid4_info {
|
||||
union _cpuid4_leaf_ebx ebx;
|
||||
union _cpuid4_leaf_ecx ecx;
|
||||
unsigned long size;
|
||||
cpumask_t shared_cpu_map;
|
||||
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
|
||||
};
|
||||
|
||||
unsigned short num_cache_leaves;
|
||||
@ -451,8 +451,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
|
||||
}
|
||||
|
||||
/* pointer to _cpuid4_info array (for each cache leaf) */
|
||||
static struct _cpuid4_info *cpuid4_info[NR_CPUS];
|
||||
#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y]))
|
||||
static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
|
||||
#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
|
||||
@ -474,7 +474,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
|
||||
if (cpu_data(i).apicid >> index_msb ==
|
||||
c->apicid >> index_msb) {
|
||||
cpu_set(i, this_leaf->shared_cpu_map);
|
||||
if (i != cpu && cpuid4_info[i]) {
|
||||
if (i != cpu && per_cpu(cpuid4_info, i)) {
|
||||
sibling_leaf = CPUID4_INFO_IDX(i, index);
|
||||
cpu_set(cpu, sibling_leaf->shared_cpu_map);
|
||||
}
|
||||
@ -505,8 +505,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
|
||||
for (i = 0; i < num_cache_leaves; i++)
|
||||
cache_remove_shared_cpu_map(cpu, i);
|
||||
|
||||
kfree(cpuid4_info[cpu]);
|
||||
cpuid4_info[cpu] = NULL;
|
||||
kfree(per_cpu(cpuid4_info, cpu));
|
||||
per_cpu(cpuid4_info, cpu) = NULL;
|
||||
}
|
||||
|
||||
static int __cpuinit detect_cache_attributes(unsigned int cpu)
|
||||
@ -519,13 +519,13 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
|
||||
if (num_cache_leaves == 0)
|
||||
return -ENOENT;
|
||||
|
||||
cpuid4_info[cpu] = kzalloc(
|
||||
per_cpu(cpuid4_info, cpu) = kzalloc(
|
||||
sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
|
||||
if (cpuid4_info[cpu] == NULL)
|
||||
if (per_cpu(cpuid4_info, cpu) == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
oldmask = current->cpus_allowed;
|
||||
retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
if (retval)
|
||||
goto out;
|
||||
|
||||
@ -542,12 +542,12 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
|
||||
}
|
||||
cache_shared_cpu_map_setup(cpu, j);
|
||||
}
|
||||
set_cpus_allowed(current, oldmask);
|
||||
set_cpus_allowed_ptr(current, &oldmask);
|
||||
|
||||
out:
|
||||
if (retval) {
|
||||
kfree(cpuid4_info[cpu]);
|
||||
cpuid4_info[cpu] = NULL;
|
||||
kfree(per_cpu(cpuid4_info, cpu));
|
||||
per_cpu(cpuid4_info, cpu) = NULL;
|
||||
}
|
||||
|
||||
return retval;
|
||||
@ -561,7 +561,7 @@ out:
|
||||
extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
|
||||
|
||||
/* pointer to kobject for cpuX/cache */
|
||||
static struct kobject * cache_kobject[NR_CPUS];
|
||||
static DEFINE_PER_CPU(struct kobject *, cache_kobject);
|
||||
|
||||
struct _index_kobject {
|
||||
struct kobject kobj;
|
||||
@ -570,8 +570,8 @@ struct _index_kobject {
|
||||
};
|
||||
|
||||
/* pointer to array of kobjects for cpuX/cache/indexY */
|
||||
static struct _index_kobject *index_kobject[NR_CPUS];
|
||||
#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y]))
|
||||
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
|
||||
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
|
||||
|
||||
#define show_one_plus(file_name, object, val) \
|
||||
static ssize_t show_##file_name \
|
||||
@ -591,11 +591,32 @@ static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
|
||||
return sprintf (buf, "%luK\n", this_leaf->size / 1024);
|
||||
}
|
||||
|
||||
static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
|
||||
static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
|
||||
int type, char *buf)
|
||||
{
|
||||
char mask_str[NR_CPUS];
|
||||
cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
|
||||
return sprintf(buf, "%s\n", mask_str);
|
||||
ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
|
||||
int n = 0;
|
||||
|
||||
if (len > 1) {
|
||||
cpumask_t *mask = &this_leaf->shared_cpu_map;
|
||||
|
||||
n = type?
|
||||
cpulist_scnprintf(buf, len-2, *mask):
|
||||
cpumask_scnprintf(buf, len-2, *mask);
|
||||
buf[n++] = '\n';
|
||||
buf[n] = '\0';
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
|
||||
{
|
||||
return show_shared_cpu_map_func(leaf, 0, buf);
|
||||
}
|
||||
|
||||
static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
|
||||
{
|
||||
return show_shared_cpu_map_func(leaf, 1, buf);
|
||||
}
|
||||
|
||||
static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
|
||||
@ -633,6 +654,7 @@ define_one_ro(ways_of_associativity);
|
||||
define_one_ro(number_of_sets);
|
||||
define_one_ro(size);
|
||||
define_one_ro(shared_cpu_map);
|
||||
define_one_ro(shared_cpu_list);
|
||||
|
||||
static struct attribute * default_attrs[] = {
|
||||
&type.attr,
|
||||
@ -643,6 +665,7 @@ static struct attribute * default_attrs[] = {
|
||||
&number_of_sets.attr,
|
||||
&size.attr,
|
||||
&shared_cpu_map.attr,
|
||||
&shared_cpu_list.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -684,10 +707,10 @@ static struct kobj_type ktype_percpu_entry = {
|
||||
|
||||
static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
|
||||
{
|
||||
kfree(cache_kobject[cpu]);
|
||||
kfree(index_kobject[cpu]);
|
||||
cache_kobject[cpu] = NULL;
|
||||
index_kobject[cpu] = NULL;
|
||||
kfree(per_cpu(cache_kobject, cpu));
|
||||
kfree(per_cpu(index_kobject, cpu));
|
||||
per_cpu(cache_kobject, cpu) = NULL;
|
||||
per_cpu(index_kobject, cpu) = NULL;
|
||||
free_cache_attributes(cpu);
|
||||
}
|
||||
|
||||
@ -703,13 +726,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
|
||||
return err;
|
||||
|
||||
/* Allocate all required memory */
|
||||
cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL);
|
||||
if (unlikely(cache_kobject[cpu] == NULL))
|
||||
per_cpu(cache_kobject, cpu) =
|
||||
kzalloc(sizeof(struct kobject), GFP_KERNEL);
|
||||
if (unlikely(per_cpu(cache_kobject, cpu) == NULL))
|
||||
goto err_out;
|
||||
|
||||
index_kobject[cpu] = kzalloc(
|
||||
per_cpu(index_kobject, cpu) = kzalloc(
|
||||
sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
|
||||
if (unlikely(index_kobject[cpu] == NULL))
|
||||
if (unlikely(per_cpu(index_kobject, cpu) == NULL))
|
||||
goto err_out;
|
||||
|
||||
return 0;
|
||||
@ -733,7 +757,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
|
||||
if (unlikely(retval < 0))
|
||||
return retval;
|
||||
|
||||
retval = kobject_init_and_add(cache_kobject[cpu], &ktype_percpu_entry,
|
||||
retval = kobject_init_and_add(per_cpu(cache_kobject, cpu),
|
||||
&ktype_percpu_entry,
|
||||
&sys_dev->kobj, "%s", "cache");
|
||||
if (retval < 0) {
|
||||
cpuid4_cache_sysfs_exit(cpu);
|
||||
@ -745,13 +770,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
|
||||
this_object->cpu = cpu;
|
||||
this_object->index = i;
|
||||
retval = kobject_init_and_add(&(this_object->kobj),
|
||||
&ktype_cache, cache_kobject[cpu],
|
||||
&ktype_cache,
|
||||
per_cpu(cache_kobject, cpu),
|
||||
"index%1lu", i);
|
||||
if (unlikely(retval)) {
|
||||
for (j = 0; j < i; j++) {
|
||||
kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj));
|
||||
}
|
||||
kobject_put(cache_kobject[cpu]);
|
||||
kobject_put(per_cpu(cache_kobject, cpu));
|
||||
cpuid4_cache_sysfs_exit(cpu);
|
||||
break;
|
||||
}
|
||||
@ -760,7 +786,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
|
||||
if (!retval)
|
||||
cpu_set(cpu, cache_dev_map);
|
||||
|
||||
kobject_uevent(cache_kobject[cpu], KOBJ_ADD);
|
||||
kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -769,7 +795,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
|
||||
unsigned int cpu = sys_dev->id;
|
||||
unsigned long i;
|
||||
|
||||
if (cpuid4_info[cpu] == NULL)
|
||||
if (per_cpu(cpuid4_info, cpu) == NULL)
|
||||
return;
|
||||
if (!cpu_isset(cpu, cache_dev_map))
|
||||
return;
|
||||
@ -777,7 +803,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
|
||||
|
||||
for (i = 0; i < num_cache_leaves; i++)
|
||||
kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
|
||||
kobject_put(cache_kobject[cpu]);
|
||||
kobject_put(per_cpu(cache_kobject, cpu));
|
||||
cpuid4_cache_sysfs_exit(cpu);
|
||||
}
|
||||
|
||||
|
@ -251,18 +251,18 @@ struct threshold_attr {
|
||||
ssize_t(*store) (struct threshold_block *, const char *, size_t count);
|
||||
};
|
||||
|
||||
static cpumask_t affinity_set(unsigned int cpu)
|
||||
static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
|
||||
cpumask_t *newmask)
|
||||
{
|
||||
cpumask_t oldmask = current->cpus_allowed;
|
||||
cpumask_t newmask = CPU_MASK_NONE;
|
||||
cpu_set(cpu, newmask);
|
||||
set_cpus_allowed(current, newmask);
|
||||
return oldmask;
|
||||
*oldmask = current->cpus_allowed;
|
||||
cpus_clear(*newmask);
|
||||
cpu_set(cpu, *newmask);
|
||||
set_cpus_allowed_ptr(current, newmask);
|
||||
}
|
||||
|
||||
static void affinity_restore(cpumask_t oldmask)
|
||||
static void affinity_restore(const cpumask_t *oldmask)
|
||||
{
|
||||
set_cpus_allowed(current, oldmask);
|
||||
set_cpus_allowed_ptr(current, oldmask);
|
||||
}
|
||||
|
||||
#define SHOW_FIELDS(name) \
|
||||
@ -277,15 +277,15 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
char *end;
|
||||
cpumask_t oldmask;
|
||||
cpumask_t oldmask, newmask;
|
||||
unsigned long new = simple_strtoul(buf, &end, 0);
|
||||
if (end == buf)
|
||||
return -EINVAL;
|
||||
b->interrupt_enable = !!new;
|
||||
|
||||
oldmask = affinity_set(b->cpu);
|
||||
affinity_set(b->cpu, &oldmask, &newmask);
|
||||
threshold_restart_bank(b, 0, 0);
|
||||
affinity_restore(oldmask);
|
||||
affinity_restore(&oldmask);
|
||||
|
||||
return end - buf;
|
||||
}
|
||||
@ -294,7 +294,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
char *end;
|
||||
cpumask_t oldmask;
|
||||
cpumask_t oldmask, newmask;
|
||||
u16 old;
|
||||
unsigned long new = simple_strtoul(buf, &end, 0);
|
||||
if (end == buf)
|
||||
@ -306,9 +306,9 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
|
||||
old = b->threshold_limit;
|
||||
b->threshold_limit = new;
|
||||
|
||||
oldmask = affinity_set(b->cpu);
|
||||
affinity_set(b->cpu, &oldmask, &newmask);
|
||||
threshold_restart_bank(b, 0, old);
|
||||
affinity_restore(oldmask);
|
||||
affinity_restore(&oldmask);
|
||||
|
||||
return end - buf;
|
||||
}
|
||||
@ -316,10 +316,10 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
|
||||
static ssize_t show_error_count(struct threshold_block *b, char *buf)
|
||||
{
|
||||
u32 high, low;
|
||||
cpumask_t oldmask;
|
||||
oldmask = affinity_set(b->cpu);
|
||||
cpumask_t oldmask, newmask;
|
||||
affinity_set(b->cpu, &oldmask, &newmask);
|
||||
rdmsr(b->address, low, high);
|
||||
affinity_restore(oldmask);
|
||||
affinity_restore(&oldmask);
|
||||
return sprintf(buf, "%x\n",
|
||||
(high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
|
||||
}
|
||||
@ -327,10 +327,10 @@ static ssize_t show_error_count(struct threshold_block *b, char *buf)
|
||||
static ssize_t store_error_count(struct threshold_block *b,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
cpumask_t oldmask;
|
||||
oldmask = affinity_set(b->cpu);
|
||||
cpumask_t oldmask, newmask;
|
||||
affinity_set(b->cpu, &oldmask, &newmask);
|
||||
threshold_restart_bank(b, 1, 0);
|
||||
affinity_restore(oldmask);
|
||||
affinity_restore(&oldmask);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -468,7 +468,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
|
||||
{
|
||||
int i, err = 0;
|
||||
struct threshold_bank *b = NULL;
|
||||
cpumask_t oldmask = CPU_MASK_NONE;
|
||||
cpumask_t oldmask, newmask;
|
||||
char name[32];
|
||||
|
||||
sprintf(name, "threshold_bank%i", bank);
|
||||
@ -519,10 +519,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
|
||||
|
||||
per_cpu(threshold_banks, cpu)[bank] = b;
|
||||
|
||||
oldmask = affinity_set(cpu);
|
||||
affinity_set(cpu, &oldmask, &newmask);
|
||||
err = allocate_threshold_blocks(cpu, bank, 0,
|
||||
MSR_IA32_MC0_MISC + bank * 4);
|
||||
affinity_restore(oldmask);
|
||||
affinity_restore(&oldmask);
|
||||
|
||||
if (err)
|
||||
goto out_free;
|
||||
|
@ -785,7 +785,7 @@ static void __clear_irq_vector(int irq)
|
||||
per_cpu(vector_irq, cpu)[vector] = -1;
|
||||
|
||||
cfg->vector = 0;
|
||||
cfg->domain = CPU_MASK_NONE;
|
||||
cpus_clear(cfg->domain);
|
||||
}
|
||||
|
||||
void __setup_vector_irq(int cpu)
|
||||
|
@ -402,7 +402,7 @@ static int do_microcode_update (void)
|
||||
|
||||
if (!uci->valid)
|
||||
continue;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
error = get_maching_microcode(new_mc, cpu);
|
||||
if (error < 0)
|
||||
goto out;
|
||||
@ -416,7 +416,7 @@ out:
|
||||
vfree(new_mc);
|
||||
if (cursor < 0)
|
||||
error = cursor;
|
||||
set_cpus_allowed(current, old);
|
||||
set_cpus_allowed_ptr(current, &old);
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -579,7 +579,7 @@ static int apply_microcode_check_cpu(int cpu)
|
||||
return 0;
|
||||
|
||||
old = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
|
||||
/* Check if the microcode we have in memory matches the CPU */
|
||||
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
|
||||
@ -610,7 +610,7 @@ static int apply_microcode_check_cpu(int cpu)
|
||||
" sig=0x%x, pf=0x%x, rev=0x%x\n",
|
||||
cpu, uci->sig, uci->pf, uci->rev);
|
||||
|
||||
set_cpus_allowed(current, old);
|
||||
set_cpus_allowed_ptr(current, &old);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -621,13 +621,13 @@ static void microcode_init_cpu(int cpu, int resume)
|
||||
|
||||
old = current->cpus_allowed;
|
||||
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
mutex_lock(µcode_mutex);
|
||||
collect_cpu_info(cpu);
|
||||
if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
|
||||
cpu_request_microcode(cpu);
|
||||
mutex_unlock(µcode_mutex);
|
||||
set_cpus_allowed(current, old);
|
||||
set_cpus_allowed_ptr(current, &old);
|
||||
}
|
||||
|
||||
static void microcode_fini_cpu(int cpu)
|
||||
@ -657,14 +657,14 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
|
||||
old = current->cpus_allowed;
|
||||
|
||||
get_online_cpus();
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
|
||||
mutex_lock(µcode_mutex);
|
||||
if (uci->valid)
|
||||
err = cpu_request_microcode(cpu);
|
||||
mutex_unlock(µcode_mutex);
|
||||
put_online_cpus();
|
||||
set_cpus_allowed(current, old);
|
||||
set_cpus_allowed_ptr(current, &old);
|
||||
}
|
||||
if (err)
|
||||
return err;
|
||||
|
@ -420,7 +420,7 @@ static void native_machine_shutdown(void)
|
||||
reboot_cpu_id = smp_processor_id();
|
||||
|
||||
/* Make certain I only run on the appropriate processor */
|
||||
set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
|
||||
|
||||
/* O.K Now that I'm on the appropriate processor,
|
||||
* stop all of the others.
|
||||
|
@ -54,6 +54,24 @@ static void __init setup_per_cpu_maps(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
|
||||
cpumask_t *cpumask_of_cpu_map __read_mostly;
|
||||
EXPORT_SYMBOL(cpumask_of_cpu_map);
|
||||
|
||||
/* requires nr_cpu_ids to be initialized */
|
||||
static void __init setup_cpumask_of_cpu(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* alloc_bootmem zeroes memory */
|
||||
cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
|
||||
for (i = 0; i < nr_cpu_ids; i++)
|
||||
cpu_set(i, cpumask_of_cpu_map[i]);
|
||||
}
|
||||
#else
|
||||
static inline void setup_cpumask_of_cpu(void) { }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Great future not-so-futuristic plan: make i386 and x86_64 do it
|
||||
@ -70,7 +88,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
|
||||
*/
|
||||
void __init setup_per_cpu_areas(void)
|
||||
{
|
||||
int i;
|
||||
int i, highest_cpu = 0;
|
||||
unsigned long size;
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
@ -104,10 +122,18 @@ void __init setup_per_cpu_areas(void)
|
||||
__per_cpu_offset[i] = ptr - __per_cpu_start;
|
||||
#endif
|
||||
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
|
||||
|
||||
highest_cpu = i;
|
||||
}
|
||||
|
||||
nr_cpu_ids = highest_cpu + 1;
|
||||
printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
|
||||
|
||||
/* Setup percpu data maps */
|
||||
setup_per_cpu_maps();
|
||||
|
||||
/* Setup cpumask_of_cpu map */
|
||||
setup_cpumask_of_cpu();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -386,9 +386,10 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
|
||||
* Sets up the system RAM area from start_pfn to end_pfn according to the
|
||||
* numa=fake command-line option.
|
||||
*/
|
||||
static struct bootnode nodes[MAX_NUMNODES] __initdata;
|
||||
|
||||
static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
struct bootnode nodes[MAX_NUMNODES];
|
||||
u64 size, addr = start_pfn << PAGE_SHIFT;
|
||||
u64 max_addr = end_pfn << PAGE_SHIFT;
|
||||
int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
|
||||
|
@ -23,8 +23,8 @@
|
||||
#include "op_x86_model.h"
|
||||
|
||||
static struct op_x86_model_spec const *model;
|
||||
static struct op_msrs cpu_msrs[NR_CPUS];
|
||||
static unsigned long saved_lvtpc[NR_CPUS];
|
||||
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
|
||||
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
|
||||
|
||||
static int nmi_start(void);
|
||||
static void nmi_stop(void);
|
||||
@ -89,7 +89,7 @@ static int profile_exceptions_notify(struct notifier_block *self,
|
||||
|
||||
switch (val) {
|
||||
case DIE_NMI:
|
||||
if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
|
||||
if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
|
||||
ret = NOTIFY_STOP;
|
||||
break;
|
||||
default:
|
||||
@ -126,7 +126,7 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs)
|
||||
static void nmi_save_registers(void *dummy)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct op_msrs *msrs = &cpu_msrs[cpu];
|
||||
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
|
||||
nmi_cpu_save_registers(msrs);
|
||||
}
|
||||
|
||||
@ -134,10 +134,10 @@ static void free_msrs(void)
|
||||
{
|
||||
int i;
|
||||
for_each_possible_cpu(i) {
|
||||
kfree(cpu_msrs[i].counters);
|
||||
cpu_msrs[i].counters = NULL;
|
||||
kfree(cpu_msrs[i].controls);
|
||||
cpu_msrs[i].controls = NULL;
|
||||
kfree(per_cpu(cpu_msrs, i).counters);
|
||||
per_cpu(cpu_msrs, i).counters = NULL;
|
||||
kfree(per_cpu(cpu_msrs, i).controls);
|
||||
per_cpu(cpu_msrs, i).controls = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -149,13 +149,15 @@ static int allocate_msrs(void)
|
||||
|
||||
int i;
|
||||
for_each_possible_cpu(i) {
|
||||
cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
|
||||
if (!cpu_msrs[i].counters) {
|
||||
per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
|
||||
GFP_KERNEL);
|
||||
if (!per_cpu(cpu_msrs, i).counters) {
|
||||
success = 0;
|
||||
break;
|
||||
}
|
||||
cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
|
||||
if (!cpu_msrs[i].controls) {
|
||||
per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
|
||||
GFP_KERNEL);
|
||||
if (!per_cpu(cpu_msrs, i).controls) {
|
||||
success = 0;
|
||||
break;
|
||||
}
|
||||
@ -170,11 +172,11 @@ static int allocate_msrs(void)
|
||||
static void nmi_cpu_setup(void *dummy)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct op_msrs *msrs = &cpu_msrs[cpu];
|
||||
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
|
||||
spin_lock(&oprofilefs_lock);
|
||||
model->setup_ctrs(msrs);
|
||||
spin_unlock(&oprofilefs_lock);
|
||||
saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
|
||||
per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
}
|
||||
|
||||
@ -203,13 +205,15 @@ static int nmi_setup(void)
|
||||
*/
|
||||
|
||||
/* Assume saved/restored counters are the same on all CPUs */
|
||||
model->fill_in_addresses(&cpu_msrs[0]);
|
||||
model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (cpu != 0) {
|
||||
memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters,
|
||||
memcpy(per_cpu(cpu_msrs, cpu).counters,
|
||||
per_cpu(cpu_msrs, 0).counters,
|
||||
sizeof(struct op_msr) * model->num_counters);
|
||||
|
||||
memcpy(cpu_msrs[cpu].controls, cpu_msrs[0].controls,
|
||||
memcpy(per_cpu(cpu_msrs, cpu).controls,
|
||||
per_cpu(cpu_msrs, 0).controls,
|
||||
sizeof(struct op_msr) * model->num_controls);
|
||||
}
|
||||
|
||||
@ -249,7 +253,7 @@ static void nmi_cpu_shutdown(void *dummy)
|
||||
{
|
||||
unsigned int v;
|
||||
int cpu = smp_processor_id();
|
||||
struct op_msrs *msrs = &cpu_msrs[cpu];
|
||||
struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
|
||||
|
||||
/* restoring APIC_LVTPC can trigger an apic error because the delivery
|
||||
* mode and vector nr combination can be illegal. That's by design: on
|
||||
@ -258,23 +262,24 @@ static void nmi_cpu_shutdown(void *dummy)
|
||||
*/
|
||||
v = apic_read(APIC_LVTERR);
|
||||
apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
|
||||
apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
|
||||
apic_write(APIC_LVTERR, v);
|
||||
nmi_restore_registers(msrs);
|
||||
}
|
||||
|
||||
static void nmi_shutdown(void)
|
||||
{
|
||||
struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
|
||||
nmi_enabled = 0;
|
||||
on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
|
||||
unregister_die_notifier(&profile_exceptions_nb);
|
||||
model->shutdown(cpu_msrs);
|
||||
model->shutdown(msrs);
|
||||
free_msrs();
|
||||
}
|
||||
|
||||
static void nmi_cpu_start(void *dummy)
|
||||
{
|
||||
struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()];
|
||||
struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
|
||||
model->start(msrs);
|
||||
}
|
||||
|
||||
@ -286,7 +291,7 @@ static int nmi_start(void)
|
||||
|
||||
static void nmi_cpu_stop(void *dummy)
|
||||
{
|
||||
struct op_msrs const *msrs = &cpu_msrs[smp_processor_id()];
|
||||
struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
|
||||
model->stop(msrs);
|
||||
}
|
||||
|
||||
|
@ -838,10 +838,10 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr)
|
||||
* Migrate task to the cpu pointed by pr.
|
||||
*/
|
||||
saved_mask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(pr->id));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id));
|
||||
ret = pr->throttling.acpi_processor_get_throttling(pr);
|
||||
/* restore the previous state */
|
||||
set_cpus_allowed(current, saved_mask);
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1025,7 +1025,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
|
||||
* it can be called only for the cpu pointed by pr.
|
||||
*/
|
||||
if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) {
|
||||
set_cpus_allowed(current, cpumask_of_cpu(pr->id));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id));
|
||||
ret = p_throttling->acpi_processor_set_throttling(pr,
|
||||
t_state.target_state);
|
||||
} else {
|
||||
@ -1056,7 +1056,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
|
||||
continue;
|
||||
}
|
||||
t_state.cpu = i;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(i));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
|
||||
ret = match_pr->throttling.
|
||||
acpi_processor_set_throttling(
|
||||
match_pr, t_state.target_state);
|
||||
@ -1074,7 +1074,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
|
||||
&t_state);
|
||||
}
|
||||
/* restore the previous state */
|
||||
set_cpus_allowed(current, saved_mask);
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -102,6 +102,51 @@ static ssize_t show_crash_notes(struct sys_device *dev, char *buf)
|
||||
static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Print cpu online, possible, present, and system maps
|
||||
*/
|
||||
static ssize_t print_cpus_map(char *buf, cpumask_t *map)
|
||||
{
|
||||
int n = cpulist_scnprintf(buf, PAGE_SIZE-2, *map);
|
||||
|
||||
buf[n++] = '\n';
|
||||
buf[n] = '\0';
|
||||
return n;
|
||||
}
|
||||
|
||||
#define print_cpus_func(type) \
|
||||
static ssize_t print_cpus_##type(struct sysdev_class *class, char *buf) \
|
||||
{ \
|
||||
return print_cpus_map(buf, &cpu_##type##_map); \
|
||||
} \
|
||||
struct sysdev_class_attribute attr_##type##_map = \
|
||||
_SYSDEV_CLASS_ATTR(type, 0444, print_cpus_##type, NULL)
|
||||
|
||||
print_cpus_func(online);
|
||||
print_cpus_func(possible);
|
||||
print_cpus_func(present);
|
||||
|
||||
struct sysdev_class_attribute *cpu_state_attr[] = {
|
||||
&attr_online_map,
|
||||
&attr_possible_map,
|
||||
&attr_present_map,
|
||||
};
|
||||
|
||||
static int cpu_states_init(void)
|
||||
{
|
||||
int i;
|
||||
int err = 0;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(cpu_state_attr); i++) {
|
||||
int ret;
|
||||
ret = sysdev_class_create_file(&cpu_sysdev_class,
|
||||
cpu_state_attr[i]);
|
||||
if (!err)
|
||||
err = ret;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* register_cpu - Setup a sysfs device for a CPU.
|
||||
* @cpu - cpu->hotpluggable field set to 1 will generate a control file in
|
||||
@ -147,6 +192,9 @@ int __init cpu_dev_init(void)
|
||||
int err;
|
||||
|
||||
err = sysdev_class_register(&cpu_sysdev_class);
|
||||
if (!err)
|
||||
err = cpu_states_init();
|
||||
|
||||
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
|
||||
if (!err)
|
||||
err = sched_create_sysfs_power_savings_entries(&cpu_sysdev_class);
|
||||
|
@ -19,21 +19,34 @@ static struct sysdev_class node_class = {
|
||||
};
|
||||
|
||||
|
||||
static ssize_t node_read_cpumap(struct sys_device * dev, char * buf)
|
||||
static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf)
|
||||
{
|
||||
struct node *node_dev = to_node(dev);
|
||||
cpumask_t mask = node_to_cpumask(node_dev->sysdev.id);
|
||||
node_to_cpumask_ptr(mask, node_dev->sysdev.id);
|
||||
int len;
|
||||
|
||||
/* 2004/06/03: buf currently PAGE_SIZE, need > 1 char per 4 bits. */
|
||||
BUILD_BUG_ON(MAX_NUMNODES/4 > PAGE_SIZE/2);
|
||||
/* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
|
||||
BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
|
||||
|
||||
len = cpumask_scnprintf(buf, PAGE_SIZE-1, mask);
|
||||
len += sprintf(buf + len, "\n");
|
||||
len = type?
|
||||
cpulist_scnprintf(buf, PAGE_SIZE-2, *mask):
|
||||
cpumask_scnprintf(buf, PAGE_SIZE-2, *mask);
|
||||
buf[len++] = '\n';
|
||||
buf[len] = '\0';
|
||||
return len;
|
||||
}
|
||||
|
||||
static SYSDEV_ATTR(cpumap, S_IRUGO, node_read_cpumap, NULL);
|
||||
static inline ssize_t node_read_cpumask(struct sys_device *dev, char *buf)
|
||||
{
|
||||
return node_read_cpumap(dev, 0, buf);
|
||||
}
|
||||
static inline ssize_t node_read_cpulist(struct sys_device *dev, char *buf)
|
||||
{
|
||||
return node_read_cpumap(dev, 1, buf);
|
||||
}
|
||||
|
||||
static SYSDEV_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL);
|
||||
static SYSDEV_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
|
||||
|
||||
#define K(x) ((x) << (PAGE_SHIFT - 10))
|
||||
static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
|
||||
@ -149,6 +162,7 @@ int register_node(struct node *node, int num, struct node *parent)
|
||||
|
||||
if (!error){
|
||||
sysdev_create_file(&node->sysdev, &attr_cpumap);
|
||||
sysdev_create_file(&node->sysdev, &attr_cpulist);
|
||||
sysdev_create_file(&node->sysdev, &attr_meminfo);
|
||||
sysdev_create_file(&node->sysdev, &attr_numastat);
|
||||
sysdev_create_file(&node->sysdev, &attr_distance);
|
||||
@ -166,6 +180,7 @@ int register_node(struct node *node, int num, struct node *parent)
|
||||
void unregister_node(struct node *node)
|
||||
{
|
||||
sysdev_remove_file(&node->sysdev, &attr_cpumap);
|
||||
sysdev_remove_file(&node->sysdev, &attr_cpulist);
|
||||
sysdev_remove_file(&node->sysdev, &attr_meminfo);
|
||||
sysdev_remove_file(&node->sysdev, &attr_numastat);
|
||||
sysdev_remove_file(&node->sysdev, &attr_distance);
|
||||
|
@ -40,15 +40,38 @@ static ssize_t show_##name(struct sys_device *dev, char *buf) \
|
||||
return sprintf(buf, "%d\n", topology_##name(cpu)); \
|
||||
}
|
||||
|
||||
#define define_siblings_show_func(name) \
|
||||
static ssize_t show_##name(struct sys_device *dev, char *buf) \
|
||||
{ \
|
||||
ssize_t len = -1; \
|
||||
unsigned int cpu = dev->id; \
|
||||
len = cpumask_scnprintf(buf, NR_CPUS+1, topology_##name(cpu)); \
|
||||
return (len + sprintf(buf + len, "\n")); \
|
||||
static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
|
||||
{
|
||||
ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
|
||||
int n = 0;
|
||||
|
||||
if (len > 1) {
|
||||
n = type?
|
||||
cpulist_scnprintf(buf, len-2, *mask):
|
||||
cpumask_scnprintf(buf, len-2, *mask);
|
||||
buf[n++] = '\n';
|
||||
buf[n] = '\0';
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
#define define_siblings_show_map(name) \
|
||||
static inline ssize_t show_##name(struct sys_device *dev, char *buf) \
|
||||
{ \
|
||||
unsigned int cpu = dev->id; \
|
||||
return show_cpumap(0, &(topology_##name(cpu)), buf); \
|
||||
}
|
||||
|
||||
#define define_siblings_show_list(name) \
|
||||
static inline ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
|
||||
{ \
|
||||
unsigned int cpu = dev->id; \
|
||||
return show_cpumap(1, &(topology_##name(cpu)), buf); \
|
||||
}
|
||||
|
||||
#define define_siblings_show_func(name) \
|
||||
define_siblings_show_map(name); define_siblings_show_list(name)
|
||||
|
||||
#ifdef topology_physical_package_id
|
||||
define_id_show_func(physical_package_id);
|
||||
define_one_ro(physical_package_id);
|
||||
@ -68,7 +91,9 @@ define_one_ro(core_id);
|
||||
#ifdef topology_thread_siblings
|
||||
define_siblings_show_func(thread_siblings);
|
||||
define_one_ro(thread_siblings);
|
||||
#define ref_thread_siblings_attr &attr_thread_siblings.attr,
|
||||
define_one_ro(thread_siblings_list);
|
||||
#define ref_thread_siblings_attr \
|
||||
&attr_thread_siblings.attr, &attr_thread_siblings_list.attr,
|
||||
#else
|
||||
#define ref_thread_siblings_attr
|
||||
#endif
|
||||
@ -76,7 +101,9 @@ define_one_ro(thread_siblings);
|
||||
#ifdef topology_core_siblings
|
||||
define_siblings_show_func(core_siblings);
|
||||
define_one_ro(core_siblings);
|
||||
#define ref_core_siblings_attr &attr_core_siblings.attr,
|
||||
define_one_ro(core_siblings_list);
|
||||
#define ref_core_siblings_attr \
|
||||
&attr_core_siblings.attr, &attr_core_siblings_list.attr,
|
||||
#else
|
||||
#define ref_core_siblings_attr
|
||||
#endif
|
||||
|
@ -265,7 +265,7 @@ static int smi_request(struct smi_cmd *smi_cmd)
|
||||
|
||||
/* SMI requires CPU 0 */
|
||||
old_mask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(0));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(0));
|
||||
if (smp_processor_id() != 0) {
|
||||
dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
|
||||
__FUNCTION__);
|
||||
@ -285,7 +285,7 @@ static int smi_request(struct smi_cmd *smi_cmd)
|
||||
);
|
||||
|
||||
out:
|
||||
set_cpus_allowed(current, old_mask);
|
||||
set_cpus_allowed_ptr(current, &old_mask);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -182,15 +182,18 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
|
||||
struct mempolicy *oldpol;
|
||||
cpumask_t oldmask = current->cpus_allowed;
|
||||
int node = pcibus_to_node(dev->bus);
|
||||
if (node >= 0 && node_online(node))
|
||||
set_cpus_allowed(current, node_to_cpumask(node));
|
||||
|
||||
if (node >= 0) {
|
||||
node_to_cpumask_ptr(nodecpumask, node);
|
||||
set_cpus_allowed_ptr(current, nodecpumask);
|
||||
}
|
||||
/* And set default memory allocation policy */
|
||||
oldpol = current->mempolicy;
|
||||
current->mempolicy = NULL; /* fall back to system default policy */
|
||||
#endif
|
||||
error = drv->probe(dev, id);
|
||||
#ifdef CONFIG_NUMA
|
||||
set_cpus_allowed(current, oldmask);
|
||||
set_cpus_allowed_ptr(current, &oldmask);
|
||||
current->mempolicy = oldpol;
|
||||
#endif
|
||||
return error;
|
||||
|
@ -73,8 +73,23 @@ static ssize_t local_cpus_show(struct device *dev,
|
||||
|
||||
mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
|
||||
len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
|
||||
strcat(buf,"\n");
|
||||
return 1+len;
|
||||
buf[len++] = '\n';
|
||||
buf[len] = '\0';
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
static ssize_t local_cpulist_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
cpumask_t mask;
|
||||
int len;
|
||||
|
||||
mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
|
||||
len = cpulist_scnprintf(buf, PAGE_SIZE-2, mask);
|
||||
buf[len++] = '\n';
|
||||
buf[len] = '\0';
|
||||
return len;
|
||||
}
|
||||
|
||||
/* show resources */
|
||||
@ -201,6 +216,7 @@ struct device_attribute pci_dev_attrs[] = {
|
||||
__ATTR_RO(class),
|
||||
__ATTR_RO(irq),
|
||||
__ATTR_RO(local_cpus),
|
||||
__ATTR_RO(local_cpulist),
|
||||
__ATTR_RO(modalias),
|
||||
#ifdef CONFIG_NUMA
|
||||
__ATTR_RO(numa_node),
|
||||
|
@ -82,6 +82,7 @@ void pci_remove_legacy_files(struct pci_bus *bus) { return; }
|
||||
* PCI Bus Class Devices
|
||||
*/
|
||||
static ssize_t pci_bus_show_cpuaffinity(struct device *dev,
|
||||
int type,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
@ -89,12 +90,30 @@ static ssize_t pci_bus_show_cpuaffinity(struct device *dev,
|
||||
cpumask_t cpumask;
|
||||
|
||||
cpumask = pcibus_to_cpumask(to_pci_bus(dev));
|
||||
ret = cpumask_scnprintf(buf, PAGE_SIZE, cpumask);
|
||||
if (ret < PAGE_SIZE)
|
||||
buf[ret++] = '\n';
|
||||
ret = type?
|
||||
cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask):
|
||||
cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask);
|
||||
buf[ret++] = '\n';
|
||||
buf[ret] = '\0';
|
||||
return ret;
|
||||
}
|
||||
DEVICE_ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpuaffinity, NULL);
|
||||
|
||||
static ssize_t inline pci_bus_show_cpumaskaffinity(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return pci_bus_show_cpuaffinity(dev, 0, attr, buf);
|
||||
}
|
||||
|
||||
static ssize_t inline pci_bus_show_cpulistaffinity(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return pci_bus_show_cpuaffinity(dev, 1, attr, buf);
|
||||
}
|
||||
|
||||
DEVICE_ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpumaskaffinity, NULL);
|
||||
DEVICE_ATTR(cpulistaffinity, S_IRUGO, pci_bus_show_cpulistaffinity, NULL);
|
||||
|
||||
/*
|
||||
* PCI Bus Class
|
||||
|
@ -41,8 +41,7 @@ static inline cpumask_t node_to_cpumask(int node)
|
||||
|
||||
#define pcibus_to_cpumask(bus) (cpu_online_map)
|
||||
|
||||
#else /* CONFIG_NUMA */
|
||||
# include <asm-generic/topology.h>
|
||||
#endif /* !CONFIG_NUMA */
|
||||
# include <asm-generic/topology.h>
|
||||
|
||||
#endif /* _ASM_ALPHA_TOPOLOGY_H */
|
||||
|
@ -5,10 +5,8 @@
|
||||
|
||||
#error NUMA not supported yet
|
||||
|
||||
#else /* !CONFIG_NUMA */
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
#include <asm-generic/topology.h>
|
||||
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
#endif /* _ASM_TOPOLOGY_H */
|
||||
|
@ -27,6 +27,8 @@
|
||||
#ifndef _ASM_GENERIC_TOPOLOGY_H
|
||||
#define _ASM_GENERIC_TOPOLOGY_H
|
||||
|
||||
#ifndef CONFIG_NUMA
|
||||
|
||||
/* Other architectures wishing to use this simple topology API should fill
|
||||
in the below functions as appropriate in their own <asm/topology.h> file. */
|
||||
#ifndef cpu_to_node
|
||||
@ -52,4 +54,16 @@
|
||||
)
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
/* returns pointer to cpumask for specified node */
|
||||
#ifndef node_to_cpumask_ptr
|
||||
|
||||
#define node_to_cpumask_ptr(v, node) \
|
||||
cpumask_t _##v = node_to_cpumask(node), *v = &_##v
|
||||
|
||||
#define node_to_cpumask_ptr_next(v, node) \
|
||||
_##v = node_to_cpumask(node)
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_GENERIC_TOPOLOGY_H */
|
||||
|
@ -93,7 +93,7 @@ void build_cpu_to_node_map(void);
|
||||
.cache_nice_tries = 2, \
|
||||
.busy_idx = 3, \
|
||||
.idle_idx = 2, \
|
||||
.newidle_idx = 0, /* unused */ \
|
||||
.newidle_idx = 2, \
|
||||
.wake_idx = 1, \
|
||||
.forkexec_idx = 1, \
|
||||
.flags = SD_LOAD_BALANCE \
|
||||
@ -116,6 +116,11 @@ void build_cpu_to_node_map(void);
|
||||
#define smt_capable() (smp_num_siblings > 1)
|
||||
#endif
|
||||
|
||||
#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \
|
||||
CPU_MASK_ALL : \
|
||||
node_to_cpumask(pcibus_to_node(bus)) \
|
||||
)
|
||||
|
||||
#include <asm-generic/topology.h>
|
||||
|
||||
#endif /* _ASM_IA64_TOPOLOGY_H */
|
||||
|
@ -96,11 +96,10 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
#include <asm-generic/topology.h>
|
||||
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#include <asm/cputable.h>
|
||||
#define smt_capable() (cpu_has_feature(CPU_FTR_SMT))
|
||||
|
@ -16,7 +16,7 @@
|
||||
.cache_nice_tries = 2, \
|
||||
.busy_idx = 3, \
|
||||
.idle_idx = 2, \
|
||||
.newidle_idx = 0, \
|
||||
.newidle_idx = 2, \
|
||||
.wake_idx = 1, \
|
||||
.forkexec_idx = 1, \
|
||||
.flags = SD_LOAD_BALANCE \
|
||||
|
@ -88,6 +88,17 @@ static inline int cpu_to_node(int cpu)
|
||||
#endif
|
||||
return per_cpu(x86_cpu_to_node_map, cpu);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
|
||||
/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
|
||||
#define node_to_cpumask_ptr(v, node) \
|
||||
cpumask_t *v = &(node_to_cpumask_map[node])
|
||||
|
||||
#define node_to_cpumask_ptr_next(v, node) \
|
||||
v = &(node_to_cpumask_map[node])
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
@ -136,17 +147,13 @@ extern unsigned long node_remap_size[];
|
||||
|
||||
# define SD_CACHE_NICE_TRIES 2
|
||||
# define SD_IDLE_IDX 2
|
||||
# define SD_NEWIDLE_IDX 0
|
||||
# define SD_NEWIDLE_IDX 2
|
||||
# define SD_FORKEXEC_IDX 1
|
||||
|
||||
#endif
|
||||
|
||||
/* sched_domains SD_NODE_INIT for NUMAQ machines */
|
||||
#define SD_NODE_INIT (struct sched_domain) { \
|
||||
.span = CPU_MASK_NONE, \
|
||||
.parent = NULL, \
|
||||
.child = NULL, \
|
||||
.groups = NULL, \
|
||||
.min_interval = 8, \
|
||||
.max_interval = 32, \
|
||||
.busy_factor = 32, \
|
||||
@ -164,7 +171,6 @@ extern unsigned long node_remap_size[];
|
||||
| SD_WAKE_BALANCE, \
|
||||
.last_balance = jiffies, \
|
||||
.balance_interval = 1, \
|
||||
.nr_balance_failed = 0, \
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64_ACPI_NUMA
|
||||
@ -174,10 +180,10 @@ extern int __node_distance(int, int);
|
||||
|
||||
#else /* CONFIG_NUMA */
|
||||
|
||||
#include <asm-generic/topology.h>
|
||||
|
||||
#endif
|
||||
|
||||
#include <asm-generic/topology.h>
|
||||
|
||||
extern cpumask_t cpu_coregroup_map(int cpu);
|
||||
|
||||
#ifdef ENABLE_TOPO_DEFINES
|
||||
|
@ -108,6 +108,7 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits);
|
||||
|
||||
extern int bitmap_scnprintf(char *buf, unsigned int len,
|
||||
const unsigned long *src, int nbits);
|
||||
extern int bitmap_scnprintf_len(unsigned int len);
|
||||
extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
|
||||
unsigned long *dst, int nbits);
|
||||
extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
|
||||
|
@ -222,8 +222,13 @@ int __next_cpu(int n, const cpumask_t *srcp);
|
||||
#define next_cpu(n, src) ({ (void)(src); 1; })
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
|
||||
extern cpumask_t *cpumask_of_cpu_map;
|
||||
#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu])
|
||||
|
||||
#else
|
||||
#define cpumask_of_cpu(cpu) \
|
||||
({ \
|
||||
(*({ \
|
||||
typeof(_unused_cpumask_arg_) m; \
|
||||
if (sizeof(m) == sizeof(unsigned long)) { \
|
||||
m.bits[0] = 1UL<<(cpu); \
|
||||
@ -231,8 +236,9 @@ int __next_cpu(int n, const cpumask_t *srcp);
|
||||
cpus_clear(m); \
|
||||
cpu_set((cpu), m); \
|
||||
} \
|
||||
m; \
|
||||
})
|
||||
&m; \
|
||||
}))
|
||||
#endif
|
||||
|
||||
#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
|
||||
|
||||
@ -243,6 +249,8 @@ int __next_cpu(int n, const cpumask_t *srcp);
|
||||
[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
|
||||
} }
|
||||
|
||||
#define CPU_MASK_ALL_PTR (&CPU_MASK_ALL)
|
||||
|
||||
#else
|
||||
|
||||
#define CPU_MASK_ALL \
|
||||
@ -251,6 +259,10 @@ int __next_cpu(int n, const cpumask_t *srcp);
|
||||
[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
|
||||
} }
|
||||
|
||||
/* cpu_mask_all is in init/main.c */
|
||||
extern cpumask_t cpu_mask_all;
|
||||
#define CPU_MASK_ALL_PTR (&cpu_mask_all)
|
||||
|
||||
#endif
|
||||
|
||||
#define CPU_MASK_NONE \
|
||||
@ -273,6 +285,13 @@ static inline int __cpumask_scnprintf(char *buf, int len,
|
||||
return bitmap_scnprintf(buf, len, srcp->bits, nbits);
|
||||
}
|
||||
|
||||
#define cpumask_scnprintf_len(len) \
|
||||
__cpumask_scnprintf_len((len))
|
||||
static inline int __cpumask_scnprintf_len(int len)
|
||||
{
|
||||
return bitmap_scnprintf_len(len);
|
||||
}
|
||||
|
||||
#define cpumask_parse_user(ubuf, ulen, dst) \
|
||||
__cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS)
|
||||
static inline int __cpumask_parse_user(const char __user *buf, int len,
|
||||
|
@ -20,8 +20,8 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
|
||||
extern int cpuset_init_early(void);
|
||||
extern int cpuset_init(void);
|
||||
extern void cpuset_init_smp(void);
|
||||
extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
|
||||
extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p);
|
||||
extern void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask);
|
||||
extern void cpuset_cpus_allowed_locked(struct task_struct *p, cpumask_t *mask);
|
||||
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
|
||||
#define cpuset_current_mems_allowed (current->mems_allowed)
|
||||
void cpuset_init_current_mems_allowed(void);
|
||||
@ -84,13 +84,14 @@ static inline int cpuset_init_early(void) { return 0; }
|
||||
static inline int cpuset_init(void) { return 0; }
|
||||
static inline void cpuset_init_smp(void) {}
|
||||
|
||||
static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
|
||||
static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask)
|
||||
{
|
||||
return cpu_possible_map;
|
||||
*mask = cpu_possible_map;
|
||||
}
|
||||
static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p)
|
||||
static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
|
||||
cpumask_t *mask)
|
||||
{
|
||||
return cpu_possible_map;
|
||||
*mask = cpu_possible_map;
|
||||
}
|
||||
|
||||
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
|
||||
|
@ -151,6 +151,9 @@ extern struct group_info init_groups;
|
||||
.cpus_allowed = CPU_MASK_ALL, \
|
||||
.mm = NULL, \
|
||||
.active_mm = &init_mm, \
|
||||
.se = { \
|
||||
.group_node = LIST_HEAD_INIT(tsk.se.group_node), \
|
||||
}, \
|
||||
.rt = { \
|
||||
.run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
|
||||
.time_slice = HZ, \
|
||||
|
@ -327,4 +327,10 @@ extern void ktime_get_ts(struct timespec *ts);
|
||||
/* Get the real (wall-) time in timespec format: */
|
||||
#define ktime_get_real_ts(ts) getnstimeofday(ts)
|
||||
|
||||
static inline ktime_t ns_to_ktime(u64 ns)
|
||||
{
|
||||
static const ktime_t ktime_zero = { .tv64 = 0 };
|
||||
return ktime_add_ns(ktime_zero, ns);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -704,6 +704,7 @@ enum cpu_idle_type {
|
||||
#define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */
|
||||
#define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */
|
||||
#define SD_SERIALIZE 1024 /* Only a single load balancing instance */
|
||||
#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */
|
||||
|
||||
#define BALANCE_FOR_MC_POWER \
|
||||
(sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0)
|
||||
@ -733,12 +734,31 @@ struct sched_group {
|
||||
u32 reciprocal_cpu_power;
|
||||
};
|
||||
|
||||
enum sched_domain_level {
|
||||
SD_LV_NONE = 0,
|
||||
SD_LV_SIBLING,
|
||||
SD_LV_MC,
|
||||
SD_LV_CPU,
|
||||
SD_LV_NODE,
|
||||
SD_LV_ALLNODES,
|
||||
SD_LV_MAX
|
||||
};
|
||||
|
||||
struct sched_domain_attr {
|
||||
int relax_domain_level;
|
||||
};
|
||||
|
||||
#define SD_ATTR_INIT (struct sched_domain_attr) { \
|
||||
.relax_domain_level = -1, \
|
||||
}
|
||||
|
||||
struct sched_domain {
|
||||
/* These fields must be setup */
|
||||
struct sched_domain *parent; /* top domain must be null terminated */
|
||||
struct sched_domain *child; /* bottom domain must be null terminated */
|
||||
struct sched_group *groups; /* the balancing groups of the domain */
|
||||
cpumask_t span; /* span of all CPUs in this domain */
|
||||
int first_cpu; /* cache of the first cpu in this domain */
|
||||
unsigned long min_interval; /* Minimum balance interval ms */
|
||||
unsigned long max_interval; /* Maximum balance interval ms */
|
||||
unsigned int busy_factor; /* less balancing by factor if busy */
|
||||
@ -750,6 +770,7 @@ struct sched_domain {
|
||||
unsigned int wake_idx;
|
||||
unsigned int forkexec_idx;
|
||||
int flags; /* See SD_* */
|
||||
enum sched_domain_level level;
|
||||
|
||||
/* Runtime fields. */
|
||||
unsigned long last_balance; /* init to jiffies. units in jiffies */
|
||||
@ -789,7 +810,8 @@ struct sched_domain {
|
||||
#endif
|
||||
};
|
||||
|
||||
extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new);
|
||||
extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
|
||||
struct sched_domain_attr *dattr_new);
|
||||
extern int arch_reinit_sched_domains(void);
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
@ -889,7 +911,8 @@ struct sched_class {
|
||||
void (*set_curr_task) (struct rq *rq);
|
||||
void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
|
||||
void (*task_new) (struct rq *rq, struct task_struct *p);
|
||||
void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
|
||||
void (*set_cpus_allowed)(struct task_struct *p,
|
||||
const cpumask_t *newmask);
|
||||
|
||||
void (*join_domain)(struct rq *rq);
|
||||
void (*leave_domain)(struct rq *rq);
|
||||
@ -923,6 +946,7 @@ struct load_weight {
|
||||
struct sched_entity {
|
||||
struct load_weight load; /* for load-balancing */
|
||||
struct rb_node run_node;
|
||||
struct list_head group_node;
|
||||
unsigned int on_rq;
|
||||
|
||||
u64 exec_start;
|
||||
@ -982,6 +1006,7 @@ struct sched_rt_entity {
|
||||
unsigned long timeout;
|
||||
int nr_cpus_allowed;
|
||||
|
||||
struct sched_rt_entity *back;
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
struct sched_rt_entity *parent;
|
||||
/* rq on which this entity is (to be) queued: */
|
||||
@ -1502,15 +1527,21 @@ static inline void put_task_struct(struct task_struct *t)
|
||||
#define used_math() tsk_used_math(current)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask);
|
||||
extern int set_cpus_allowed_ptr(struct task_struct *p,
|
||||
const cpumask_t *new_mask);
|
||||
#else
|
||||
static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
|
||||
static inline int set_cpus_allowed_ptr(struct task_struct *p,
|
||||
const cpumask_t *new_mask)
|
||||
{
|
||||
if (!cpu_isset(0, new_mask))
|
||||
if (!cpu_isset(0, *new_mask))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
|
||||
{
|
||||
return set_cpus_allowed_ptr(p, &new_mask);
|
||||
}
|
||||
|
||||
extern unsigned long long sched_clock(void);
|
||||
|
||||
@ -1551,7 +1582,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
|
||||
extern unsigned int sysctl_sched_latency;
|
||||
extern unsigned int sysctl_sched_min_granularity;
|
||||
extern unsigned int sysctl_sched_wakeup_granularity;
|
||||
extern unsigned int sysctl_sched_batch_wakeup_granularity;
|
||||
extern unsigned int sysctl_sched_child_runs_first;
|
||||
extern unsigned int sysctl_sched_features;
|
||||
extern unsigned int sysctl_sched_migration_cost;
|
||||
@ -1564,6 +1594,10 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
|
||||
extern unsigned int sysctl_sched_rt_period;
|
||||
extern int sysctl_sched_rt_runtime;
|
||||
|
||||
int sched_rt_handler(struct ctl_table *table, int write,
|
||||
struct file *filp, void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos);
|
||||
|
||||
extern unsigned int sysctl_sched_compat_yield;
|
||||
|
||||
#ifdef CONFIG_RT_MUTEXES
|
||||
@ -2031,7 +2065,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm)
|
||||
}
|
||||
#endif
|
||||
|
||||
extern long sched_setaffinity(pid_t pid, cpumask_t new_mask);
|
||||
extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
|
||||
extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
|
||||
|
||||
extern int sched_mc_power_savings, sched_smt_power_savings;
|
||||
@ -2041,8 +2075,11 @@ extern void normalize_rt_tasks(void);
|
||||
#ifdef CONFIG_GROUP_SCHED
|
||||
|
||||
extern struct task_group init_task_group;
|
||||
#ifdef CONFIG_USER_SCHED
|
||||
extern struct task_group root_task_group;
|
||||
#endif
|
||||
|
||||
extern struct task_group *sched_create_group(void);
|
||||
extern struct task_group *sched_create_group(struct task_group *parent);
|
||||
extern void sched_destroy_group(struct task_group *tg);
|
||||
extern void sched_move_task(struct task_struct *tsk);
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
@ -2053,6 +2090,9 @@ extern unsigned long sched_group_shares(struct task_group *tg);
|
||||
extern int sched_group_set_rt_runtime(struct task_group *tg,
|
||||
long rt_runtime_us);
|
||||
extern long sched_group_rt_runtime(struct task_group *tg);
|
||||
extern int sched_group_set_rt_period(struct task_group *tg,
|
||||
long rt_period_us);
|
||||
extern long sched_group_rt_period(struct task_group *tg);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -45,12 +45,16 @@ struct sysdev_class_attribute {
|
||||
ssize_t (*store)(struct sysdev_class *, const char *, size_t);
|
||||
};
|
||||
|
||||
#define SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) \
|
||||
struct sysdev_class_attribute attr_##_name = { \
|
||||
#define _SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) \
|
||||
{ \
|
||||
.attr = {.name = __stringify(_name), .mode = _mode }, \
|
||||
.show = _show, \
|
||||
.store = _store, \
|
||||
};
|
||||
}
|
||||
|
||||
#define SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) \
|
||||
struct sysdev_class_attribute attr_##_name = \
|
||||
_SYSDEV_CLASS_ATTR(_name,_mode,_show,_store)
|
||||
|
||||
|
||||
extern int sysdev_class_register(struct sysdev_class *);
|
||||
@ -100,15 +104,16 @@ struct sysdev_attribute {
|
||||
};
|
||||
|
||||
|
||||
#define _SYSDEV_ATTR(_name,_mode,_show,_store) \
|
||||
#define _SYSDEV_ATTR(_name, _mode, _show, _store) \
|
||||
{ \
|
||||
.attr = { .name = __stringify(_name), .mode = _mode }, \
|
||||
.show = _show, \
|
||||
.store = _store, \
|
||||
}
|
||||
|
||||
#define SYSDEV_ATTR(_name,_mode,_show,_store) \
|
||||
struct sysdev_attribute attr_##_name = _SYSDEV_ATTR(_name,_mode,_show,_store);
|
||||
#define SYSDEV_ATTR(_name, _mode, _show, _store) \
|
||||
struct sysdev_attribute attr_##_name = \
|
||||
_SYSDEV_ATTR(_name, _mode, _show, _store);
|
||||
|
||||
extern int sysdev_create_file(struct sys_device *, struct sysdev_attribute *);
|
||||
extern void sysdev_remove_file(struct sys_device *, struct sysdev_attribute *);
|
||||
|
@ -38,16 +38,15 @@
|
||||
#endif
|
||||
|
||||
#ifndef nr_cpus_node
|
||||
#define nr_cpus_node(node) \
|
||||
({ \
|
||||
cpumask_t __tmp__; \
|
||||
__tmp__ = node_to_cpumask(node); \
|
||||
cpus_weight(__tmp__); \
|
||||
#define nr_cpus_node(node) \
|
||||
({ \
|
||||
node_to_cpumask_ptr(__tmp__, node); \
|
||||
cpus_weight(*__tmp__); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#define for_each_node_with_cpus(node) \
|
||||
for_each_online_node(node) \
|
||||
#define for_each_node_with_cpus(node) \
|
||||
for_each_online_node(node) \
|
||||
if (nr_cpus_node(node))
|
||||
|
||||
void arch_update_cpu_topology(void);
|
||||
@ -80,7 +79,9 @@ void arch_update_cpu_topology(void);
|
||||
* by defining their own arch-specific initializer in include/asm/topology.h.
|
||||
* A definition there will automagically override these default initializers
|
||||
* and allow arch-specific performance tuning of sched_domains.
|
||||
* (Only non-zero and non-null fields need be specified.)
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SCHED_SMT
|
||||
/* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is,
|
||||
* so can't we drop this in favor of CONFIG_SCHED_SMT?
|
||||
@ -89,20 +90,10 @@ void arch_update_cpu_topology(void);
|
||||
/* Common values for SMT siblings */
|
||||
#ifndef SD_SIBLING_INIT
|
||||
#define SD_SIBLING_INIT (struct sched_domain) { \
|
||||
.span = CPU_MASK_NONE, \
|
||||
.parent = NULL, \
|
||||
.child = NULL, \
|
||||
.groups = NULL, \
|
||||
.min_interval = 1, \
|
||||
.max_interval = 2, \
|
||||
.busy_factor = 64, \
|
||||
.imbalance_pct = 110, \
|
||||
.cache_nice_tries = 0, \
|
||||
.busy_idx = 0, \
|
||||
.idle_idx = 0, \
|
||||
.newidle_idx = 0, \
|
||||
.wake_idx = 0, \
|
||||
.forkexec_idx = 0, \
|
||||
.flags = SD_LOAD_BALANCE \
|
||||
| SD_BALANCE_NEWIDLE \
|
||||
| SD_BALANCE_FORK \
|
||||
@ -112,7 +103,6 @@ void arch_update_cpu_topology(void);
|
||||
| SD_SHARE_CPUPOWER, \
|
||||
.last_balance = jiffies, \
|
||||
.balance_interval = 1, \
|
||||
.nr_balance_failed = 0, \
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_SCHED_SMT */
|
||||
@ -121,18 +111,12 @@ void arch_update_cpu_topology(void);
|
||||
/* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */
|
||||
#ifndef SD_MC_INIT
|
||||
#define SD_MC_INIT (struct sched_domain) { \
|
||||
.span = CPU_MASK_NONE, \
|
||||
.parent = NULL, \
|
||||
.child = NULL, \
|
||||
.groups = NULL, \
|
||||
.min_interval = 1, \
|
||||
.max_interval = 4, \
|
||||
.busy_factor = 64, \
|
||||
.imbalance_pct = 125, \
|
||||
.cache_nice_tries = 1, \
|
||||
.busy_idx = 2, \
|
||||
.idle_idx = 0, \
|
||||
.newidle_idx = 0, \
|
||||
.wake_idx = 1, \
|
||||
.forkexec_idx = 1, \
|
||||
.flags = SD_LOAD_BALANCE \
|
||||
@ -144,7 +128,6 @@ void arch_update_cpu_topology(void);
|
||||
| BALANCE_FOR_MC_POWER, \
|
||||
.last_balance = jiffies, \
|
||||
.balance_interval = 1, \
|
||||
.nr_balance_failed = 0, \
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_SCHED_MC */
|
||||
@ -152,10 +135,6 @@ void arch_update_cpu_topology(void);
|
||||
/* Common values for CPUs */
|
||||
#ifndef SD_CPU_INIT
|
||||
#define SD_CPU_INIT (struct sched_domain) { \
|
||||
.span = CPU_MASK_NONE, \
|
||||
.parent = NULL, \
|
||||
.child = NULL, \
|
||||
.groups = NULL, \
|
||||
.min_interval = 1, \
|
||||
.max_interval = 4, \
|
||||
.busy_factor = 64, \
|
||||
@ -174,16 +153,11 @@ void arch_update_cpu_topology(void);
|
||||
| BALANCE_FOR_PKG_POWER,\
|
||||
.last_balance = jiffies, \
|
||||
.balance_interval = 1, \
|
||||
.nr_balance_failed = 0, \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* sched_domains SD_ALLNODES_INIT for NUMA machines */
|
||||
#define SD_ALLNODES_INIT (struct sched_domain) { \
|
||||
.span = CPU_MASK_NONE, \
|
||||
.parent = NULL, \
|
||||
.child = NULL, \
|
||||
.groups = NULL, \
|
||||
.min_interval = 64, \
|
||||
.max_interval = 64*num_online_cpus(), \
|
||||
.busy_factor = 128, \
|
||||
@ -191,14 +165,10 @@ void arch_update_cpu_topology(void);
|
||||
.cache_nice_tries = 1, \
|
||||
.busy_idx = 3, \
|
||||
.idle_idx = 3, \
|
||||
.newidle_idx = 0, /* unused */ \
|
||||
.wake_idx = 0, /* unused */ \
|
||||
.forkexec_idx = 0, /* unused */ \
|
||||
.flags = SD_LOAD_BALANCE \
|
||||
| SD_SERIALIZE, \
|
||||
.last_balance = jiffies, \
|
||||
.balance_interval = 64, \
|
||||
.nr_balance_failed = 0, \
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
|
@ -328,6 +328,13 @@ config RT_GROUP_SCHED
|
||||
depends on EXPERIMENTAL
|
||||
depends on GROUP_SCHED
|
||||
default n
|
||||
help
|
||||
This feature lets you explicitly allocate real CPU bandwidth
|
||||
to users or control groups (depending on the "Basis for grouping tasks"
|
||||
setting below. If enabled, it will also make it impossible to
|
||||
schedule realtime tasks for non-root users until you allocate
|
||||
realtime bandwidth for them.
|
||||
See Documentation/sched-rt-group.txt for more information.
|
||||
|
||||
choice
|
||||
depends on GROUP_SCHED
|
||||
|
24
init/main.c
24
init/main.c
@ -359,10 +359,31 @@ static void __init smp_init(void)
|
||||
#endif
|
||||
|
||||
static inline void setup_per_cpu_areas(void) { }
|
||||
static inline void setup_nr_cpu_ids(void) { }
|
||||
static inline void smp_prepare_cpus(unsigned int maxcpus) { }
|
||||
|
||||
#else
|
||||
|
||||
#if NR_CPUS > BITS_PER_LONG
|
||||
cpumask_t cpu_mask_all __read_mostly = CPU_MASK_ALL;
|
||||
EXPORT_SYMBOL(cpu_mask_all);
|
||||
#endif
|
||||
|
||||
/* Setup number of possible processor ids */
|
||||
int nr_cpu_ids __read_mostly = NR_CPUS;
|
||||
EXPORT_SYMBOL(nr_cpu_ids);
|
||||
|
||||
/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
|
||||
static void __init setup_nr_cpu_ids(void)
|
||||
{
|
||||
int cpu, highest_cpu = 0;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
highest_cpu = cpu;
|
||||
|
||||
nr_cpu_ids = highest_cpu + 1;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
|
||||
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
|
||||
|
||||
@ -537,6 +558,7 @@ asmlinkage void __init start_kernel(void)
|
||||
setup_command_line(command_line);
|
||||
unwind_setup();
|
||||
setup_per_cpu_areas();
|
||||
setup_nr_cpu_ids();
|
||||
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
|
||||
|
||||
/*
|
||||
@ -811,7 +833,7 @@ static int __init kernel_init(void * unused)
|
||||
/*
|
||||
* init can run on any cpu.
|
||||
*/
|
||||
set_cpus_allowed(current, CPU_MASK_ALL);
|
||||
set_cpus_allowed_ptr(current, CPU_MASK_ALL_PTR);
|
||||
/*
|
||||
* Tell the world that we're going to be the grim
|
||||
* reaper of innocent orphaned children.
|
||||
|
@ -445,7 +445,7 @@ asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid,
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
return sched_setaffinity(pid, new_mask);
|
||||
return sched_setaffinity(pid, &new_mask);
|
||||
}
|
||||
|
||||
asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
|
||||
|
@ -232,9 +232,9 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
|
||||
|
||||
/* Ensure that we are not runnable on dying cpu */
|
||||
old_allowed = current->cpus_allowed;
|
||||
tmp = CPU_MASK_ALL;
|
||||
cpus_setall(tmp);
|
||||
cpu_clear(cpu, tmp);
|
||||
set_cpus_allowed(current, tmp);
|
||||
set_cpus_allowed_ptr(current, &tmp);
|
||||
|
||||
p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
|
||||
|
||||
@ -268,7 +268,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
|
||||
out_thread:
|
||||
err = kthread_stop(p);
|
||||
out_allowed:
|
||||
set_cpus_allowed(current, old_allowed);
|
||||
set_cpus_allowed_ptr(current, &old_allowed);
|
||||
out_release:
|
||||
cpu_hotplug_done();
|
||||
return err;
|
||||
|
100
kernel/cpuset.c
100
kernel/cpuset.c
@ -98,6 +98,9 @@ struct cpuset {
|
||||
/* partition number for rebuild_sched_domains() */
|
||||
int pn;
|
||||
|
||||
/* for custom sched domain */
|
||||
int relax_domain_level;
|
||||
|
||||
/* used for walking a cpuset heirarchy */
|
||||
struct list_head stack_list;
|
||||
};
|
||||
@ -478,6 +481,16 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
|
||||
return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
|
||||
}
|
||||
|
||||
static void
|
||||
update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
|
||||
{
|
||||
if (!dattr)
|
||||
return;
|
||||
if (dattr->relax_domain_level < c->relax_domain_level)
|
||||
dattr->relax_domain_level = c->relax_domain_level;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* rebuild_sched_domains()
|
||||
*
|
||||
@ -553,12 +566,14 @@ static void rebuild_sched_domains(void)
|
||||
int csn; /* how many cpuset ptrs in csa so far */
|
||||
int i, j, k; /* indices for partition finding loops */
|
||||
cpumask_t *doms; /* resulting partition; i.e. sched domains */
|
||||
struct sched_domain_attr *dattr; /* attributes for custom domains */
|
||||
int ndoms; /* number of sched domains in result */
|
||||
int nslot; /* next empty doms[] cpumask_t slot */
|
||||
|
||||
q = NULL;
|
||||
csa = NULL;
|
||||
doms = NULL;
|
||||
dattr = NULL;
|
||||
|
||||
/* Special case for the 99% of systems with one, full, sched domain */
|
||||
if (is_sched_load_balance(&top_cpuset)) {
|
||||
@ -566,6 +581,11 @@ static void rebuild_sched_domains(void)
|
||||
doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
|
||||
if (!doms)
|
||||
goto rebuild;
|
||||
dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
|
||||
if (dattr) {
|
||||
*dattr = SD_ATTR_INIT;
|
||||
update_domain_attr(dattr, &top_cpuset);
|
||||
}
|
||||
*doms = top_cpuset.cpus_allowed;
|
||||
goto rebuild;
|
||||
}
|
||||
@ -622,6 +642,7 @@ restart:
|
||||
doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
|
||||
if (!doms)
|
||||
goto rebuild;
|
||||
dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
|
||||
|
||||
for (nslot = 0, i = 0; i < csn; i++) {
|
||||
struct cpuset *a = csa[i];
|
||||
@ -644,12 +665,15 @@ restart:
|
||||
}
|
||||
|
||||
cpus_clear(*dp);
|
||||
if (dattr)
|
||||
*(dattr + nslot) = SD_ATTR_INIT;
|
||||
for (j = i; j < csn; j++) {
|
||||
struct cpuset *b = csa[j];
|
||||
|
||||
if (apn == b->pn) {
|
||||
cpus_or(*dp, *dp, b->cpus_allowed);
|
||||
b->pn = -1;
|
||||
update_domain_attr(dattr, b);
|
||||
}
|
||||
}
|
||||
nslot++;
|
||||
@ -660,7 +684,7 @@ restart:
|
||||
rebuild:
|
||||
/* Have scheduler rebuild sched domains */
|
||||
get_online_cpus();
|
||||
partition_sched_domains(ndoms, doms);
|
||||
partition_sched_domains(ndoms, doms, dattr);
|
||||
put_online_cpus();
|
||||
|
||||
done:
|
||||
@ -668,6 +692,7 @@ done:
|
||||
kfifo_free(q);
|
||||
kfree(csa);
|
||||
/* Don't kfree(doms) -- partition_sched_domains() does that. */
|
||||
/* Don't kfree(dattr) -- partition_sched_domains() does that. */
|
||||
}
|
||||
|
||||
static inline int started_after_time(struct task_struct *t1,
|
||||
@ -729,7 +754,7 @@ int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
|
||||
*/
|
||||
void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
|
||||
{
|
||||
set_cpus_allowed(tsk, (cgroup_cs(scan->cg))->cpus_allowed);
|
||||
set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1011,6 +1036,21 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int update_relax_domain_level(struct cpuset *cs, char *buf)
|
||||
{
|
||||
int val = simple_strtol(buf, NULL, 10);
|
||||
|
||||
if (val < 0)
|
||||
val = -1;
|
||||
|
||||
if (val != cs->relax_domain_level) {
|
||||
cs->relax_domain_level = val;
|
||||
rebuild_sched_domains();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* update_flag - read a 0 or a 1 in a file and update associated flag
|
||||
* bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
|
||||
@ -1178,7 +1218,7 @@ static void cpuset_attach(struct cgroup_subsys *ss,
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
guarantee_online_cpus(cs, &cpus);
|
||||
set_cpus_allowed(tsk, cpus);
|
||||
set_cpus_allowed_ptr(tsk, &cpus);
|
||||
mutex_unlock(&callback_mutex);
|
||||
|
||||
from = oldcs->mems_allowed;
|
||||
@ -1202,6 +1242,7 @@ typedef enum {
|
||||
FILE_CPU_EXCLUSIVE,
|
||||
FILE_MEM_EXCLUSIVE,
|
||||
FILE_SCHED_LOAD_BALANCE,
|
||||
FILE_SCHED_RELAX_DOMAIN_LEVEL,
|
||||
FILE_MEMORY_PRESSURE_ENABLED,
|
||||
FILE_MEMORY_PRESSURE,
|
||||
FILE_SPREAD_PAGE,
|
||||
@ -1256,6 +1297,9 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
|
||||
case FILE_SCHED_LOAD_BALANCE:
|
||||
retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
|
||||
break;
|
||||
case FILE_SCHED_RELAX_DOMAIN_LEVEL:
|
||||
retval = update_relax_domain_level(cs, buffer);
|
||||
break;
|
||||
case FILE_MEMORY_MIGRATE:
|
||||
retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
|
||||
break;
|
||||
@ -1354,6 +1398,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont,
|
||||
case FILE_SCHED_LOAD_BALANCE:
|
||||
*s++ = is_sched_load_balance(cs) ? '1' : '0';
|
||||
break;
|
||||
case FILE_SCHED_RELAX_DOMAIN_LEVEL:
|
||||
s += sprintf(s, "%d", cs->relax_domain_level);
|
||||
break;
|
||||
case FILE_MEMORY_MIGRATE:
|
||||
*s++ = is_memory_migrate(cs) ? '1' : '0';
|
||||
break;
|
||||
@ -1424,6 +1471,13 @@ static struct cftype cft_sched_load_balance = {
|
||||
.private = FILE_SCHED_LOAD_BALANCE,
|
||||
};
|
||||
|
||||
static struct cftype cft_sched_relax_domain_level = {
|
||||
.name = "sched_relax_domain_level",
|
||||
.read = cpuset_common_file_read,
|
||||
.write = cpuset_common_file_write,
|
||||
.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
|
||||
};
|
||||
|
||||
static struct cftype cft_memory_migrate = {
|
||||
.name = "memory_migrate",
|
||||
.read = cpuset_common_file_read,
|
||||
@ -1475,6 +1529,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
|
||||
return err;
|
||||
if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
|
||||
return err;
|
||||
if ((err = cgroup_add_file(cont, ss,
|
||||
&cft_sched_relax_domain_level)) < 0)
|
||||
return err;
|
||||
if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
|
||||
return err;
|
||||
if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
|
||||
@ -1555,10 +1612,11 @@ static struct cgroup_subsys_state *cpuset_create(
|
||||
if (is_spread_slab(parent))
|
||||
set_bit(CS_SPREAD_SLAB, &cs->flags);
|
||||
set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
|
||||
cs->cpus_allowed = CPU_MASK_NONE;
|
||||
cs->mems_allowed = NODE_MASK_NONE;
|
||||
cpus_clear(cs->cpus_allowed);
|
||||
nodes_clear(cs->mems_allowed);
|
||||
cs->mems_generation = cpuset_mems_generation++;
|
||||
fmeter_init(&cs->fmeter);
|
||||
cs->relax_domain_level = -1;
|
||||
|
||||
cs->parent = parent;
|
||||
number_of_cpusets++;
|
||||
@ -1625,12 +1683,13 @@ int __init cpuset_init(void)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
top_cpuset.cpus_allowed = CPU_MASK_ALL;
|
||||
top_cpuset.mems_allowed = NODE_MASK_ALL;
|
||||
cpus_setall(top_cpuset.cpus_allowed);
|
||||
nodes_setall(top_cpuset.mems_allowed);
|
||||
|
||||
fmeter_init(&top_cpuset.fmeter);
|
||||
top_cpuset.mems_generation = cpuset_mems_generation++;
|
||||
set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
|
||||
top_cpuset.relax_domain_level = -1;
|
||||
|
||||
err = register_filesystem(&cpuset_fs_type);
|
||||
if (err < 0)
|
||||
@ -1844,6 +1903,7 @@ void __init cpuset_init_smp(void)
|
||||
|
||||
* cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
|
||||
* @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
|
||||
* @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
|
||||
*
|
||||
* Description: Returns the cpumask_t cpus_allowed of the cpuset
|
||||
* attached to the specified @tsk. Guaranteed to return some non-empty
|
||||
@ -1851,35 +1911,27 @@ void __init cpuset_init_smp(void)
|
||||
* tasks cpuset.
|
||||
**/
|
||||
|
||||
cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
|
||||
void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
|
||||
{
|
||||
cpumask_t mask;
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
mask = cpuset_cpus_allowed_locked(tsk);
|
||||
cpuset_cpus_allowed_locked(tsk, pmask);
|
||||
mutex_unlock(&callback_mutex);
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
|
||||
* Must be called with callback_mutex held.
|
||||
**/
|
||||
cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk)
|
||||
void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask)
|
||||
{
|
||||
cpumask_t mask;
|
||||
|
||||
task_lock(tsk);
|
||||
guarantee_online_cpus(task_cs(tsk), &mask);
|
||||
guarantee_online_cpus(task_cs(tsk), pmask);
|
||||
task_unlock(tsk);
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
void cpuset_init_current_mems_allowed(void)
|
||||
{
|
||||
current->mems_allowed = NODE_MASK_ALL;
|
||||
nodes_setall(current->mems_allowed);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2261,8 +2313,16 @@ void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
|
||||
m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
|
||||
task->cpus_allowed);
|
||||
seq_printf(m, "\n");
|
||||
seq_printf(m, "Cpus_allowed_list:\t");
|
||||
m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
|
||||
task->cpus_allowed);
|
||||
seq_printf(m, "\n");
|
||||
seq_printf(m, "Mems_allowed:\t");
|
||||
m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
|
||||
task->mems_allowed);
|
||||
seq_printf(m, "\n");
|
||||
seq_printf(m, "Mems_allowed_list:\t");
|
||||
m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
|
||||
task->mems_allowed);
|
||||
seq_printf(m, "\n");
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ void dynamic_irq_init(unsigned int irq)
|
||||
desc->irq_count = 0;
|
||||
desc->irqs_unhandled = 0;
|
||||
#ifdef CONFIG_SMP
|
||||
desc->affinity = CPU_MASK_ALL;
|
||||
cpus_setall(desc->affinity);
|
||||
#endif
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
}
|
||||
|
@ -165,7 +165,7 @@ static int ____call_usermodehelper(void *data)
|
||||
}
|
||||
|
||||
/* We can run anywhere, unlike our parent keventd(). */
|
||||
set_cpus_allowed(current, CPU_MASK_ALL);
|
||||
set_cpus_allowed_ptr(current, CPU_MASK_ALL_PTR);
|
||||
|
||||
/*
|
||||
* Our parent is keventd, which runs with elevated scheduling priority.
|
||||
|
@ -180,6 +180,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu)
|
||||
wait_task_inactive(k);
|
||||
set_task_cpu(k, cpu);
|
||||
k->cpus_allowed = cpumask_of_cpu(cpu);
|
||||
k->rt.nr_cpus_allowed = 1;
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_bind);
|
||||
|
||||
|
@ -64,8 +64,8 @@ account_global_scheduler_latency(struct task_struct *tsk, struct latency_record
|
||||
return;
|
||||
|
||||
for (i = 0; i < MAXLR; i++) {
|
||||
int q;
|
||||
int same = 1;
|
||||
int q, same = 1;
|
||||
|
||||
/* Nothing stored: */
|
||||
if (!latency_record[i].backtrace[0]) {
|
||||
if (firstnonnull > i)
|
||||
@ -73,12 +73,15 @@ account_global_scheduler_latency(struct task_struct *tsk, struct latency_record
|
||||
continue;
|
||||
}
|
||||
for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
|
||||
if (latency_record[i].backtrace[q] !=
|
||||
lat->backtrace[q])
|
||||
unsigned long record = lat->backtrace[q];
|
||||
|
||||
if (latency_record[i].backtrace[q] != record) {
|
||||
same = 0;
|
||||
if (same && lat->backtrace[q] == 0)
|
||||
break;
|
||||
if (same && lat->backtrace[q] == ULONG_MAX)
|
||||
}
|
||||
|
||||
/* 0 and ULONG_MAX entries mean end of backtrace: */
|
||||
if (record == 0 || record == ULONG_MAX)
|
||||
break;
|
||||
}
|
||||
if (same) {
|
||||
@ -143,14 +146,18 @@ account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
|
||||
for (i = 0; i < LT_SAVECOUNT ; i++) {
|
||||
struct latency_record *mylat;
|
||||
int same = 1;
|
||||
|
||||
mylat = &tsk->latency_record[i];
|
||||
for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
|
||||
if (mylat->backtrace[q] !=
|
||||
lat.backtrace[q])
|
||||
unsigned long record = lat.backtrace[q];
|
||||
|
||||
if (mylat->backtrace[q] != record) {
|
||||
same = 0;
|
||||
if (same && lat.backtrace[q] == 0)
|
||||
break;
|
||||
if (same && lat.backtrace[q] == ULONG_MAX)
|
||||
}
|
||||
|
||||
/* 0 and ULONG_MAX entries mean end of backtrace: */
|
||||
if (record == 0 || record == ULONG_MAX)
|
||||
break;
|
||||
}
|
||||
if (same) {
|
||||
|
@ -1007,10 +1007,10 @@ void __synchronize_sched(void)
|
||||
if (sched_getaffinity(0, &oldmask) < 0)
|
||||
oldmask = cpu_possible_map;
|
||||
for_each_online_cpu(cpu) {
|
||||
sched_setaffinity(0, cpumask_of_cpu(cpu));
|
||||
sched_setaffinity(0, &cpumask_of_cpu(cpu));
|
||||
schedule();
|
||||
}
|
||||
sched_setaffinity(0, oldmask);
|
||||
sched_setaffinity(0, &oldmask);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__synchronize_sched);
|
||||
|
||||
|
@ -723,9 +723,10 @@ static int rcu_idle_cpu; /* Force all torture tasks off this CPU */
|
||||
*/
|
||||
static void rcu_torture_shuffle_tasks(void)
|
||||
{
|
||||
cpumask_t tmp_mask = CPU_MASK_ALL;
|
||||
cpumask_t tmp_mask;
|
||||
int i;
|
||||
|
||||
cpus_setall(tmp_mask);
|
||||
get_online_cpus();
|
||||
|
||||
/* No point in shuffling if there is only one online CPU (ex: UP) */
|
||||
@ -737,25 +738,27 @@ static void rcu_torture_shuffle_tasks(void)
|
||||
if (rcu_idle_cpu != -1)
|
||||
cpu_clear(rcu_idle_cpu, tmp_mask);
|
||||
|
||||
set_cpus_allowed(current, tmp_mask);
|
||||
set_cpus_allowed_ptr(current, &tmp_mask);
|
||||
|
||||
if (reader_tasks) {
|
||||
for (i = 0; i < nrealreaders; i++)
|
||||
if (reader_tasks[i])
|
||||
set_cpus_allowed(reader_tasks[i], tmp_mask);
|
||||
set_cpus_allowed_ptr(reader_tasks[i],
|
||||
&tmp_mask);
|
||||
}
|
||||
|
||||
if (fakewriter_tasks) {
|
||||
for (i = 0; i < nfakewriters; i++)
|
||||
if (fakewriter_tasks[i])
|
||||
set_cpus_allowed(fakewriter_tasks[i], tmp_mask);
|
||||
set_cpus_allowed_ptr(fakewriter_tasks[i],
|
||||
&tmp_mask);
|
||||
}
|
||||
|
||||
if (writer_task)
|
||||
set_cpus_allowed(writer_task, tmp_mask);
|
||||
set_cpus_allowed_ptr(writer_task, &tmp_mask);
|
||||
|
||||
if (stats_task)
|
||||
set_cpus_allowed(stats_task, tmp_mask);
|
||||
set_cpus_allowed_ptr(stats_task, &tmp_mask);
|
||||
|
||||
if (rcu_idle_cpu == -1)
|
||||
rcu_idle_cpu = num_online_cpus() - 1;
|
||||
|
1914
kernel/sched.c
1914
kernel/sched.c
File diff suppressed because it is too large
Load Diff
@ -67,14 +67,24 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
||||
(long long)(p->nvcsw + p->nivcsw),
|
||||
p->prio);
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld\n",
|
||||
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
|
||||
SPLIT_NS(p->se.vruntime),
|
||||
SPLIT_NS(p->se.sum_exec_runtime),
|
||||
SPLIT_NS(p->se.sum_sleep_runtime));
|
||||
#else
|
||||
SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld\n",
|
||||
SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
|
||||
0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CGROUP_SCHED
|
||||
{
|
||||
char path[64];
|
||||
|
||||
cgroup_path(task_group(p)->css.cgroup, path, sizeof(path));
|
||||
SEQ_printf(m, " %s", path);
|
||||
}
|
||||
#endif
|
||||
SEQ_printf(m, "\n");
|
||||
}
|
||||
|
||||
static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
|
||||
@ -109,7 +119,21 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
struct sched_entity *last;
|
||||
unsigned long flags;
|
||||
|
||||
SEQ_printf(m, "\ncfs_rq\n");
|
||||
#if !defined(CONFIG_CGROUP_SCHED) || !defined(CONFIG_USER_SCHED)
|
||||
SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
|
||||
#else
|
||||
char path[128] = "";
|
||||
struct cgroup *cgroup = NULL;
|
||||
struct task_group *tg = cfs_rq->tg;
|
||||
|
||||
if (tg)
|
||||
cgroup = tg->css.cgroup;
|
||||
|
||||
if (cgroup)
|
||||
cgroup_path(cgroup, path, sizeof(path));
|
||||
|
||||
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
|
||||
#endif
|
||||
|
||||
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
|
||||
SPLIT_NS(cfs_rq->exec_clock));
|
||||
@ -143,6 +167,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
#endif
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
|
||||
cfs_rq->nr_spread_over);
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
#ifdef CONFIG_SMP
|
||||
SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
static void print_cpu(struct seq_file *m, int cpu)
|
||||
@ -214,7 +243,6 @@ static int sched_debug_show(struct seq_file *m, void *v)
|
||||
PN(sysctl_sched_latency);
|
||||
PN(sysctl_sched_min_granularity);
|
||||
PN(sysctl_sched_wakeup_granularity);
|
||||
PN(sysctl_sched_batch_wakeup_granularity);
|
||||
PN(sysctl_sched_child_runs_first);
|
||||
P(sysctl_sched_features);
|
||||
#undef PN
|
||||
|
@ -62,24 +62,14 @@ const_debug unsigned int sysctl_sched_child_runs_first = 1;
|
||||
unsigned int __read_mostly sysctl_sched_compat_yield;
|
||||
|
||||
/*
|
||||
* SCHED_BATCH wake-up granularity.
|
||||
* SCHED_OTHER wake-up granularity.
|
||||
* (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
||||
*
|
||||
* This option delays the preemption effects of decoupled workloads
|
||||
* and reduces their over-scheduling. Synchronous workloads will still
|
||||
* have immediate wakeup/sleep latencies.
|
||||
*/
|
||||
unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
|
||||
|
||||
/*
|
||||
* SCHED_OTHER wake-up granularity.
|
||||
* (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
||||
*
|
||||
* This option delays the preemption effects of decoupled workloads
|
||||
* and reduces their over-scheduling. Synchronous workloads will still
|
||||
* have immediate wakeup/sleep latencies.
|
||||
*/
|
||||
unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
|
||||
unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
|
||||
|
||||
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
|
||||
|
||||
@ -87,6 +77,11 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
|
||||
* CFS operations on generic schedulable entities:
|
||||
*/
|
||||
|
||||
static inline struct task_struct *task_of(struct sched_entity *se)
|
||||
{
|
||||
return container_of(se, struct task_struct, se);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
|
||||
/* cpu runqueue to which this cfs_rq is attached */
|
||||
@ -98,6 +93,54 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
|
||||
/* An entity is a task if it doesn't "own" a runqueue */
|
||||
#define entity_is_task(se) (!se->my_q)
|
||||
|
||||
/* Walk up scheduling entities hierarchy */
|
||||
#define for_each_sched_entity(se) \
|
||||
for (; se; se = se->parent)
|
||||
|
||||
static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
|
||||
{
|
||||
return p->se.cfs_rq;
|
||||
}
|
||||
|
||||
/* runqueue on which this entity is (to be) queued */
|
||||
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
|
||||
{
|
||||
return se->cfs_rq;
|
||||
}
|
||||
|
||||
/* runqueue "owned" by this group */
|
||||
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
|
||||
{
|
||||
return grp->my_q;
|
||||
}
|
||||
|
||||
/* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on
|
||||
* another cpu ('this_cpu')
|
||||
*/
|
||||
static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
|
||||
{
|
||||
return cfs_rq->tg->cfs_rq[this_cpu];
|
||||
}
|
||||
|
||||
/* Iterate thr' all leaf cfs_rq's on a runqueue */
|
||||
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
|
||||
list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
|
||||
|
||||
/* Do the two (enqueued) entities belong to the same group ? */
|
||||
static inline int
|
||||
is_same_group(struct sched_entity *se, struct sched_entity *pse)
|
||||
{
|
||||
if (se->cfs_rq == pse->cfs_rq)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct sched_entity *parent_entity(struct sched_entity *se)
|
||||
{
|
||||
return se->parent;
|
||||
}
|
||||
|
||||
#else /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
|
||||
@ -107,13 +150,49 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
|
||||
|
||||
#define entity_is_task(se) 1
|
||||
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
#define for_each_sched_entity(se) \
|
||||
for (; se; se = NULL)
|
||||
|
||||
static inline struct task_struct *task_of(struct sched_entity *se)
|
||||
static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
|
||||
{
|
||||
return container_of(se, struct task_struct, se);
|
||||
return &task_rq(p)->cfs;
|
||||
}
|
||||
|
||||
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
|
||||
{
|
||||
struct task_struct *p = task_of(se);
|
||||
struct rq *rq = task_rq(p);
|
||||
|
||||
return &rq->cfs;
|
||||
}
|
||||
|
||||
/* runqueue "owned" by this group */
|
||||
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
|
||||
{
|
||||
return &cpu_rq(this_cpu)->cfs;
|
||||
}
|
||||
|
||||
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
|
||||
for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
|
||||
|
||||
static inline int
|
||||
is_same_group(struct sched_entity *se, struct sched_entity *pse)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline struct sched_entity *parent_entity(struct sched_entity *se)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
|
||||
/**************************************************************
|
||||
* Scheduling class tree data structure manipulation methods:
|
||||
@ -254,6 +333,34 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* delta *= w / rw
|
||||
*/
|
||||
static inline unsigned long
|
||||
calc_delta_weight(unsigned long delta, struct sched_entity *se)
|
||||
{
|
||||
for_each_sched_entity(se) {
|
||||
delta = calc_delta_mine(delta,
|
||||
se->load.weight, &cfs_rq_of(se)->load);
|
||||
}
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
/*
|
||||
* delta *= rw / w
|
||||
*/
|
||||
static inline unsigned long
|
||||
calc_delta_fair(unsigned long delta, struct sched_entity *se)
|
||||
{
|
||||
for_each_sched_entity(se) {
|
||||
delta = calc_delta_mine(delta,
|
||||
cfs_rq_of(se)->load.weight, &se->load);
|
||||
}
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
/*
|
||||
* The idea is to set a period in which each task runs once.
|
||||
*
|
||||
@ -283,29 +390,54 @@ static u64 __sched_period(unsigned long nr_running)
|
||||
*/
|
||||
static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
return calc_delta_mine(__sched_period(cfs_rq->nr_running),
|
||||
se->load.weight, &cfs_rq->load);
|
||||
return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
|
||||
}
|
||||
|
||||
/*
|
||||
* We calculate the vruntime slice.
|
||||
* We calculate the vruntime slice of a to be inserted task
|
||||
*
|
||||
* vs = s/w = p/rw
|
||||
* vs = s*rw/w = p
|
||||
*/
|
||||
static u64 __sched_vslice(unsigned long rq_weight, unsigned long nr_running)
|
||||
{
|
||||
u64 vslice = __sched_period(nr_running);
|
||||
|
||||
vslice *= NICE_0_LOAD;
|
||||
do_div(vslice, rq_weight);
|
||||
|
||||
return vslice;
|
||||
}
|
||||
|
||||
static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
return __sched_vslice(cfs_rq->load.weight + se->load.weight,
|
||||
cfs_rq->nr_running + 1);
|
||||
unsigned long nr_running = cfs_rq->nr_running;
|
||||
|
||||
if (!se->on_rq)
|
||||
nr_running++;
|
||||
|
||||
return __sched_period(nr_running);
|
||||
}
|
||||
|
||||
/*
|
||||
* The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
|
||||
* that it favours >=0 over <0.
|
||||
*
|
||||
* -20 |
|
||||
* |
|
||||
* 0 --------+-------
|
||||
* .'
|
||||
* 19 .'
|
||||
*
|
||||
*/
|
||||
static unsigned long
|
||||
calc_delta_asym(unsigned long delta, struct sched_entity *se)
|
||||
{
|
||||
struct load_weight lw = {
|
||||
.weight = NICE_0_LOAD,
|
||||
.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
|
||||
};
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
struct load_weight *se_lw = &se->load;
|
||||
|
||||
if (se->load.weight < NICE_0_LOAD)
|
||||
se_lw = &lw;
|
||||
|
||||
delta = calc_delta_mine(delta,
|
||||
cfs_rq_of(se)->load.weight, se_lw);
|
||||
}
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -322,11 +454,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
|
||||
|
||||
curr->sum_exec_runtime += delta_exec;
|
||||
schedstat_add(cfs_rq, exec_clock, delta_exec);
|
||||
delta_exec_weighted = delta_exec;
|
||||
if (unlikely(curr->load.weight != NICE_0_LOAD)) {
|
||||
delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
|
||||
&curr->load);
|
||||
}
|
||||
delta_exec_weighted = calc_delta_fair(delta_exec, curr);
|
||||
curr->vruntime += delta_exec_weighted;
|
||||
}
|
||||
|
||||
@ -413,20 +541,43 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
* Scheduling class queueing methods:
|
||||
*/
|
||||
|
||||
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
|
||||
static void
|
||||
add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
|
||||
{
|
||||
cfs_rq->task_weight += weight;
|
||||
}
|
||||
#else
|
||||
static inline void
|
||||
add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
update_load_add(&cfs_rq->load, se->load.weight);
|
||||
if (!parent_entity(se))
|
||||
inc_cpu_load(rq_of(cfs_rq), se->load.weight);
|
||||
if (entity_is_task(se))
|
||||
add_cfs_task_weight(cfs_rq, se->load.weight);
|
||||
cfs_rq->nr_running++;
|
||||
se->on_rq = 1;
|
||||
list_add(&se->group_node, &cfs_rq->tasks);
|
||||
}
|
||||
|
||||
static void
|
||||
account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
update_load_sub(&cfs_rq->load, se->load.weight);
|
||||
if (!parent_entity(se))
|
||||
dec_cpu_load(rq_of(cfs_rq), se->load.weight);
|
||||
if (entity_is_task(se))
|
||||
add_cfs_task_weight(cfs_rq, -se->load.weight);
|
||||
cfs_rq->nr_running--;
|
||||
se->on_rq = 0;
|
||||
list_del_init(&se->group_node);
|
||||
}
|
||||
|
||||
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
@ -510,8 +661,12 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
|
||||
|
||||
if (!initial) {
|
||||
/* sleeps upto a single latency don't count. */
|
||||
if (sched_feat(NEW_FAIR_SLEEPERS))
|
||||
vruntime -= sysctl_sched_latency;
|
||||
if (sched_feat(NEW_FAIR_SLEEPERS)) {
|
||||
if (sched_feat(NORMALIZED_SLEEPER))
|
||||
vruntime -= calc_delta_weight(sysctl_sched_latency, se);
|
||||
else
|
||||
vruntime -= sysctl_sched_latency;
|
||||
}
|
||||
|
||||
/* ensure we never gain time by being placed backwards. */
|
||||
vruntime = max_vruntime(se->vruntime, vruntime);
|
||||
@ -627,20 +782,16 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
se->prev_sum_exec_runtime = se->sum_exec_runtime;
|
||||
}
|
||||
|
||||
static int
|
||||
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
|
||||
|
||||
static struct sched_entity *
|
||||
pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
s64 diff, gran;
|
||||
|
||||
if (!cfs_rq->next)
|
||||
return se;
|
||||
|
||||
diff = cfs_rq->next->vruntime - se->vruntime;
|
||||
if (diff < 0)
|
||||
return se;
|
||||
|
||||
gran = calc_delta_fair(sysctl_sched_wakeup_granularity, &cfs_rq->load);
|
||||
if (diff > gran)
|
||||
if (wakeup_preempt_entity(cfs_rq->next, se) != 0)
|
||||
return se;
|
||||
|
||||
return cfs_rq->next;
|
||||
@ -708,101 +859,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
|
||||
* CFS operations on tasks:
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
|
||||
/* Walk up scheduling entities hierarchy */
|
||||
#define for_each_sched_entity(se) \
|
||||
for (; se; se = se->parent)
|
||||
|
||||
static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
|
||||
{
|
||||
return p->se.cfs_rq;
|
||||
}
|
||||
|
||||
/* runqueue on which this entity is (to be) queued */
|
||||
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
|
||||
{
|
||||
return se->cfs_rq;
|
||||
}
|
||||
|
||||
/* runqueue "owned" by this group */
|
||||
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
|
||||
{
|
||||
return grp->my_q;
|
||||
}
|
||||
|
||||
/* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on
|
||||
* another cpu ('this_cpu')
|
||||
*/
|
||||
static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
|
||||
{
|
||||
return cfs_rq->tg->cfs_rq[this_cpu];
|
||||
}
|
||||
|
||||
/* Iterate thr' all leaf cfs_rq's on a runqueue */
|
||||
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
|
||||
list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
|
||||
|
||||
/* Do the two (enqueued) entities belong to the same group ? */
|
||||
static inline int
|
||||
is_same_group(struct sched_entity *se, struct sched_entity *pse)
|
||||
{
|
||||
if (se->cfs_rq == pse->cfs_rq)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct sched_entity *parent_entity(struct sched_entity *se)
|
||||
{
|
||||
return se->parent;
|
||||
}
|
||||
|
||||
#else /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
#define for_each_sched_entity(se) \
|
||||
for (; se; se = NULL)
|
||||
|
||||
static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
|
||||
{
|
||||
return &task_rq(p)->cfs;
|
||||
}
|
||||
|
||||
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
|
||||
{
|
||||
struct task_struct *p = task_of(se);
|
||||
struct rq *rq = task_rq(p);
|
||||
|
||||
return &rq->cfs;
|
||||
}
|
||||
|
||||
/* runqueue "owned" by this group */
|
||||
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
|
||||
{
|
||||
return &cpu_rq(this_cpu)->cfs;
|
||||
}
|
||||
|
||||
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
|
||||
for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
|
||||
|
||||
static inline int
|
||||
is_same_group(struct sched_entity *se, struct sched_entity *pse)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline struct sched_entity *parent_entity(struct sched_entity *se)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
#ifdef CONFIG_SCHED_HRTICK
|
||||
static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
@ -916,7 +972,7 @@ static void yield_task_fair(struct rq *rq)
|
||||
/*
|
||||
* Already in the rightmost position?
|
||||
*/
|
||||
if (unlikely(rightmost->vruntime < se->vruntime))
|
||||
if (unlikely(!rightmost || rightmost->vruntime < se->vruntime))
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -955,7 +1011,9 @@ static int wake_idle(int cpu, struct task_struct *p)
|
||||
return cpu;
|
||||
|
||||
for_each_domain(cpu, sd) {
|
||||
if (sd->flags & SD_WAKE_IDLE) {
|
||||
if ((sd->flags & SD_WAKE_IDLE)
|
||||
|| ((sd->flags & SD_WAKE_IDLE_FAR)
|
||||
&& !task_hot(p, task_rq(p)->clock, sd))) {
|
||||
cpus_and(tmp, sd->span, p->cpus_allowed);
|
||||
for_each_cpu_mask(i, tmp) {
|
||||
if (idle_cpu(i)) {
|
||||
@ -1099,6 +1157,58 @@ out:
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
static unsigned long wakeup_gran(struct sched_entity *se)
|
||||
{
|
||||
unsigned long gran = sysctl_sched_wakeup_granularity;
|
||||
|
||||
/*
|
||||
* More easily preempt - nice tasks, while not making it harder for
|
||||
* + nice tasks.
|
||||
*/
|
||||
gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
|
||||
|
||||
return gran;
|
||||
}
|
||||
|
||||
/*
|
||||
* Should 'se' preempt 'curr'.
|
||||
*
|
||||
* |s1
|
||||
* |s2
|
||||
* |s3
|
||||
* g
|
||||
* |<--->|c
|
||||
*
|
||||
* w(c, s1) = -1
|
||||
* w(c, s2) = 0
|
||||
* w(c, s3) = 1
|
||||
*
|
||||
*/
|
||||
static int
|
||||
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
|
||||
{
|
||||
s64 gran, vdiff = curr->vruntime - se->vruntime;
|
||||
|
||||
if (vdiff < 0)
|
||||
return -1;
|
||||
|
||||
gran = wakeup_gran(curr);
|
||||
if (vdiff > gran)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* return depth at which a sched entity is present in the hierarchy */
|
||||
static inline int depth_se(struct sched_entity *se)
|
||||
{
|
||||
int depth = 0;
|
||||
|
||||
for_each_sched_entity(se)
|
||||
depth++;
|
||||
|
||||
return depth;
|
||||
}
|
||||
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
@ -1108,7 +1218,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
|
||||
struct task_struct *curr = rq->curr;
|
||||
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
|
||||
struct sched_entity *se = &curr->se, *pse = &p->se;
|
||||
unsigned long gran;
|
||||
int se_depth, pse_depth;
|
||||
|
||||
if (unlikely(rt_prio(p->prio))) {
|
||||
update_rq_clock(rq);
|
||||
@ -1133,20 +1243,33 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
|
||||
if (!sched_feat(WAKEUP_PREEMPT))
|
||||
return;
|
||||
|
||||
/*
|
||||
* preemption test can be made between sibling entities who are in the
|
||||
* same cfs_rq i.e who have a common parent. Walk up the hierarchy of
|
||||
* both tasks until we find their ancestors who are siblings of common
|
||||
* parent.
|
||||
*/
|
||||
|
||||
/* First walk up until both entities are at same depth */
|
||||
se_depth = depth_se(se);
|
||||
pse_depth = depth_se(pse);
|
||||
|
||||
while (se_depth > pse_depth) {
|
||||
se_depth--;
|
||||
se = parent_entity(se);
|
||||
}
|
||||
|
||||
while (pse_depth > se_depth) {
|
||||
pse_depth--;
|
||||
pse = parent_entity(pse);
|
||||
}
|
||||
|
||||
while (!is_same_group(se, pse)) {
|
||||
se = parent_entity(se);
|
||||
pse = parent_entity(pse);
|
||||
}
|
||||
|
||||
gran = sysctl_sched_wakeup_granularity;
|
||||
/*
|
||||
* More easily preempt - nice tasks, while not making
|
||||
* it harder for + nice tasks.
|
||||
*/
|
||||
if (unlikely(se->load.weight > NICE_0_LOAD))
|
||||
gran = calc_delta_fair(gran, &se->load);
|
||||
|
||||
if (pse->vruntime + gran < se->vruntime)
|
||||
if (wakeup_preempt_entity(se, pse) == 1)
|
||||
resched_task(curr);
|
||||
}
|
||||
|
||||
@ -1197,15 +1320,27 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
|
||||
* the current task:
|
||||
*/
|
||||
static struct task_struct *
|
||||
__load_balance_iterator(struct cfs_rq *cfs_rq, struct rb_node *curr)
|
||||
__load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
|
||||
{
|
||||
struct task_struct *p;
|
||||
struct task_struct *p = NULL;
|
||||
struct sched_entity *se;
|
||||
|
||||
if (!curr)
|
||||
if (next == &cfs_rq->tasks)
|
||||
return NULL;
|
||||
|
||||
p = rb_entry(curr, struct task_struct, se.run_node);
|
||||
cfs_rq->rb_load_balance_curr = rb_next(curr);
|
||||
/* Skip over entities that are not tasks */
|
||||
do {
|
||||
se = list_entry(next, struct sched_entity, group_node);
|
||||
next = next->next;
|
||||
} while (next != &cfs_rq->tasks && !entity_is_task(se));
|
||||
|
||||
if (next == &cfs_rq->tasks)
|
||||
return NULL;
|
||||
|
||||
cfs_rq->balance_iterator = next;
|
||||
|
||||
if (entity_is_task(se))
|
||||
p = task_of(se);
|
||||
|
||||
return p;
|
||||
}
|
||||
@ -1214,85 +1349,100 @@ static struct task_struct *load_balance_start_fair(void *arg)
|
||||
{
|
||||
struct cfs_rq *cfs_rq = arg;
|
||||
|
||||
return __load_balance_iterator(cfs_rq, first_fair(cfs_rq));
|
||||
return __load_balance_iterator(cfs_rq, cfs_rq->tasks.next);
|
||||
}
|
||||
|
||||
static struct task_struct *load_balance_next_fair(void *arg)
|
||||
{
|
||||
struct cfs_rq *cfs_rq = arg;
|
||||
|
||||
return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
|
||||
return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
|
||||
}
|
||||
|
||||
static unsigned long
|
||||
__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_load_move, struct sched_domain *sd,
|
||||
enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
|
||||
struct cfs_rq *cfs_rq)
|
||||
{
|
||||
struct rq_iterator cfs_rq_iterator;
|
||||
|
||||
cfs_rq_iterator.start = load_balance_start_fair;
|
||||
cfs_rq_iterator.next = load_balance_next_fair;
|
||||
cfs_rq_iterator.arg = cfs_rq;
|
||||
|
||||
return balance_tasks(this_rq, this_cpu, busiest,
|
||||
max_load_move, sd, idle, all_pinned,
|
||||
this_best_prio, &cfs_rq_iterator);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
struct sched_entity *curr;
|
||||
struct task_struct *p;
|
||||
|
||||
if (!cfs_rq->nr_running || !first_fair(cfs_rq))
|
||||
return MAX_PRIO;
|
||||
|
||||
curr = cfs_rq->curr;
|
||||
if (!curr)
|
||||
curr = __pick_next_entity(cfs_rq);
|
||||
|
||||
p = task_of(curr);
|
||||
|
||||
return p->prio;
|
||||
}
|
||||
#endif
|
||||
|
||||
static unsigned long
|
||||
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio)
|
||||
{
|
||||
struct cfs_rq *busy_cfs_rq;
|
||||
long rem_load_move = max_load_move;
|
||||
struct rq_iterator cfs_rq_iterator;
|
||||
int busiest_cpu = cpu_of(busiest);
|
||||
struct task_group *tg;
|
||||
|
||||
cfs_rq_iterator.start = load_balance_start_fair;
|
||||
cfs_rq_iterator.next = load_balance_next_fair;
|
||||
|
||||
for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
struct cfs_rq *this_cfs_rq;
|
||||
rcu_read_lock();
|
||||
list_for_each_entry(tg, &task_groups, list) {
|
||||
long imbalance;
|
||||
unsigned long maxload;
|
||||
unsigned long this_weight, busiest_weight;
|
||||
long rem_load, max_load, moved_load;
|
||||
|
||||
this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
|
||||
|
||||
imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
|
||||
/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
|
||||
if (imbalance <= 0)
|
||||
/*
|
||||
* empty group
|
||||
*/
|
||||
if (!aggregate(tg, sd)->task_weight)
|
||||
continue;
|
||||
|
||||
/* Don't pull more than imbalance/2 */
|
||||
imbalance /= 2;
|
||||
maxload = min(rem_load_move, imbalance);
|
||||
rem_load = rem_load_move * aggregate(tg, sd)->rq_weight;
|
||||
rem_load /= aggregate(tg, sd)->load + 1;
|
||||
|
||||
*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
|
||||
#else
|
||||
# define maxload rem_load_move
|
||||
#endif
|
||||
/*
|
||||
* pass busy_cfs_rq argument into
|
||||
* load_balance_[start|next]_fair iterators
|
||||
*/
|
||||
cfs_rq_iterator.arg = busy_cfs_rq;
|
||||
rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
|
||||
maxload, sd, idle, all_pinned,
|
||||
this_best_prio,
|
||||
&cfs_rq_iterator);
|
||||
this_weight = tg->cfs_rq[this_cpu]->task_weight;
|
||||
busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight;
|
||||
|
||||
if (rem_load_move <= 0)
|
||||
imbalance = (busiest_weight - this_weight) / 2;
|
||||
|
||||
if (imbalance < 0)
|
||||
imbalance = busiest_weight;
|
||||
|
||||
max_load = max(rem_load, imbalance);
|
||||
moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
|
||||
max_load, sd, idle, all_pinned, this_best_prio,
|
||||
tg->cfs_rq[busiest_cpu]);
|
||||
|
||||
if (!moved_load)
|
||||
continue;
|
||||
|
||||
move_group_shares(tg, sd, busiest_cpu, this_cpu);
|
||||
|
||||
moved_load *= aggregate(tg, sd)->load;
|
||||
moved_load /= aggregate(tg, sd)->rq_weight + 1;
|
||||
|
||||
rem_load_move -= moved_load;
|
||||
if (rem_load_move < 0)
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return max_load_move - rem_load_move;
|
||||
}
|
||||
#else
|
||||
static unsigned long
|
||||
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio)
|
||||
{
|
||||
return __load_balance_fair(this_rq, this_cpu, busiest,
|
||||
max_load_move, sd, idle, all_pinned,
|
||||
this_best_prio, &busiest->cfs);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
@ -1461,16 +1611,40 @@ static const struct sched_class fair_sched_class = {
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
static void
|
||||
print_cfs_rq_tasks(struct seq_file *m, struct cfs_rq *cfs_rq, int depth)
|
||||
{
|
||||
struct sched_entity *se;
|
||||
|
||||
if (!cfs_rq)
|
||||
return;
|
||||
|
||||
list_for_each_entry_rcu(se, &cfs_rq->tasks, group_node) {
|
||||
int i;
|
||||
|
||||
for (i = depth; i; i--)
|
||||
seq_puts(m, " ");
|
||||
|
||||
seq_printf(m, "%lu %s %lu\n",
|
||||
se->load.weight,
|
||||
entity_is_task(se) ? "T" : "G",
|
||||
calc_delta_weight(SCHED_LOAD_SCALE, se)
|
||||
);
|
||||
if (!entity_is_task(se))
|
||||
print_cfs_rq_tasks(m, group_cfs_rq(se), depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void print_cfs_stats(struct seq_file *m, int cpu)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
print_cfs_rq(m, cpu, &cpu_rq(cpu)->cfs);
|
||||
#endif
|
||||
rcu_read_lock();
|
||||
for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
|
||||
print_cfs_rq(m, cpu, cfs_rq);
|
||||
|
||||
seq_printf(m, "\nWeight tree:\n");
|
||||
print_cfs_rq_tasks(m, &cpu_rq(cpu)->cfs, 1);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
#endif
|
||||
|
10
kernel/sched_features.h
Normal file
10
kernel/sched_features.h
Normal file
@ -0,0 +1,10 @@
|
||||
SCHED_FEAT(NEW_FAIR_SLEEPERS, 1)
|
||||
SCHED_FEAT(WAKEUP_PREEMPT, 1)
|
||||
SCHED_FEAT(START_DEBIT, 1)
|
||||
SCHED_FEAT(AFFINE_WAKEUPS, 1)
|
||||
SCHED_FEAT(CACHE_HOT_BUDDY, 1)
|
||||
SCHED_FEAT(SYNC_WAKEUPS, 1)
|
||||
SCHED_FEAT(HRTICK, 1)
|
||||
SCHED_FEAT(DOUBLE_TICK, 0)
|
||||
SCHED_FEAT(NORMALIZED_SLEEPER, 1)
|
||||
SCHED_FEAT(DEADLINE, 1)
|
@ -62,7 +62,12 @@ static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
|
||||
if (!rt_rq->tg)
|
||||
return RUNTIME_INF;
|
||||
|
||||
return rt_rq->tg->rt_runtime;
|
||||
return rt_rq->rt_runtime;
|
||||
}
|
||||
|
||||
static inline u64 sched_rt_period(struct rt_rq *rt_rq)
|
||||
{
|
||||
return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
|
||||
}
|
||||
|
||||
#define for_each_leaf_rt_rq(rt_rq, rq) \
|
||||
@ -127,14 +132,39 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
|
||||
return p->prio != p->normal_prio;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static inline cpumask_t sched_rt_period_mask(void)
|
||||
{
|
||||
return cpu_rq(smp_processor_id())->rd->span;
|
||||
}
|
||||
#else
|
||||
static inline cpumask_t sched_rt_period_mask(void)
|
||||
{
|
||||
return cpu_online_map;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline
|
||||
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
|
||||
{
|
||||
return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
|
||||
}
|
||||
|
||||
static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
|
||||
{
|
||||
return &rt_rq->tg->rt_bandwidth;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
|
||||
{
|
||||
if (sysctl_sched_rt_runtime == -1)
|
||||
return RUNTIME_INF;
|
||||
return rt_rq->rt_runtime;
|
||||
}
|
||||
|
||||
return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
|
||||
static inline u64 sched_rt_period(struct rt_rq *rt_rq)
|
||||
{
|
||||
return ktime_to_ns(def_rt_bandwidth.rt_period);
|
||||
}
|
||||
|
||||
#define for_each_leaf_rt_rq(rt_rq, rq) \
|
||||
@ -173,6 +203,102 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
|
||||
{
|
||||
return rt_rq->rt_throttled;
|
||||
}
|
||||
|
||||
static inline cpumask_t sched_rt_period_mask(void)
|
||||
{
|
||||
return cpu_online_map;
|
||||
}
|
||||
|
||||
static inline
|
||||
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
|
||||
{
|
||||
return &cpu_rq(cpu)->rt;
|
||||
}
|
||||
|
||||
static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
|
||||
{
|
||||
return &def_rt_bandwidth;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
|
||||
{
|
||||
int i, idle = 1;
|
||||
cpumask_t span;
|
||||
|
||||
if (rt_b->rt_runtime == RUNTIME_INF)
|
||||
return 1;
|
||||
|
||||
span = sched_rt_period_mask();
|
||||
for_each_cpu_mask(i, span) {
|
||||
int enqueue = 0;
|
||||
struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
|
||||
struct rq *rq = rq_of_rt_rq(rt_rq);
|
||||
|
||||
spin_lock(&rq->lock);
|
||||
if (rt_rq->rt_time) {
|
||||
u64 runtime;
|
||||
|
||||
spin_lock(&rt_rq->rt_runtime_lock);
|
||||
runtime = rt_rq->rt_runtime;
|
||||
rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
|
||||
if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
|
||||
rt_rq->rt_throttled = 0;
|
||||
enqueue = 1;
|
||||
}
|
||||
if (rt_rq->rt_time || rt_rq->rt_nr_running)
|
||||
idle = 0;
|
||||
spin_unlock(&rt_rq->rt_runtime_lock);
|
||||
}
|
||||
|
||||
if (enqueue)
|
||||
sched_rt_rq_enqueue(rt_rq);
|
||||
spin_unlock(&rq->lock);
|
||||
}
|
||||
|
||||
return idle;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int balance_runtime(struct rt_rq *rt_rq)
|
||||
{
|
||||
struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
|
||||
struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
|
||||
int i, weight, more = 0;
|
||||
u64 rt_period;
|
||||
|
||||
weight = cpus_weight(rd->span);
|
||||
|
||||
spin_lock(&rt_b->rt_runtime_lock);
|
||||
rt_period = ktime_to_ns(rt_b->rt_period);
|
||||
for_each_cpu_mask(i, rd->span) {
|
||||
struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
|
||||
s64 diff;
|
||||
|
||||
if (iter == rt_rq)
|
||||
continue;
|
||||
|
||||
spin_lock(&iter->rt_runtime_lock);
|
||||
diff = iter->rt_runtime - iter->rt_time;
|
||||
if (diff > 0) {
|
||||
do_div(diff, weight);
|
||||
if (rt_rq->rt_runtime + diff > rt_period)
|
||||
diff = rt_period - rt_rq->rt_runtime;
|
||||
iter->rt_runtime -= diff;
|
||||
rt_rq->rt_runtime += diff;
|
||||
more = 1;
|
||||
if (rt_rq->rt_runtime == rt_period) {
|
||||
spin_unlock(&iter->rt_runtime_lock);
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&iter->rt_runtime_lock);
|
||||
}
|
||||
spin_unlock(&rt_b->rt_runtime_lock);
|
||||
|
||||
return more;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int rt_se_prio(struct sched_rt_entity *rt_se)
|
||||
@ -197,12 +323,24 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
|
||||
if (rt_rq->rt_throttled)
|
||||
return rt_rq_throttled(rt_rq);
|
||||
|
||||
if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
if (rt_rq->rt_time > runtime) {
|
||||
struct rq *rq = rq_of_rt_rq(rt_rq);
|
||||
int more;
|
||||
|
||||
rq->rt_throttled = 1;
|
||||
spin_unlock(&rt_rq->rt_runtime_lock);
|
||||
more = balance_runtime(rt_rq);
|
||||
spin_lock(&rt_rq->rt_runtime_lock);
|
||||
|
||||
if (more)
|
||||
runtime = sched_rt_runtime(rt_rq);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (rt_rq->rt_time > runtime) {
|
||||
rt_rq->rt_throttled = 1;
|
||||
|
||||
if (rt_rq_throttled(rt_rq)) {
|
||||
sched_rt_rq_dequeue(rt_rq);
|
||||
return 1;
|
||||
@ -212,29 +350,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void update_sched_rt_period(struct rq *rq)
|
||||
{
|
||||
struct rt_rq *rt_rq;
|
||||
u64 period;
|
||||
|
||||
while (rq->clock > rq->rt_period_expire) {
|
||||
period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
|
||||
rq->rt_period_expire += period;
|
||||
|
||||
for_each_leaf_rt_rq(rt_rq, rq) {
|
||||
u64 runtime = sched_rt_runtime(rt_rq);
|
||||
|
||||
rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
|
||||
if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
|
||||
rt_rq->rt_throttled = 0;
|
||||
sched_rt_rq_enqueue(rt_rq);
|
||||
}
|
||||
}
|
||||
|
||||
rq->rt_throttled = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the current task's runtime statistics. Skip current tasks that
|
||||
* are not in our scheduling class.
|
||||
@ -259,9 +374,15 @@ static void update_curr_rt(struct rq *rq)
|
||||
curr->se.exec_start = rq->clock;
|
||||
cpuacct_charge(curr, delta_exec);
|
||||
|
||||
rt_rq->rt_time += delta_exec;
|
||||
if (sched_rt_runtime_exceeded(rt_rq))
|
||||
resched_task(curr);
|
||||
for_each_sched_rt_entity(rt_se) {
|
||||
rt_rq = rt_rq_of_se(rt_se);
|
||||
|
||||
spin_lock(&rt_rq->rt_runtime_lock);
|
||||
rt_rq->rt_time += delta_exec;
|
||||
if (sched_rt_runtime_exceeded(rt_rq))
|
||||
resched_task(curr);
|
||||
spin_unlock(&rt_rq->rt_runtime_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static inline
|
||||
@ -284,6 +405,11 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
if (rt_se_boosted(rt_se))
|
||||
rt_rq->rt_nr_boosted++;
|
||||
|
||||
if (rt_rq->tg)
|
||||
start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
|
||||
#else
|
||||
start_rt_bandwidth(&def_rt_bandwidth);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -353,27 +479,21 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
|
||||
/*
|
||||
* Because the prio of an upper entry depends on the lower
|
||||
* entries, we must remove entries top - down.
|
||||
*
|
||||
* XXX: O(1/2 h^2) because we can only walk up, not down the chain.
|
||||
* doesn't matter much for now, as h=2 for GROUP_SCHED.
|
||||
*/
|
||||
static void dequeue_rt_stack(struct task_struct *p)
|
||||
{
|
||||
struct sched_rt_entity *rt_se, *top_se;
|
||||
struct sched_rt_entity *rt_se, *back = NULL;
|
||||
|
||||
/*
|
||||
* dequeue all, top - down.
|
||||
*/
|
||||
do {
|
||||
rt_se = &p->rt;
|
||||
top_se = NULL;
|
||||
for_each_sched_rt_entity(rt_se) {
|
||||
if (on_rt_rq(rt_se))
|
||||
top_se = rt_se;
|
||||
}
|
||||
if (top_se)
|
||||
dequeue_rt_entity(top_se);
|
||||
} while (top_se);
|
||||
rt_se = &p->rt;
|
||||
for_each_sched_rt_entity(rt_se) {
|
||||
rt_se->back = back;
|
||||
back = rt_se;
|
||||
}
|
||||
|
||||
for (rt_se = back; rt_se; rt_se = rt_se->back) {
|
||||
if (on_rt_rq(rt_se))
|
||||
dequeue_rt_entity(rt_se);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -393,6 +513,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
|
||||
*/
|
||||
for_each_sched_rt_entity(rt_se)
|
||||
enqueue_rt_entity(rt_se);
|
||||
|
||||
inc_cpu_load(rq, p->se.load.weight);
|
||||
}
|
||||
|
||||
static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
|
||||
@ -412,6 +534,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
|
||||
if (rt_rq && rt_rq->rt_nr_running)
|
||||
enqueue_rt_entity(rt_se);
|
||||
}
|
||||
|
||||
dec_cpu_load(rq, p->se.load.weight);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1001,7 +1125,8 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
|
||||
static void set_cpus_allowed_rt(struct task_struct *p,
|
||||
const cpumask_t *new_mask)
|
||||
{
|
||||
int weight = cpus_weight(*new_mask);
|
||||
|
||||
|
@ -9,6 +9,11 @@
|
||||
static int show_schedstat(struct seq_file *seq, void *v)
|
||||
{
|
||||
int cpu;
|
||||
int mask_len = NR_CPUS/32 * 9;
|
||||
char *mask_str = kmalloc(mask_len, GFP_KERNEL);
|
||||
|
||||
if (mask_str == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
|
||||
seq_printf(seq, "timestamp %lu\n", jiffies);
|
||||
@ -36,9 +41,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
|
||||
preempt_disable();
|
||||
for_each_domain(cpu, sd) {
|
||||
enum cpu_idle_type itype;
|
||||
char mask_str[NR_CPUS];
|
||||
|
||||
cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
|
||||
cpumask_scnprintf(mask_str, mask_len, sd->span);
|
||||
seq_printf(seq, "domain%d %s", dcount++, mask_str);
|
||||
for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
|
||||
itype++) {
|
||||
|
@ -356,7 +356,8 @@ void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
|
||||
/* Tasklets */
|
||||
struct tasklet_head
|
||||
{
|
||||
struct tasklet_struct *list;
|
||||
struct tasklet_struct *head;
|
||||
struct tasklet_struct **tail;
|
||||
};
|
||||
|
||||
/* Some compilers disobey section attribute on statics when not
|
||||
@ -369,8 +370,9 @@ void __tasklet_schedule(struct tasklet_struct *t)
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
t->next = __get_cpu_var(tasklet_vec).list;
|
||||
__get_cpu_var(tasklet_vec).list = t;
|
||||
t->next = NULL;
|
||||
*__get_cpu_var(tasklet_vec).tail = t;
|
||||
__get_cpu_var(tasklet_vec).tail = &(t->next);
|
||||
raise_softirq_irqoff(TASKLET_SOFTIRQ);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
@ -382,8 +384,9 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
t->next = __get_cpu_var(tasklet_hi_vec).list;
|
||||
__get_cpu_var(tasklet_hi_vec).list = t;
|
||||
t->next = NULL;
|
||||
*__get_cpu_var(tasklet_hi_vec).tail = t;
|
||||
__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
|
||||
raise_softirq_irqoff(HI_SOFTIRQ);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
@ -395,8 +398,9 @@ static void tasklet_action(struct softirq_action *a)
|
||||
struct tasklet_struct *list;
|
||||
|
||||
local_irq_disable();
|
||||
list = __get_cpu_var(tasklet_vec).list;
|
||||
__get_cpu_var(tasklet_vec).list = NULL;
|
||||
list = __get_cpu_var(tasklet_vec).head;
|
||||
__get_cpu_var(tasklet_vec).head = NULL;
|
||||
__get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
|
||||
local_irq_enable();
|
||||
|
||||
while (list) {
|
||||
@ -416,8 +420,9 @@ static void tasklet_action(struct softirq_action *a)
|
||||
}
|
||||
|
||||
local_irq_disable();
|
||||
t->next = __get_cpu_var(tasklet_vec).list;
|
||||
__get_cpu_var(tasklet_vec).list = t;
|
||||
t->next = NULL;
|
||||
*__get_cpu_var(tasklet_vec).tail = t;
|
||||
__get_cpu_var(tasklet_vec).tail = &(t->next);
|
||||
__raise_softirq_irqoff(TASKLET_SOFTIRQ);
|
||||
local_irq_enable();
|
||||
}
|
||||
@ -428,8 +433,9 @@ static void tasklet_hi_action(struct softirq_action *a)
|
||||
struct tasklet_struct *list;
|
||||
|
||||
local_irq_disable();
|
||||
list = __get_cpu_var(tasklet_hi_vec).list;
|
||||
__get_cpu_var(tasklet_hi_vec).list = NULL;
|
||||
list = __get_cpu_var(tasklet_hi_vec).head;
|
||||
__get_cpu_var(tasklet_hi_vec).head = NULL;
|
||||
__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
|
||||
local_irq_enable();
|
||||
|
||||
while (list) {
|
||||
@ -449,8 +455,9 @@ static void tasklet_hi_action(struct softirq_action *a)
|
||||
}
|
||||
|
||||
local_irq_disable();
|
||||
t->next = __get_cpu_var(tasklet_hi_vec).list;
|
||||
__get_cpu_var(tasklet_hi_vec).list = t;
|
||||
t->next = NULL;
|
||||
*__get_cpu_var(tasklet_hi_vec).tail = t;
|
||||
__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
|
||||
__raise_softirq_irqoff(HI_SOFTIRQ);
|
||||
local_irq_enable();
|
||||
}
|
||||
@ -487,6 +494,15 @@ EXPORT_SYMBOL(tasklet_kill);
|
||||
|
||||
void __init softirq_init(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
per_cpu(tasklet_vec, cpu).tail =
|
||||
&per_cpu(tasklet_vec, cpu).head;
|
||||
per_cpu(tasklet_hi_vec, cpu).tail =
|
||||
&per_cpu(tasklet_hi_vec, cpu).head;
|
||||
}
|
||||
|
||||
open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
|
||||
open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
|
||||
}
|
||||
@ -555,9 +571,12 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
|
||||
return;
|
||||
|
||||
/* CPU is dead, so no lock needed. */
|
||||
for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
|
||||
for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
|
||||
if (*i == t) {
|
||||
*i = t->next;
|
||||
/* If this was the tail element, move the tail ptr */
|
||||
if (*i == NULL)
|
||||
per_cpu(tasklet_vec, cpu).tail = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -566,20 +585,20 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
|
||||
|
||||
static void takeover_tasklets(unsigned int cpu)
|
||||
{
|
||||
struct tasklet_struct **i;
|
||||
|
||||
/* CPU is dead, so no lock needed. */
|
||||
local_irq_disable();
|
||||
|
||||
/* Find end, append list for that CPU. */
|
||||
for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
|
||||
*i = per_cpu(tasklet_vec, cpu).list;
|
||||
per_cpu(tasklet_vec, cpu).list = NULL;
|
||||
*__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).head;
|
||||
__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
|
||||
per_cpu(tasklet_vec, cpu).head = NULL;
|
||||
per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
|
||||
raise_softirq_irqoff(TASKLET_SOFTIRQ);
|
||||
|
||||
for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
|
||||
*i = per_cpu(tasklet_hi_vec, cpu).list;
|
||||
per_cpu(tasklet_hi_vec, cpu).list = NULL;
|
||||
*__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
|
||||
__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
|
||||
per_cpu(tasklet_hi_vec, cpu).head = NULL;
|
||||
per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
|
||||
raise_softirq_irqoff(HI_SOFTIRQ);
|
||||
|
||||
local_irq_enable();
|
||||
|
@ -35,7 +35,7 @@ static int stopmachine(void *cpu)
|
||||
int irqs_disabled = 0;
|
||||
int prepared = 0;
|
||||
|
||||
set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
|
||||
|
||||
/* Ack: we are alive */
|
||||
smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
|
||||
|
@ -268,17 +268,6 @@ static struct ctl_table kern_table[] = {
|
||||
.extra1 = &min_wakeup_granularity_ns,
|
||||
.extra2 = &max_wakeup_granularity_ns,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_batch_wakeup_granularity_ns",
|
||||
.data = &sysctl_sched_batch_wakeup_granularity,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
.extra1 = &min_wakeup_granularity_ns,
|
||||
.extra2 = &max_wakeup_granularity_ns,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_child_runs_first",
|
||||
@ -318,7 +307,7 @@ static struct ctl_table kern_table[] = {
|
||||
.data = &sysctl_sched_rt_period,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
.proc_handler = &sched_rt_handler,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
@ -326,7 +315,7 @@ static struct ctl_table kern_table[] = {
|
||||
.data = &sysctl_sched_rt_runtime,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
.proc_handler = &sched_rt_handler,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
|
@ -191,7 +191,6 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
|
||||
void tick_nohz_stop_sched_tick(void)
|
||||
{
|
||||
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
|
||||
unsigned long rt_jiffies;
|
||||
struct tick_sched *ts;
|
||||
ktime_t last_update, expires, now;
|
||||
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
|
||||
@ -243,10 +242,6 @@ void tick_nohz_stop_sched_tick(void)
|
||||
next_jiffies = get_next_timer_interrupt(last_jiffies);
|
||||
delta_jiffies = next_jiffies - last_jiffies;
|
||||
|
||||
rt_jiffies = rt_needs_cpu(cpu);
|
||||
if (rt_jiffies && rt_jiffies < delta_jiffies)
|
||||
delta_jiffies = rt_jiffies;
|
||||
|
||||
if (rcu_needs_cpu(cpu))
|
||||
delta_jiffies = 1;
|
||||
/*
|
||||
|
@ -101,7 +101,7 @@ static int sched_create_user(struct user_struct *up)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
up->tg = sched_create_group();
|
||||
up->tg = sched_create_group(&root_task_group);
|
||||
if (IS_ERR(up->tg))
|
||||
rc = -ENOMEM;
|
||||
|
||||
@ -193,6 +193,33 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
|
||||
|
||||
static struct kobj_attribute cpu_rt_runtime_attr =
|
||||
__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
|
||||
|
||||
static ssize_t cpu_rt_period_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
|
||||
return sprintf(buf, "%lu\n", sched_group_rt_period(up->tg));
|
||||
}
|
||||
|
||||
static ssize_t cpu_rt_period_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
{
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
unsigned long rt_period;
|
||||
int rc;
|
||||
|
||||
sscanf(buf, "%lu", &rt_period);
|
||||
|
||||
rc = sched_group_set_rt_period(up->tg, rt_period);
|
||||
|
||||
return (rc ? rc : size);
|
||||
}
|
||||
|
||||
static struct kobj_attribute cpu_rt_period_attr =
|
||||
__ATTR(cpu_rt_period, 0644, cpu_rt_period_show, cpu_rt_period_store);
|
||||
#endif
|
||||
|
||||
/* default attributes per uid directory */
|
||||
@ -202,6 +229,7 @@ static struct attribute *uids_attributes[] = {
|
||||
#endif
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
&cpu_rt_runtime_attr.attr,
|
||||
&cpu_rt_period_attr.attr,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
16
lib/bitmap.c
16
lib/bitmap.c
@ -315,6 +315,22 @@ int bitmap_scnprintf(char *buf, unsigned int buflen,
|
||||
}
|
||||
EXPORT_SYMBOL(bitmap_scnprintf);
|
||||
|
||||
/**
|
||||
* bitmap_scnprintf_len - return buffer length needed to convert
|
||||
* bitmap to an ASCII hex string.
|
||||
* @len: number of bits to be converted
|
||||
*/
|
||||
int bitmap_scnprintf_len(unsigned int len)
|
||||
{
|
||||
/* we need 9 chars per word for 32 bit words (8 hexdigits + sep/null) */
|
||||
int bitslen = ALIGN(len, CHUNKSZ);
|
||||
int wordlen = CHUNKSZ / 4;
|
||||
int buflen = (bitslen / wordlen) * (wordlen + 1) * sizeof(char);
|
||||
|
||||
return buflen;
|
||||
}
|
||||
EXPORT_SYMBOL(bitmap_scnprintf_len);
|
||||
|
||||
/**
|
||||
* __bitmap_parse - convert an ASCII hex string into a bitmap.
|
||||
* @buf: pointer to buffer containing string.
|
||||
|
@ -82,9 +82,10 @@ EXPORT_SYMBOL_GPL(percpu_populate);
|
||||
int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
|
||||
cpumask_t *mask)
|
||||
{
|
||||
cpumask_t populated = CPU_MASK_NONE;
|
||||
cpumask_t populated;
|
||||
int cpu;
|
||||
|
||||
cpus_clear(populated);
|
||||
for_each_cpu_mask(cpu, *mask)
|
||||
if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
|
||||
__percpu_depopulate_mask(__pdata, &populated);
|
||||
|
@ -2029,6 +2029,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
|
||||
int n, val;
|
||||
int min_val = INT_MAX;
|
||||
int best_node = -1;
|
||||
node_to_cpumask_ptr(tmp, 0);
|
||||
|
||||
/* Use the local node if we haven't already */
|
||||
if (!node_isset(node, *used_node_mask)) {
|
||||
@ -2037,7 +2038,6 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
|
||||
}
|
||||
|
||||
for_each_node_state(n, N_HIGH_MEMORY) {
|
||||
cpumask_t tmp;
|
||||
|
||||
/* Don't want a node to appear more than once */
|
||||
if (node_isset(n, *used_node_mask))
|
||||
@ -2050,8 +2050,8 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
|
||||
val += (n < node);
|
||||
|
||||
/* Give preference to headless and unused nodes */
|
||||
tmp = node_to_cpumask(n);
|
||||
if (!cpus_empty(tmp))
|
||||
node_to_cpumask_ptr_next(tmp, n);
|
||||
if (!cpus_empty(*tmp))
|
||||
val += PENALTY_FOR_NODE_WITH_CPUS;
|
||||
|
||||
/* Slight preference for less loaded node */
|
||||
|
@ -187,8 +187,8 @@ static int pdflush(void *dummy)
|
||||
* This is needed as pdflush's are dynamically created and destroyed.
|
||||
* The boottime pdflush's are easily placed w/o these 2 lines.
|
||||
*/
|
||||
cpus_allowed = cpuset_cpus_allowed(current);
|
||||
set_cpus_allowed(current, cpus_allowed);
|
||||
cpuset_cpus_allowed(current, &cpus_allowed);
|
||||
set_cpus_allowed_ptr(current, &cpus_allowed);
|
||||
|
||||
return __pdflush(&my_work);
|
||||
}
|
||||
|
@ -1160,14 +1160,13 @@ static void __cpuinit cpuup_canceled(long cpu)
|
||||
struct kmem_cache *cachep;
|
||||
struct kmem_list3 *l3 = NULL;
|
||||
int node = cpu_to_node(cpu);
|
||||
node_to_cpumask_ptr(mask, node);
|
||||
|
||||
list_for_each_entry(cachep, &cache_chain, next) {
|
||||
struct array_cache *nc;
|
||||
struct array_cache *shared;
|
||||
struct array_cache **alien;
|
||||
cpumask_t mask;
|
||||
|
||||
mask = node_to_cpumask(node);
|
||||
/* cpu is dead; no one can alloc from it. */
|
||||
nc = cachep->array[cpu];
|
||||
cachep->array[cpu] = NULL;
|
||||
@ -1183,7 +1182,7 @@ static void __cpuinit cpuup_canceled(long cpu)
|
||||
if (nc)
|
||||
free_block(cachep, nc->entry, nc->avail, node);
|
||||
|
||||
if (!cpus_empty(mask)) {
|
||||
if (!cpus_empty(*mask)) {
|
||||
spin_unlock_irq(&l3->list_lock);
|
||||
goto free_array_cache;
|
||||
}
|
||||
|
18
mm/vmscan.c
18
mm/vmscan.c
@ -1647,11 +1647,10 @@ static int kswapd(void *p)
|
||||
struct reclaim_state reclaim_state = {
|
||||
.reclaimed_slab = 0,
|
||||
};
|
||||
cpumask_t cpumask;
|
||||
node_to_cpumask_ptr(cpumask, pgdat->node_id);
|
||||
|
||||
cpumask = node_to_cpumask(pgdat->node_id);
|
||||
if (!cpus_empty(cpumask))
|
||||
set_cpus_allowed(tsk, cpumask);
|
||||
if (!cpus_empty(*cpumask))
|
||||
set_cpus_allowed_ptr(tsk, cpumask);
|
||||
current->reclaim_state = &reclaim_state;
|
||||
|
||||
/*
|
||||
@ -1880,17 +1879,16 @@ out:
|
||||
static int __devinit cpu_callback(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
pg_data_t *pgdat;
|
||||
cpumask_t mask;
|
||||
int nid;
|
||||
|
||||
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
|
||||
for_each_node_state(nid, N_HIGH_MEMORY) {
|
||||
pgdat = NODE_DATA(nid);
|
||||
mask = node_to_cpumask(pgdat->node_id);
|
||||
if (any_online_cpu(mask) != NR_CPUS)
|
||||
pg_data_t *pgdat = NODE_DATA(nid);
|
||||
node_to_cpumask_ptr(mask, pgdat->node_id);
|
||||
|
||||
if (any_online_cpu(*mask) < nr_cpu_ids)
|
||||
/* One of our CPUs online: restore mask */
|
||||
set_cpus_allowed(pgdat->kswapd, mask);
|
||||
set_cpus_allowed_ptr(pgdat->kswapd, mask);
|
||||
}
|
||||
}
|
||||
return NOTIFY_OK;
|
||||
|
@ -301,7 +301,6 @@ static inline int
|
||||
svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
|
||||
{
|
||||
struct svc_pool_map *m = &svc_pool_map;
|
||||
unsigned int node; /* or cpu */
|
||||
|
||||
/*
|
||||
* The caller checks for sv_nrpools > 1, which
|
||||
@ -314,16 +313,23 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
|
||||
default:
|
||||
return 0;
|
||||
case SVC_POOL_PERCPU:
|
||||
node = m->pool_to[pidx];
|
||||
{
|
||||
unsigned int cpu = m->pool_to[pidx];
|
||||
|
||||
*oldmask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(node));
|
||||
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
|
||||
return 1;
|
||||
}
|
||||
case SVC_POOL_PERNODE:
|
||||
node = m->pool_to[pidx];
|
||||
{
|
||||
unsigned int node = m->pool_to[pidx];
|
||||
node_to_cpumask_ptr(nodecpumask, node);
|
||||
|
||||
*oldmask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, node_to_cpumask(node));
|
||||
set_cpus_allowed_ptr(current, nodecpumask);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user