mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-10 20:44:09 +08:00
eb95419b02
cgroup is currently in the process of transitioning to using struct
cgroup_subsys_state * as the primary handle instead of struct cgroup *
in subsystem implementations for the following reasons.
* With unified hierarchy, subsystems will be dynamically bound and
unbound from cgroups and thus css's (cgroup_subsys_state) may be
created and destroyed dynamically over the lifetime of a cgroup,
which is different from the current state where all css's are
allocated and destroyed together with the associated cgroup. This
in turn means that cgroup_css() should be synchronized and may
return NULL, making it more cumbersome to use.
* Differing levels of per-subsystem granularity in the unified
hierarchy means that the task and descendant iterators should behave
differently depending on the specific subsystem the iteration is
being performed for.
* In the majority of cases, subsystems only care about their part in the
cgroup hierarchy - i.e. the hierarchy of css's. Subsystem methods
often obtain the matching css pointer from the cgroup and don't
bother with the cgroup pointer itself. Passing around css fits
much better.
This patch converts all cgroup_subsys methods to take @css instead of
@cgroup. The conversions are mostly straight-forward. A few
noteworthy changes are
* ->css_alloc() now takes css of the parent cgroup rather than the
pointer to the new cgroup as the css for the new cgroup doesn't
exist yet. Knowing the parent css is enough for all the existing
subsystems.
* In kernel/cgroup.c::offline_css(), unnecessary open coded css
dereference is replaced with local variable access.
This patch shouldn't cause any behavior differences.
v2: Unnecessary explicit cgrp->subsys[] deref in css_online() replaced
with local variable @css as suggested by Li Zefan.
Rebased on top of new for-3.12 which includes for-3.11-fixes so
that ->css_free() invocation added by da0a12caff ("cgroup: fix a
leak when percpu_ref_init() fails") is converted too. Suggested
by Li Zefan.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Steven Rostedt <rostedt@goodmis.org>
294 lines
6.2 KiB
C
294 lines
6.2 KiB
C
#include <linux/cgroup.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/err.h>
|
|
|
|
#include "sched.h"
|
|
|
|
/*
|
|
* CPU accounting code for task groups.
|
|
*
|
|
* Based on the work by Paul Menage (menage@google.com) and Balbir Singh
|
|
* (balbir@in.ibm.com).
|
|
*/
|
|
|
|
/*
 * Execution-mode buckets reported for a cpu accounting group. The values
 * are used as indices into cpuacct_stat_desc[].
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* time spent executing in user mode */
	CPUACCT_STAT_SYSTEM,	/* time spent executing in kernel mode */

	CPUACCT_STAT_NSTATS,	/* number of buckets above */
};
|
|
|
|
/* track cpu usage of a group of tasks and its child groups */
|
|
struct cpuacct {
|
|
struct cgroup_subsys_state css;
|
|
/* cpuusage holds pointer to a u64-type object on every cpu */
|
|
u64 __percpu *cpuusage;
|
|
struct kernel_cpustat __percpu *cpustat;
|
|
};
|
|
|
|
static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
|
|
{
|
|
return css ? container_of(css, struct cpuacct, css) : NULL;
|
|
}
|
|
|
|
/* return cpu accounting group corresponding to this container */
|
|
static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
|
|
{
|
|
return css_ca(cgroup_css(cgrp, cpuacct_subsys_id));
|
|
}
|
|
|
|
/* return cpu accounting group to which this task belongs */
|
|
static inline struct cpuacct *task_ca(struct task_struct *tsk)
|
|
{
|
|
return css_ca(task_css(tsk, cpuacct_subsys_id));
|
|
}
|
|
|
|
static inline struct cpuacct *parent_ca(struct cpuacct *ca)
|
|
{
|
|
return css_ca(css_parent(&ca->css));
|
|
}
|
|
|
|
static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
|
|
static struct cpuacct root_cpuacct = {
|
|
.cpustat = &kernel_cpustat,
|
|
.cpuusage = &root_cpuacct_cpuusage,
|
|
};
|
|
|
|
/* create a new cpu accounting group */
|
|
static struct cgroup_subsys_state *
|
|
cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
|
|
{
|
|
struct cpuacct *ca;
|
|
|
|
if (!parent_css)
|
|
return &root_cpuacct.css;
|
|
|
|
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
|
|
if (!ca)
|
|
goto out;
|
|
|
|
ca->cpuusage = alloc_percpu(u64);
|
|
if (!ca->cpuusage)
|
|
goto out_free_ca;
|
|
|
|
ca->cpustat = alloc_percpu(struct kernel_cpustat);
|
|
if (!ca->cpustat)
|
|
goto out_free_cpuusage;
|
|
|
|
return &ca->css;
|
|
|
|
out_free_cpuusage:
|
|
free_percpu(ca->cpuusage);
|
|
out_free_ca:
|
|
kfree(ca);
|
|
out:
|
|
return ERR_PTR(-ENOMEM);
|
|
}
|
|
|
|
/* destroy an existing cpu accounting group */
|
|
static void cpuacct_css_free(struct cgroup_subsys_state *css)
|
|
{
|
|
struct cpuacct *ca = css_ca(css);
|
|
|
|
free_percpu(ca->cpustat);
|
|
free_percpu(ca->cpuusage);
|
|
kfree(ca);
|
|
}
|
|
|
|
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
|
|
{
|
|
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
|
u64 data;
|
|
|
|
#ifndef CONFIG_64BIT
|
|
/*
|
|
* Take rq->lock to make 64-bit read safe on 32-bit platforms.
|
|
*/
|
|
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
|
|
data = *cpuusage;
|
|
raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
|
|
#else
|
|
data = *cpuusage;
|
|
#endif
|
|
|
|
return data;
|
|
}
|
|
|
|
/*
 * Store @val into the usage counter of group @ca for cpu @cpu.
 *
 * Without CONFIG_64BIT the store is done under that cpu's rq->lock to make
 * the 64-bit write safe on 32-bit platforms.
 */
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
	u64 *counter = per_cpu_ptr(ca->cpuusage, cpu);

#ifndef CONFIG_64BIT
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif
	*counter = val;
#ifndef CONFIG_64BIT
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
}
|
|
|
|
/* return total cpu usage (in nanoseconds) of a group */
|
|
static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
|
|
{
|
|
struct cpuacct *ca = cgroup_ca(cgrp);
|
|
u64 totalcpuusage = 0;
|
|
int i;
|
|
|
|
for_each_present_cpu(i)
|
|
totalcpuusage += cpuacct_cpuusage_read(ca, i);
|
|
|
|
return totalcpuusage;
|
|
}
|
|
|
|
static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
|
|
u64 reset)
|
|
{
|
|
struct cpuacct *ca = cgroup_ca(cgrp);
|
|
int err = 0;
|
|
int i;
|
|
|
|
if (reset) {
|
|
err = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
for_each_present_cpu(i)
|
|
cpuacct_cpuusage_write(ca, i, 0);
|
|
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
|
|
struct seq_file *m)
|
|
{
|
|
struct cpuacct *ca = cgroup_ca(cgroup);
|
|
u64 percpu;
|
|
int i;
|
|
|
|
for_each_present_cpu(i) {
|
|
percpu = cpuacct_cpuusage_read(ca, i);
|
|
seq_printf(m, "%llu ", (unsigned long long) percpu);
|
|
}
|
|
seq_printf(m, "\n");
|
|
return 0;
|
|
}
|
|
|
|
static const char * const cpuacct_stat_desc[] = {
|
|
[CPUACCT_STAT_USER] = "user",
|
|
[CPUACCT_STAT_SYSTEM] = "system",
|
|
};
|
|
|
|
static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
|
|
struct cgroup_map_cb *cb)
|
|
{
|
|
struct cpuacct *ca = cgroup_ca(cgrp);
|
|
int cpu;
|
|
s64 val = 0;
|
|
|
|
for_each_online_cpu(cpu) {
|
|
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
|
|
val += kcpustat->cpustat[CPUTIME_USER];
|
|
val += kcpustat->cpustat[CPUTIME_NICE];
|
|
}
|
|
val = cputime64_to_clock_t(val);
|
|
cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);
|
|
|
|
val = 0;
|
|
for_each_online_cpu(cpu) {
|
|
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
|
|
val += kcpustat->cpustat[CPUTIME_SYSTEM];
|
|
val += kcpustat->cpustat[CPUTIME_IRQ];
|
|
val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
|
|
}
|
|
|
|
val = cputime64_to_clock_t(val);
|
|
cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct cftype files[] = {
|
|
{
|
|
.name = "usage",
|
|
.read_u64 = cpuusage_read,
|
|
.write_u64 = cpuusage_write,
|
|
},
|
|
{
|
|
.name = "usage_percpu",
|
|
.read_seq_string = cpuacct_percpu_seq_read,
|
|
},
|
|
{
|
|
.name = "stat",
|
|
.read_map = cpuacct_stats_show,
|
|
},
|
|
{ } /* terminate */
|
|
};
|
|
|
|
/*
|
|
* charge this task's execution time to its accounting group.
|
|
*
|
|
* called with rq->lock held.
|
|
*/
|
|
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
|
{
|
|
struct cpuacct *ca;
|
|
int cpu;
|
|
|
|
cpu = task_cpu(tsk);
|
|
|
|
rcu_read_lock();
|
|
|
|
ca = task_ca(tsk);
|
|
|
|
while (true) {
|
|
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
|
*cpuusage += cputime;
|
|
|
|
ca = parent_ca(ca);
|
|
if (!ca)
|
|
break;
|
|
}
|
|
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
/*
|
|
* Add user/system time to cpuacct.
|
|
*
|
|
* Note: it's the caller that updates the account of the root cgroup.
|
|
*/
|
|
void cpuacct_account_field(struct task_struct *p, int index, u64 val)
|
|
{
|
|
struct kernel_cpustat *kcpustat;
|
|
struct cpuacct *ca;
|
|
|
|
rcu_read_lock();
|
|
ca = task_ca(p);
|
|
while (ca != &root_cpuacct) {
|
|
kcpustat = this_cpu_ptr(ca->cpustat);
|
|
kcpustat->cpustat[index] += val;
|
|
ca = parent_ca(ca);
|
|
}
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
struct cgroup_subsys cpuacct_subsys = {
|
|
.name = "cpuacct",
|
|
.css_alloc = cpuacct_css_alloc,
|
|
.css_free = cpuacct_css_free,
|
|
.subsys_id = cpuacct_subsys_id,
|
|
.base_cftypes = files,
|
|
.early_init = 1,
|
|
};
|