2013-12-30 01:27:10 +08:00
|
|
|
/*
|
|
|
|
* net/core/netclassid_cgroup.c Classid Cgroupfs Handling
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* Authors: Thomas Graf <tgraf@suug.ch>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/cgroup.h>
|
|
|
|
#include <linux/fdtable.h>
|
|
|
|
#include <net/cls_cgroup.h>
|
|
|
|
#include <net/sock.h>
|
|
|
|
|
|
|
|
static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
|
|
|
|
{
|
|
|
|
return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct cgroup_cls_state *task_cls_state(struct task_struct *p)
|
|
|
|
{
|
2015-07-22 17:23:20 +08:00
|
|
|
return css_cls_state(task_css_check(p, net_cls_cgrp_id,
|
|
|
|
rcu_read_lock_bh_held()));
|
2013-12-30 01:27:10 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(task_cls_state);
|
|
|
|
|
|
|
|
static struct cgroup_subsys_state *
|
|
|
|
cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
|
|
|
|
{
|
|
|
|
struct cgroup_cls_state *cs;
|
|
|
|
|
|
|
|
cs = kzalloc(sizeof(*cs), GFP_KERNEL);
|
|
|
|
if (!cs)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
return &cs->css;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int cgrp_css_online(struct cgroup_subsys_state *css)
|
|
|
|
{
|
|
|
|
struct cgroup_cls_state *cs = css_cls_state(css);
|
2014-05-17 01:22:48 +08:00
|
|
|
struct cgroup_cls_state *parent = css_cls_state(css->parent);
|
2013-12-30 01:27:10 +08:00
|
|
|
|
|
|
|
if (parent)
|
|
|
|
cs->classid = parent->classid;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void cgrp_css_free(struct cgroup_subsys_state *css)
|
|
|
|
{
|
|
|
|
kfree(css_cls_state(css));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int update_classid(const void *v, struct file *file, unsigned n)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
struct socket *sock = sock_from_file(file, &err);
|
|
|
|
|
|
|
|
if (sock)
|
|
|
|
sock->sk->sk_classid = (u32)(unsigned long)v;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
cgroup: fix handling of multi-destination migration from subtree_control enabling
Consider the following v2 hierarchy.
P0 (+memory) --- P1 (-memory) --- A
\- B
P0 has memory enabled in its subtree_control while P1 doesn't. If
both A and B contain processes, they would belong to the memory css of
P1. Now if memory is enabled on P1's subtree_control, memory csses
should be created on both A and B and A's processes should be moved to
the former and B's processes the latter. IOW, enabling controllers
can cause atomic migrations into different csses.
The core cgroup migration logic has been updated accordingly but the
controller migration methods haven't and still assume that all tasks
migrate to a single target css; furthermore, the methods were fed the
css in which subtree_control was updated which is the parent of the
target csses. pids controller depends on the migration methods to
move charges and this made the controller attribute charges to the
wrong csses often triggering the following warning by driving a
counter negative.
WARNING: CPU: 1 PID: 1 at kernel/cgroup_pids.c:97 pids_cancel.constprop.6+0x31/0x40()
Modules linked in:
CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc1+ #29
...
ffffffff81f65382 ffff88007c043b90 ffffffff81551ffc 0000000000000000
ffff88007c043bc8 ffffffff810de202 ffff88007a752000 ffff88007a29ab00
ffff88007c043c80 ffff88007a1d8400 0000000000000001 ffff88007c043bd8
Call Trace:
[<ffffffff81551ffc>] dump_stack+0x4e/0x82
[<ffffffff810de202>] warn_slowpath_common+0x82/0xc0
[<ffffffff810de2fa>] warn_slowpath_null+0x1a/0x20
[<ffffffff8118e031>] pids_cancel.constprop.6+0x31/0x40
[<ffffffff8118e0fd>] pids_can_attach+0x6d/0xf0
[<ffffffff81188a4c>] cgroup_taskset_migrate+0x6c/0x330
[<ffffffff81188e05>] cgroup_migrate+0xf5/0x190
[<ffffffff81189016>] cgroup_attach_task+0x176/0x200
[<ffffffff8118949d>] __cgroup_procs_write+0x2ad/0x460
[<ffffffff81189684>] cgroup_procs_write+0x14/0x20
[<ffffffff811854e5>] cgroup_file_write+0x35/0x1c0
[<ffffffff812e26f1>] kernfs_fop_write+0x141/0x190
[<ffffffff81265f88>] __vfs_write+0x28/0xe0
[<ffffffff812666fc>] vfs_write+0xac/0x1a0
[<ffffffff81267019>] SyS_write+0x49/0xb0
[<ffffffff81bcef32>] entry_SYSCALL_64_fastpath+0x12/0x76
This patch fixes the bug by removing @css parameter from the three
migration methods, ->can_attach, ->cancel_attach() and ->attach() and
updating cgroup_taskset iteration helpers also return the destination
css in addition to the task being migrated. All controllers are
updated accordingly.
* Controllers which don't care whether there are one or multiple
target csses can be converted trivially. cpu, io, freezer, perf,
netclassid and netprio fall in this category.
* cpuset's current implementation assumes that there's single source
and destination and thus doesn't support v2 hierarchy already. The
only change made by this patchset is how that single destination css
is obtained.
* memory migration path already doesn't do anything on v2. How the
single destination css is obtained is updated and the prep stage of
mem_cgroup_can_attach() is reordered to accomodate the change.
* pids is the only controller which was affected by this bug. It now
correctly handles multi-destination migrations and no longer causes
counter underflow from incorrect accounting.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Aleksa Sarai <cyphar@cyphar.com>
2015-12-03 23:18:21 +08:00
|
|
|
static void cgrp_attach(struct cgroup_taskset *tset)
|
2013-12-30 01:27:10 +08:00
|
|
|
{
|
|
|
|
struct task_struct *p;
|
cgroup: fix handling of multi-destination migration from subtree_control enabling
Consider the following v2 hierarchy.
P0 (+memory) --- P1 (-memory) --- A
\- B
P0 has memory enabled in its subtree_control while P1 doesn't. If
both A and B contain processes, they would belong to the memory css of
P1. Now if memory is enabled on P1's subtree_control, memory csses
should be created on both A and B and A's processes should be moved to
the former and B's processes the latter. IOW, enabling controllers
can cause atomic migrations into different csses.
The core cgroup migration logic has been updated accordingly but the
controller migration methods haven't and still assume that all tasks
migrate to a single target css; furthermore, the methods were fed the
css in which subtree_control was updated which is the parent of the
target csses. pids controller depends on the migration methods to
move charges and this made the controller attribute charges to the
wrong csses often triggering the following warning by driving a
counter negative.
WARNING: CPU: 1 PID: 1 at kernel/cgroup_pids.c:97 pids_cancel.constprop.6+0x31/0x40()
Modules linked in:
CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc1+ #29
...
ffffffff81f65382 ffff88007c043b90 ffffffff81551ffc 0000000000000000
ffff88007c043bc8 ffffffff810de202 ffff88007a752000 ffff88007a29ab00
ffff88007c043c80 ffff88007a1d8400 0000000000000001 ffff88007c043bd8
Call Trace:
[<ffffffff81551ffc>] dump_stack+0x4e/0x82
[<ffffffff810de202>] warn_slowpath_common+0x82/0xc0
[<ffffffff810de2fa>] warn_slowpath_null+0x1a/0x20
[<ffffffff8118e031>] pids_cancel.constprop.6+0x31/0x40
[<ffffffff8118e0fd>] pids_can_attach+0x6d/0xf0
[<ffffffff81188a4c>] cgroup_taskset_migrate+0x6c/0x330
[<ffffffff81188e05>] cgroup_migrate+0xf5/0x190
[<ffffffff81189016>] cgroup_attach_task+0x176/0x200
[<ffffffff8118949d>] __cgroup_procs_write+0x2ad/0x460
[<ffffffff81189684>] cgroup_procs_write+0x14/0x20
[<ffffffff811854e5>] cgroup_file_write+0x35/0x1c0
[<ffffffff812e26f1>] kernfs_fop_write+0x141/0x190
[<ffffffff81265f88>] __vfs_write+0x28/0xe0
[<ffffffff812666fc>] vfs_write+0xac/0x1a0
[<ffffffff81267019>] SyS_write+0x49/0xb0
[<ffffffff81bcef32>] entry_SYSCALL_64_fastpath+0x12/0x76
This patch fixes the bug by removing @css parameter from the three
migration methods, ->can_attach, ->cancel_attach() and ->attach() and
updating cgroup_taskset iteration helpers also return the destination
css in addition to the task being migrated. All controllers are
updated accordingly.
* Controllers which don't care whether there are one or multiple
target csses can be converted trivially. cpu, io, freezer, perf,
netclassid and netprio fall in this category.
* cpuset's current implementation assumes that there's single source
and destination and thus doesn't support v2 hierarchy already. The
only change made by this patchset is how that single destination css
is obtained.
* memory migration path already doesn't do anything on v2. How the
single destination css is obtained is updated and the prep stage of
mem_cgroup_can_attach() is reordered to accomodate the change.
* pids is the only controller which was affected by this bug. It now
correctly handles multi-destination migrations and no longer causes
counter underflow from incorrect accounting.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Aleksa Sarai <cyphar@cyphar.com>
2015-12-03 23:18:21 +08:00
|
|
|
struct cgroup_subsys_state *css;
|
|
|
|
|
|
|
|
cgroup_taskset_for_each(p, css, tset) {
|
|
|
|
struct cgroup_cls_state *cs = css_cls_state(css);
|
|
|
|
void *v = (void *)(unsigned long)cs->classid;
|
2013-12-30 01:27:10 +08:00
|
|
|
|
|
|
|
task_lock(p);
|
|
|
|
iterate_fd(p->files, 0, update_classid, v);
|
|
|
|
task_unlock(p);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
|
|
|
|
{
|
|
|
|
return css_cls_state(css)->classid;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
|
|
|
|
u64 value)
|
|
|
|
{
|
|
|
|
css_cls_state(css)->classid = (u32) value;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct cftype ss_files[] = {
|
|
|
|
{
|
|
|
|
.name = "classid",
|
|
|
|
.read_u64 = read_classid,
|
|
|
|
.write_u64 = write_classid,
|
|
|
|
},
|
|
|
|
{ } /* terminate */
|
|
|
|
};
|
|
|
|
|
2014-02-08 23:36:58 +08:00
|
|
|
struct cgroup_subsys net_cls_cgrp_subsys = {
|
2013-12-30 01:27:10 +08:00
|
|
|
.css_alloc = cgrp_css_alloc,
|
|
|
|
.css_online = cgrp_css_online,
|
|
|
|
.css_free = cgrp_css_free,
|
|
|
|
.attach = cgrp_attach,
|
2014-07-15 23:05:09 +08:00
|
|
|
.legacy_cftypes = ss_files,
|
2013-12-30 01:27:10 +08:00
|
|
|
};
|