cgroup changes for v6.2-rc1

Nothing too interesting.
 
 * Add CONFIG_DEBUG_GROUP_REF which makes cgroup refcnt operations kprobable.
 
 * A couple cpuset optimizations.
 
 * Other misc changes including doc and test updates.
 -----BEGIN PGP SIGNATURE-----
 
 iIQEABYIACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCY5bHvg4cdGpAa2VybmVs
 Lm9yZwAKCRCxYfJx3gVYGcYrAQCfrlzrbWw6gTQ7fmr0Avxjy5FxLjsdzEGPcmGY
 ByEMhgD/VdUf3zI/Khr91Gsi5JXQxQf7a5caD369xupRWUWjqA8=
 =Nf+E
 -----END PGP SIGNATURE-----

Merge tag 'cgroup-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "Nothing too interesting:

   - Add CONFIG_DEBUG_GROUP_REF which makes cgroup refcnt operations
     kprobable

   - A couple cpuset optimizations

   - Other misc changes including doc and test updates"

* tag 'cgroup-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: remove rcu_read_lock()/rcu_read_unlock() in critical section of spin_lock_irq()
  cgroup/cpuset: Improve cpuset_css_alloc() description
  kselftest/cgroup: Add cleanup() to test_cpuset_prs.sh
  cgroup/cpuset: Optimize cpuset_attach() on v2
  cgroup/cpuset: Skip spread flags update on v2
  kselftest/cgroup: Fix gathering number of CPUs
  cgroup: cgroup refcnt functions should be exported when CONFIG_DEBUG_CGROUP_REF
  cgroup: Implement DEBUG_CGROUP_REF
This commit is contained in:
Linus Torvalds 2022-12-12 15:48:36 -08:00
commit a312a8cc3c
6 changed files with 181 additions and 98 deletions

View File

@ -310,71 +310,24 @@ void css_task_iter_end(struct css_task_iter *it);
* Inline functions.
*/
#ifdef CONFIG_DEBUG_CGROUP_REF
void css_get(struct cgroup_subsys_state *css);
void css_get_many(struct cgroup_subsys_state *css, unsigned int n);
bool css_tryget(struct cgroup_subsys_state *css);
bool css_tryget_online(struct cgroup_subsys_state *css);
void css_put(struct cgroup_subsys_state *css);
void css_put_many(struct cgroup_subsys_state *css, unsigned int n);
#else
#define CGROUP_REF_FN_ATTRS static inline
#define CGROUP_REF_EXPORT(fn)
#include <linux/cgroup_refcnt.h>
#endif
static inline u64 cgroup_id(const struct cgroup *cgrp)
{
return cgrp->kn->id;
}
/**
* css_get - obtain a reference on the specified css
* @css: target css
*
* The caller must already have a reference.
*/
static inline void css_get(struct cgroup_subsys_state *css)
{
if (!(css->flags & CSS_NO_REF))
percpu_ref_get(&css->refcnt);
}
/**
* css_get_many - obtain references on the specified css
* @css: target css
* @n: number of references to get
*
* The caller must already have a reference.
*/
static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
{
if (!(css->flags & CSS_NO_REF))
percpu_ref_get_many(&css->refcnt, n);
}
/**
* css_tryget - try to obtain a reference on the specified css
* @css: target css
*
* Obtain a reference on @css unless it already has reached zero and is
* being released. This function doesn't care whether @css is on or
* offline. The caller naturally needs to ensure that @css is accessible
* but doesn't have to be holding a reference on it - IOW, RCU protected
* access is good enough for this function. Returns %true if a reference
* count was successfully obtained; %false otherwise.
*/
static inline bool css_tryget(struct cgroup_subsys_state *css)
{
if (!(css->flags & CSS_NO_REF))
return percpu_ref_tryget(&css->refcnt);
return true;
}
/**
* css_tryget_online - try to obtain a reference on the specified css if online
* @css: target css
*
* Obtain a reference on @css if it's online. The caller naturally needs
* to ensure that @css is accessible but doesn't have to be holding a
* reference on it - IOW, RCU protected access is good enough for this
* function. Returns %true if a reference count was successfully obtained;
* %false otherwise.
*/
static inline bool css_tryget_online(struct cgroup_subsys_state *css)
{
if (!(css->flags & CSS_NO_REF))
return percpu_ref_tryget_live(&css->refcnt);
return true;
}
/**
* css_is_dying - test whether the specified css is dying
* @css: target css
@ -395,31 +348,6 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css)
return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
}
/**
* css_put - put a css reference
* @css: target css
*
* Put a reference obtained via css_get() and css_tryget_online().
*/
static inline void css_put(struct cgroup_subsys_state *css)
{
if (!(css->flags & CSS_NO_REF))
percpu_ref_put(&css->refcnt);
}
/**
* css_put_many - put css references
* @css: target css
* @n: number of references to put
*
* Put references obtained via css_get() and css_tryget_online().
*/
static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
{
if (!(css->flags & CSS_NO_REF))
percpu_ref_put_many(&css->refcnt, n);
}
static inline void cgroup_get(struct cgroup *cgrp)
{
css_get(&cgrp->self);

View File

@ -0,0 +1,96 @@
/**
* css_get - obtain a reference on the specified css
* @css: target css
*
* The caller must already have a reference.
*/
CGROUP_REF_FN_ATTRS
void css_get(struct cgroup_subsys_state *css)
{
if (!(css->flags & CSS_NO_REF))
percpu_ref_get(&css->refcnt);
}
CGROUP_REF_EXPORT(css_get)
/**
* css_get_many - obtain references on the specified css
* @css: target css
* @n: number of references to get
*
* The caller must already have a reference.
*/
CGROUP_REF_FN_ATTRS
void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
{
if (!(css->flags & CSS_NO_REF))
percpu_ref_get_many(&css->refcnt, n);
}
CGROUP_REF_EXPORT(css_get_many)
/**
* css_tryget - try to obtain a reference on the specified css
* @css: target css
*
* Obtain a reference on @css unless it already has reached zero and is
* being released. This function doesn't care whether @css is on or
* offline. The caller naturally needs to ensure that @css is accessible
* but doesn't have to be holding a reference on it - IOW, RCU protected
* access is good enough for this function. Returns %true if a reference
* count was successfully obtained; %false otherwise.
*/
CGROUP_REF_FN_ATTRS
bool css_tryget(struct cgroup_subsys_state *css)
{
if (!(css->flags & CSS_NO_REF))
return percpu_ref_tryget(&css->refcnt);
return true;
}
CGROUP_REF_EXPORT(css_tryget)
/**
* css_tryget_online - try to obtain a reference on the specified css if online
* @css: target css
*
* Obtain a reference on @css if it's online. The caller naturally needs
* to ensure that @css is accessible but doesn't have to be holding a
* reference on it - IOW, RCU protected access is good enough for this
* function. Returns %true if a reference count was successfully obtained;
* %false otherwise.
*/
CGROUP_REF_FN_ATTRS
bool css_tryget_online(struct cgroup_subsys_state *css)
{
if (!(css->flags & CSS_NO_REF))
return percpu_ref_tryget_live(&css->refcnt);
return true;
}
CGROUP_REF_EXPORT(css_tryget_online)
/**
* css_put - put a css reference
* @css: target css
*
* Put a reference obtained via css_get() and css_tryget_online().
*/
CGROUP_REF_FN_ATTRS
void css_put(struct cgroup_subsys_state *css)
{
if (!(css->flags & CSS_NO_REF))
percpu_ref_put(&css->refcnt);
}
CGROUP_REF_EXPORT(css_put)
/**
* css_put_many - put css references
* @css: target css
* @n: number of references to put
*
* Put references obtained via css_get() and css_tryget_online().
*/
CGROUP_REF_FN_ATTRS
void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
{
if (!(css->flags & CSS_NO_REF))
percpu_ref_put_many(&css->refcnt, n);
}
CGROUP_REF_EXPORT(css_put_many)

View File

@ -248,6 +248,12 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
struct cgroup *cgrp, struct cftype cfts[],
bool is_add);
#ifdef CONFIG_DEBUG_CGROUP_REF
#define CGROUP_REF_FN_ATTRS noinline
#define CGROUP_REF_EXPORT(fn) EXPORT_SYMBOL_GPL(fn);
#include <linux/cgroup_refcnt.h>
#endif
/**
* cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
* @ssid: subsys ID of interest
@ -2860,14 +2866,12 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,
* take an rcu_read_lock.
*/
spin_lock_irq(&css_set_lock);
rcu_read_lock();
task = leader;
do {
cgroup_migrate_add_task(task, mgctx);
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
spin_unlock_irq(&css_set_lock);
return cgroup_migrate_execute(mgctx);

View File

@ -550,11 +550,15 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
/*
* update task's spread flag if cpuset's page/slab spread flag is set
*
* Call with callback_lock or cpuset_rwsem held.
* Call with callback_lock or cpuset_rwsem held. The check can be skipped
* if on default hierarchy.
*/
static void cpuset_update_task_spread_flag(struct cpuset *cs,
static void cpuset_update_task_spread_flags(struct cpuset *cs,
struct task_struct *tsk)
{
if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
return;
if (is_spread_page(cs))
task_set_spread_page(tsk);
else
@ -2153,7 +2157,7 @@ static void update_tasks_flags(struct cpuset *cs)
css_task_iter_start(&cs->css, 0, &it);
while ((task = css_task_iter_next(&it)))
cpuset_update_task_spread_flag(cs, task);
cpuset_update_task_spread_flags(cs, task);
css_task_iter_end(&it);
}
@ -2509,12 +2513,28 @@ static void cpuset_attach(struct cgroup_taskset *tset)
struct cgroup_subsys_state *css;
struct cpuset *cs;
struct cpuset *oldcs = cpuset_attach_old_cs;
bool cpus_updated, mems_updated;
cgroup_taskset_first(tset, &css);
cs = css_cs(css);
lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */
percpu_down_write(&cpuset_rwsem);
cpus_updated = !cpumask_equal(cs->effective_cpus,
oldcs->effective_cpus);
mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems);
/*
* In the default hierarchy, enabling cpuset in the child cgroups
* will trigger a number of cpuset_attach() calls with no change
* in effective cpus and mems. In that case, we can optimize out
* by skipping the task iteration and update.
*/
if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
!cpus_updated && !mems_updated) {
cpuset_attach_nodemask_to = cs->effective_mems;
goto out;
}
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
@ -2530,14 +2550,19 @@ static void cpuset_attach(struct cgroup_taskset *tset)
WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flag(cs, task);
cpuset_update_task_spread_flags(cs, task);
}
/*
* Change mm for all threadgroup leaders. This is expensive and may
* sleep and should be moved outside migration path proper.
* sleep and should be moved outside migration path proper. Skip it
* if there is no change in effective_mems and CS_MEMORY_MIGRATE is
* not set.
*/
cpuset_attach_nodemask_to = cs->effective_mems;
if (!is_memory_migrate(cs) && !mems_updated)
goto out;
cgroup_taskset_for_each_leader(leader, css, tset) {
struct mm_struct *mm = get_task_mm(leader);
@ -2560,6 +2585,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
}
}
out:
cs->old_mems_allowed = cpuset_attach_nodemask_to;
cs->attach_in_progress--;
@ -3046,11 +3072,15 @@ static struct cftype dfl_files[] = {
};
/*
* cpuset_css_alloc - allocate a cpuset css
* cgrp: control group that the new cpuset will be part of
/**
* cpuset_css_alloc - Allocate a cpuset css
* @parent_css: Parent css of the control group that the new cpuset will be
* part of
* Return: cpuset css on success, -ENOMEM on failure.
*
* Allocate and initialize a new cpuset css, for non-NULL @parent_css, return
* top cpuset css otherwise.
*/
static struct cgroup_subsys_state *
cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
{

View File

@ -1717,6 +1717,16 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
config DEBUG_CGROUP_REF
bool "Disable inlining of cgroup css reference count functions"
depends on DEBUG_KERNEL
depends on CGROUPS
depends on KPROBES
default n
help
Force cgroup css reference count functions to not be inlined so
that they can be kprobed for debugging.
source "kernel/trace/Kconfig"
config PROVIDE_OHCI1394_DMA_INIT

View File

@ -16,7 +16,12 @@ skip_test() {
[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
# Set sched verbose flag, if available
[[ -d /sys/kernel/debug/sched ]] && echo Y > /sys/kernel/debug/sched/verbose
if [[ -d /sys/kernel/debug/sched ]]
then
# Used to restore the original setting during cleanup
SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose)
echo Y > /sys/kernel/debug/sched/verbose
fi
# Get wait_inotify location
WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify
@ -25,7 +30,7 @@ WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify
CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
[[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!"
CPUS=$(lscpu | grep "^CPU(s)" | sed -e "s/.*:[[:space:]]*//")
CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//")
[[ $CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"
# Set verbose flag and delay factor
@ -54,6 +59,15 @@ echo +cpuset > cgroup.subtree_control
[[ -d test ]] || mkdir test
cd test
cleanup()
{
online_cpus
rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
cd ..
rmdir test > /dev/null 2>&1
echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose
}
# Pause in ms
pause()
{
@ -666,6 +680,7 @@ test_inotify()
fi
}
trap cleanup 0 2 3 6
run_state_test TEST_MATRIX
test_isolated
test_inotify