cgroup/cpuset: Fix violation of cpuset locking rule

The cpuset fields that manage partition root state do not strictly
follow the cpuset locking rule that update to cpuset has to be done
with both the callback_lock and cpuset_mutex held. This is now fixed
by making sure that the locking rule is upheld.

Fixes: 3881b86128 ("cpuset: Add an error state to cpuset.sched.partition")
Fixes: 4b842da276 ("cpuset: Make CPU hotplug work with partition")
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
Waiman Long 2021-07-20 10:18:28 -04:00 committed by Tejun Heo
parent 15d428e6fe
commit 6ba34d3c73

View File

@ -1148,6 +1148,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
struct cpuset *parent = parent_cs(cpuset);
int adding; /* Moving cpus from effective_cpus to subparts_cpus */
int deleting; /* Moving cpus from subparts_cpus to effective_cpus */
int new_prs;
bool part_error = false; /* Partition error? */
percpu_rwsem_assert_held(&cpuset_rwsem);
@ -1183,6 +1184,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
* A cpumask update cannot make parent's effective_cpus become empty.
*/
adding = deleting = false;
new_prs = cpuset->partition_root_state;
if (cmd == partcmd_enable) {
cpumask_copy(tmp->addmask, cpuset->cpus_allowed);
adding = true;
@ -1247,11 +1249,11 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
switch (cpuset->partition_root_state) {
case PRS_ENABLED:
if (part_error)
cpuset->partition_root_state = PRS_ERROR;
new_prs = PRS_ERROR;
break;
case PRS_ERROR:
if (!part_error)
cpuset->partition_root_state = PRS_ENABLED;
new_prs = PRS_ENABLED;
break;
}
/*
@ -1260,10 +1262,10 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
part_error = (prev_prs == PRS_ERROR);
}
if (!part_error && (cpuset->partition_root_state == PRS_ERROR))
if (!part_error && (new_prs == PRS_ERROR))
return 0; /* Nothing need to be done */
if (cpuset->partition_root_state == PRS_ERROR) {
if (new_prs == PRS_ERROR) {
/*
* Remove all its cpus from parent's subparts_cpus.
*/
@ -1272,7 +1274,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
parent->subparts_cpus);
}
if (!adding && !deleting)
if (!adding && !deleting && (new_prs == cpuset->partition_root_state))
return 0;
/*
@ -1299,6 +1301,9 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
}
parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
if (cpuset->partition_root_state != new_prs)
cpuset->partition_root_state = new_prs;
spin_unlock_irq(&callback_lock);
return cmd == partcmd_update;
@ -1321,6 +1326,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
struct cpuset *cp;
struct cgroup_subsys_state *pos_css;
bool need_rebuild_sched_domains = false;
int new_prs;
rcu_read_lock();
cpuset_for_each_descendant_pre(cp, pos_css, cs) {
@ -1360,7 +1366,8 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
* update_tasks_cpumask() again for tasks in the parent
* cpuset if the parent's subparts_cpus changes.
*/
if ((cp != cs) && cp->partition_root_state) {
new_prs = cp->partition_root_state;
if ((cp != cs) && new_prs) {
switch (parent->partition_root_state) {
case PRS_DISABLED:
/*
@ -1370,7 +1377,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
*/
WARN_ON_ONCE(cp->partition_root_state
!= PRS_ERROR);
cp->partition_root_state = PRS_DISABLED;
new_prs = PRS_DISABLED;
/*
* clear_bit() is an atomic operation and
@ -1391,11 +1398,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
/*
* When parent is invalid, it has to be too.
*/
cp->partition_root_state = PRS_ERROR;
if (cp->nr_subparts_cpus) {
cp->nr_subparts_cpus = 0;
cpumask_clear(cp->subparts_cpus);
}
new_prs = PRS_ERROR;
break;
}
}
@ -1407,8 +1410,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
spin_lock_irq(&callback_lock);
cpumask_copy(cp->effective_cpus, tmp->new_cpus);
if (cp->nr_subparts_cpus &&
(cp->partition_root_state != PRS_ENABLED)) {
if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) {
cp->nr_subparts_cpus = 0;
cpumask_clear(cp->subparts_cpus);
} else if (cp->nr_subparts_cpus) {
@ -1435,6 +1437,10 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
= cpumask_weight(cp->subparts_cpus);
}
}
if (new_prs != cp->partition_root_state)
cp->partition_root_state = new_prs;
spin_unlock_irq(&callback_lock);
WARN_ON(!is_in_v2_mode() &&
@ -1944,25 +1950,25 @@ out:
*/
static int update_prstate(struct cpuset *cs, int new_prs)
{
int err;
int err, old_prs = cs->partition_root_state;
struct cpuset *parent = parent_cs(cs);
struct tmpmasks tmpmask;
if (new_prs == cs->partition_root_state)
if (old_prs == new_prs)
return 0;
/*
* Cannot force a partial or invalid partition root to a full
* partition root.
*/
if (new_prs && (cs->partition_root_state < 0))
if (new_prs && (old_prs == PRS_ERROR))
return -EINVAL;
if (alloc_cpumasks(NULL, &tmpmask))
return -ENOMEM;
err = -EINVAL;
if (!cs->partition_root_state) {
if (!old_prs) {
/*
* Turning on partition root requires setting the
* CS_CPU_EXCLUSIVE bit implicitly as well and cpus_allowed
@ -1981,14 +1987,12 @@ static int update_prstate(struct cpuset *cs, int new_prs)
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
goto out;
}
cs->partition_root_state = PRS_ENABLED;
} else {
/*
* Turning off partition root will clear the
* CS_CPU_EXCLUSIVE bit.
*/
if (cs->partition_root_state == PRS_ERROR) {
cs->partition_root_state = PRS_DISABLED;
if (old_prs == PRS_ERROR) {
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
err = 0;
goto out;
@ -1999,8 +2003,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
if (err)
goto out;
cs->partition_root_state = PRS_DISABLED;
/* Turning off CS_CPU_EXCLUSIVE will not return error */
update_flag(CS_CPU_EXCLUSIVE, cs, 0);
}
@ -2017,6 +2019,12 @@ static int update_prstate(struct cpuset *cs, int new_prs)
rebuild_sched_domains_locked();
out:
if (!err) {
spin_lock_irq(&callback_lock);
cs->partition_root_state = new_prs;
spin_unlock_irq(&callback_lock);
}
free_cpumasks(NULL, &tmpmask);
return err;
}
@ -3080,8 +3088,10 @@ retry:
if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
(parent->partition_root_state == PRS_ERROR))) {
if (cs->nr_subparts_cpus) {
spin_lock_irq(&callback_lock);
cs->nr_subparts_cpus = 0;
cpumask_clear(cs->subparts_cpus);
spin_unlock_irq(&callback_lock);
compute_effective_cpumask(&new_cpus, cs, parent);
}
@ -3095,7 +3105,9 @@ retry:
cpumask_empty(&new_cpus)) {
update_parent_subparts_cpumask(cs, partcmd_disable,
NULL, tmp);
spin_lock_irq(&callback_lock);
cs->partition_root_state = PRS_ERROR;
spin_unlock_irq(&callback_lock);
}
cpuset_force_rebuild();
}