From d4ec36bac3de39b7e10ec8f42fbdd20d9a9ed753 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Sun, 21 Jun 2009 12:32:39 +0200
Subject: [PATCH 1/5] sched: Documentation/sched-rt-group: Fix style issues &
 bump version

- add missing closing bracket
- fix two 80-chars issues (as the rest of the document adheres to it)
- bump a reference to kernel version, so the document does not feel aged

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1245580359-4465-1-git-send-email-w.sang@pengutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/scheduler/sched-rt-group.txt | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 1df7f9cdab05..86eabe6c3419 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -73,7 +73,7 @@ The remaining CPU time will be used for user input and other tasks. Because
 realtime tasks have explicitly allocated the CPU time they need to perform
 their tasks, buffer underruns in the graphics or audio can be eliminated.
 
-NOTE: the above example is not fully implemented as of yet (2.6.25). We still
+NOTE: the above example is not fully implemented yet. We still
 lack an EDF scheduler to make non-uniform periods usable.
 
 
@@ -140,14 +140,15 @@ The other option is:
 
 .o CONFIG_CGROUP_SCHED (aka "Basis for grouping tasks" = "Control groups")
 
-This uses the /cgroup virtual file system and "/cgroup/<cgroup>/cpu.rt_runtime_us"
-to control the CPU time reserved for each control group instead.
+This uses the /cgroup virtual file system and
+"/cgroup/<cgroup>/cpu.rt_runtime_us" to control the CPU time reserved for each
+control group instead.
 
 For more information on working with control groups, you should read
 Documentation/cgroups/cgroups.txt as well.
 
-Group settings are checked against the following limits in order to keep the configuration
-schedulable:
+Group settings are checked against the following limits in order to keep the
+configuration schedulable:
 
    \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
 
@@ -189,7 +190,7 @@ Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
 the biggest challenge as the current linux PI infrastructure is geared towards
 the limited static priority levels 0-99. With deadline scheduling you need to
 do deadline inheritance (since priority is inversely proportional to the
-deadline delta (deadline - now).
+deadline delta (deadline - now)).
 
 This means the whole PI machinery will have to be reworked - and that is one of
 the most complex pieces of code we have.

From a1ba4d8ba9f06a397e97cbd67a93ee306860b40a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 1 Apr 2009 18:40:15 +0200
Subject: [PATCH 2/5] sched_rt: Fix overload bug on rt group scheduling

Fixes an easily triggerable BUG() when setting process affinities.

Make sure to count the number of migratable tasks in the same place:
the root rt_rq. Otherwise the number doesn't make sense and we'll hit
the BUG in set_cpus_allowed_rt().

Also, make sure we only count tasks, not groups (this is probably
already taken care of by the fact that rt_se->nr_cpus_allowed will be 0
for groups, but be more explicit)

Tested-by: Thomas Gleixner <tglx@linutronix.de>
CC: stable@kernel.org
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Gregory Haskins <ghaskins@novell.com>
LKML-Reference: <1247067476.9777.57.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c    |  1 +
 kernel/sched_rt.c | 18 +++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e6..a17f3d9a8bfa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -493,6 +493,7 @@ struct rt_rq {
 #endif
 #ifdef CONFIG_SMP
 	unsigned long rt_nr_migratory;
+	unsigned long rt_nr_total;
 	int overloaded;
 	struct plist_head pushable_tasks;
 #endif
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 9bf0d2a73045..3918e01994e0 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -10,6 +10,8 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
 
 #ifdef CONFIG_RT_GROUP_SCHED
 
+#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
+
 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
 {
 	return rt_rq->rq;
@@ -22,6 +24,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 
 #else /* CONFIG_RT_GROUP_SCHED */
 
+#define rt_entity_is_task(rt_se) (1)
+
 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
 {
 	return container_of(rt_rq, struct rq, rt);
@@ -73,7 +77,7 @@ static inline void rt_clear_overload(struct rq *rq)
 
 static void update_rt_migration(struct rt_rq *rt_rq)
 {
-	if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
+	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
 		if (!rt_rq->overloaded) {
 			rt_set_overload(rq_of_rt_rq(rt_rq));
 			rt_rq->overloaded = 1;
@@ -86,6 +90,12 @@ static void update_rt_migration(struct rt_rq *rt_rq)
 
 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+	if (!rt_entity_is_task(rt_se))
+		return;
+
+	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+
+	rt_rq->rt_nr_total++;
 	if (rt_se->nr_cpus_allowed > 1)
 		rt_rq->rt_nr_migratory++;
 
@@ -94,6 +104,12 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+	if (!rt_entity_is_task(rt_se))
+		return;
+
+	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+
+	rt_rq->rt_nr_total--;
 	if (rt_se->nr_cpus_allowed > 1)
 		rt_rq->rt_nr_migratory--;
 

From 7793527b90d9418211f4fe8464cc1dcb1631ea1b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.de.marchi@gmail.com>
Date: Thu, 9 Jul 2009 13:57:20 +0200
Subject: [PATCH 3/5] sched: Reset sched stats on fork()

The sched_stat fields are currently not reset upon fork.
Ingo's recent commit 6c594c21fcb02c662f11c97be4d7d2b73060a205
did reset nr_migrations, but it didn't reset any of the
others.

This patch resets all sched_stat fields on fork.

Signed-off-by: Lucas De Marchi <lucas.de.marchi@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <193b0f820907090457s7a3662f4gcdecdc22fcae857b@mail.gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index a17f3d9a8bfa..c4549bd7e174 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2572,15 +2572,37 @@ static void __sched_fork(struct task_struct *p)
 	p->se.avg_wakeup		= sysctl_sched_wakeup_granularity;
 
 #ifdef CONFIG_SCHEDSTATS
-	p->se.wait_start		= 0;
-	p->se.sum_sleep_runtime		= 0;
-	p->se.sleep_start		= 0;
-	p->se.block_start		= 0;
-	p->se.sleep_max			= 0;
-	p->se.block_max			= 0;
-	p->se.exec_max			= 0;
-	p->se.slice_max			= 0;
-	p->se.wait_max			= 0;
+	p->se.wait_start			= 0;
+	p->se.wait_max				= 0;
+	p->se.wait_count			= 0;
+	p->se.wait_sum				= 0;
+
+	p->se.sleep_start			= 0;
+	p->se.sleep_max				= 0;
+	p->se.sum_sleep_runtime			= 0;
+
+	p->se.block_start			= 0;
+	p->se.block_max				= 0;
+	p->se.exec_max				= 0;
+	p->se.slice_max				= 0;
+
+	p->se.nr_migrations_cold		= 0;
+	p->se.nr_failed_migrations_affine	= 0;
+	p->se.nr_failed_migrations_running	= 0;
+	p->se.nr_failed_migrations_hot		= 0;
+	p->se.nr_forced_migrations		= 0;
+	p->se.nr_forced2_migrations		= 0;
+
+	p->se.nr_wakeups			= 0;
+	p->se.nr_wakeups_sync			= 0;
+	p->se.nr_wakeups_migrate		= 0;
+	p->se.nr_wakeups_local			= 0;
+	p->se.nr_wakeups_remote			= 0;
+	p->se.nr_wakeups_affine			= 0;
+	p->se.nr_wakeups_affine_attempts	= 0;
+	p->se.nr_wakeups_passive		= 0;
+	p->se.nr_wakeups_idle			= 0;
+
 #endif
 
 	INIT_LIST_HEAD(&p->rt.run_list);

From c20b08e3986c2dbfa6df1e880bf4f7159994d199 Mon Sep 17 00:00:00 2001
From: Fabio Checconi <fchecconi@gmail.com>
Date: Mon, 15 Jun 2009 20:56:38 +0200
Subject: [PATCH 4/5] sched: Fix rt_rq->pushable_tasks initialization in
 init_rt_rq()

init_rt_rq() initializes only rq->rt.pushable_tasks, and not the
pushable_tasks field of the passed rt_rq.  The plist is not used
uninitialized since the only pushable_tasks plists used are the
ones of root rt_rqs; anyway reinitializing the list on every group
creation corrupts the root plist, losing its previous contents.

Signed-off-by: Fabio Checconi <fabio@gandalf.sssup.it>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090615185638.GK21741@gandalf.sssup.it>
CC: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index c4549bd7e174..efecfdad1b5f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9093,7 +9093,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 #ifdef CONFIG_SMP
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
-	plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
+	plist_head_init(&rt_rq->pushable_tasks, &rq->lock);
 #endif
 
 	rt_rq->rt_time = 0;

From d07387b490b1c43bfcb9f3900faf96f2dafb2630 Mon Sep 17 00:00:00 2001
From: Paul Turner <pjt@google.com>
Date: Fri, 10 Jul 2009 17:05:16 -0700
Subject: [PATCH 5/5] sched: Fix bug in SCHED_IDLE interaction with group
 scheduling

One of the isolation modifications for SCHED_IDLE is the
unitization of sleeper credit.  However the check for this
assumes that the sched_entity we're placing always belongs to a
task.

This is potentially not true with group scheduling and leaves
us rummaging randomly when we try to pull the policy.

Signed-off-by: Paul Turner <pjt@google.com>
Cc: peterz@infradead.org
LKML-Reference: <alpine.DEB.1.00.0907101649570.29914@kitami.corp.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched_fair.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ba7fd6e9556f..7c248dc30f41 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -687,7 +687,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 			 * all of which have the same weight.
 			 */
 			if (sched_feat(NORMALIZED_SLEEPER) &&
-					task_of(se)->policy != SCHED_IDLE)
+					(!entity_is_task(se) ||
+					 task_of(se)->policy != SCHED_IDLE))
 				thresh = calc_delta_fair(thresh, se);
 
 			vruntime -= thresh;