From d4ec36bac3de39b7e10ec8f42fbdd20d9a9ed753 Mon Sep 17 00:00:00 2001
From: Wolfram Sang
Date: Sun, 21 Jun 2009 12:32:39 +0200
Subject: [PATCH 1/5] sched: Documentation/sched-rt-group: Fix style issues & bump version

- add missing closing bracket
- fix two 80-chars issues (as the rest of the document adheres to it)
- bump a reference to kernel version, so the document does not feel aged

Signed-off-by: Wolfram Sang
Cc: Peter Zijlstra
LKML-Reference: <1245580359-4465-1-git-send-email-w.sang@pengutronix.de>
Signed-off-by: Ingo Molnar
---
 Documentation/scheduler/sched-rt-group.txt | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 1df7f9cdab05..86eabe6c3419 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -73,7 +73,7 @@ The remaining CPU time will be used for user input and other
 tasks. Because realtime tasks have explicitly allocated the CPU time they need
 to perform their tasks, buffer underruns in the graphics or audio can be
 eliminated.

-NOTE: the above example is not fully implemented as of yet (2.6.25). We still
+NOTE: the above example is not fully implemented yet. We still
 lack an EDF scheduler to make non-uniform periods usable.

@@ -140,14 +140,15 @@ The other option is:

 .o CONFIG_CGROUP_SCHED (aka "Basis for grouping tasks" = "Control groups")

-This uses the /cgroup virtual file system and "/cgroup//cpu.rt_runtime_us"
-to control the CPU time reserved for each control group instead.
+This uses the /cgroup virtual file system and
+"/cgroup//cpu.rt_runtime_us" to control the CPU time reserved for each
+control group instead.

 For more information on working with control groups, you should read
 Documentation/cgroups/cgroups.txt as well.

-Group settings are checked against the following limits in order to keep the configuration
-schedulable:
+Group settings are checked against the following limits in order to keep the
+configuration schedulable:

   \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period

@@ -189,7 +190,7 @@ Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
 the biggest challenge as the current linux PI infrastructure is geared towards
 the limited static priority levels 0-99. With deadline scheduling you need to
 do deadline inheritance (since priority is inversely proportional to the
-deadline delta (deadline - now).
+deadline delta (deadline - now)).

 This means the whole PI machinery will have to be reworked - and that is one
 of the most complex pieces of code we have.

From a1ba4d8ba9f06a397e97cbd67a93ee306860b40a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 1 Apr 2009 18:40:15 +0200
Subject: [PATCH 2/5] sched_rt: Fix overload bug on rt group scheduling

Fixes an easily triggerable BUG() when setting process affinities.

Make sure to count the number of migratable tasks in the same place:
the root rt_rq. Otherwise the number doesn't make sense and we'll hit
the BUG in set_cpus_allowed_rt().
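
(A toy illustration, in stand-alone user-space C with made-up names rather
than the real kernel data structures: if enqueue accounts a migratable task
on its group's rt_rq while an affinity change adjusts the root rt_rq, the
root counter goes wrong, which is the kind of inconsistency the BUG() in
set_cpus_allowed_rt() trips over. Keeping both updates on the root rt_rq
keeps the counter consistent.)

#include <assert.h>
#include <stdio.h>

struct rt_rq {
	unsigned long rt_nr_migratory;
};

static struct rt_rq root_rt_rq;		/* the per-CPU root runqueue */
static struct rt_rq group_rt_rq;	/* some task group's rt_rq */

/* Buggy scheme: enqueue accounts the task on its group's rt_rq ... */
static void enqueue_buggy(void)
{
	group_rt_rq.rt_nr_migratory++;
}

/* Fixed scheme: always account on the root rt_rq. */
static void enqueue_fixed(void)
{
	root_rt_rq.rt_nr_migratory++;
}

/* ... while pinning the task to one CPU always adjusts the root rt_rq. */
static void pin_to_one_cpu(void)
{
	assert(root_rt_rq.rt_nr_migratory > 0);	/* stands in for the BUG() */
	root_rt_rq.rt_nr_migratory--;
}

int main(int argc, char **argv)
{
	if (argc > 1)
		enqueue_buggy();	/* run with any argument to see the assert fire */
	else
		enqueue_fixed();

	pin_to_one_cpu();
	printf("root rt_nr_migratory = %lu\n", root_rt_rq.rt_nr_migratory);
	return 0;
}
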
Also, make sure we only count tasks, not groups (this is probably
already taken care of by the fact that rt_se->nr_cpus_allowed will be
0 for groups, but be more explicit)

Tested-by: Thomas Gleixner
CC: stable@kernel.org
Signed-off-by: Peter Zijlstra
Acked-by: Gregory Haskins
LKML-Reference: <1247067476.9777.57.camel@twins>
Signed-off-by: Ingo Molnar
---
 kernel/sched.c    |  1 +
 kernel/sched_rt.c | 18 +++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e6..a17f3d9a8bfa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -493,6 +493,7 @@ struct rt_rq {
 #endif
 #ifdef CONFIG_SMP
 	unsigned long rt_nr_migratory;
+	unsigned long rt_nr_total;
 	int overloaded;
 	struct plist_head pushable_tasks;
 #endif

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 9bf0d2a73045..3918e01994e0 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -10,6 +10,8 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)

 #ifdef CONFIG_RT_GROUP_SCHED

+#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
+
 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
 {
 	return rt_rq->rq;
@@ -22,6 +24,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)

 #else /* CONFIG_RT_GROUP_SCHED */

+#define rt_entity_is_task(rt_se) (1)
+
 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
 {
 	return container_of(rt_rq, struct rq, rt);
@@ -73,7 +77,7 @@ static inline void rt_clear_overload(struct rq *rq)

 static void update_rt_migration(struct rt_rq *rt_rq)
 {
-	if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
+	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
 		if (!rt_rq->overloaded) {
 			rt_set_overload(rq_of_rt_rq(rt_rq));
 			rt_rq->overloaded = 1;
@@ -86,6 +90,12 @@ static void update_rt_migration(struct rt_rq *rt_rq)

 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+	if (!rt_entity_is_task(rt_se))
+		return;
+
+	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+
+	rt_rq->rt_nr_total++;
 	if (rt_se->nr_cpus_allowed > 1)
 		rt_rq->rt_nr_migratory++;

@@ -94,6 +104,12 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)

 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+	if (!rt_entity_is_task(rt_se))
+		return;
+
+	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
+
+	rt_rq->rt_nr_total--;
 	if (rt_se->nr_cpus_allowed > 1)
 		rt_rq->rt_nr_migratory--;

From 7793527b90d9418211f4fe8464cc1dcb1631ea1b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi
Date: Thu, 9 Jul 2009 13:57:20 +0200
Subject: [PATCH 3/5] sched: Reset sched stats on fork()

The sched_stat fields are currently not reset upon fork.
Ingo's recent commit 6c594c21fcb02c662f11c97be4d7d2b73060a205
did reset nr_migrations, but it didn't reset any of the others.

This patch resets all sched_stat fields on fork.
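
To make the failure mode concrete, here is a stand-alone user-space sketch
(illustrative names only, not the kernel implementation): the child starts
from a byte-for-byte copy of the parent, so any statistic that
__sched_fork() does not clear silently carries the parent's history into
the child.

#include <stdio.h>
#include <string.h>

/* A stand-in for the schedstat counters kept per task. */
struct sched_stats {
	unsigned long wait_max;
	unsigned long nr_wakeups;
	unsigned long nr_failed_migrations_hot;
};

struct task {
	struct sched_stats stats;
};

/* Models dup_task_struct(): the child is a copy of the parent. */
static struct task copy_task(const struct task *parent)
{
	return *parent;
}

/* Models the fix in __sched_fork(): zero every statistic for the new task. */
static void sched_fork_reset(struct task *child)
{
	memset(&child->stats, 0, sizeof(child->stats));
}

int main(void)
{
	struct task parent = { .stats = { 100, 42, 7 } };
	struct task child = copy_task(&parent);

	printf("before reset: nr_wakeups=%lu\n", child.stats.nr_wakeups); /* 42: inherited */
	sched_fork_reset(&child);
	printf("after  reset: nr_wakeups=%lu\n", child.stats.nr_wakeups); /* 0 */
	return 0;
}

The patch itself clears each field individually rather than wholesale,
which keeps the reset independent of how the statistics are laid out
inside struct sched_entity.
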
Signed-off-by: Lucas De Marchi
Signed-off-by: Peter Zijlstra
LKML-Reference: <193b0f820907090457s7a3662f4gcdecdc22fcae857b@mail.gmail.com>
Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index a17f3d9a8bfa..c4549bd7e174 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2572,15 +2572,37 @@ static void __sched_fork(struct task_struct *p)
 	p->se.avg_wakeup		= sysctl_sched_wakeup_granularity;

 #ifdef CONFIG_SCHEDSTATS
-	p->se.wait_start		= 0;
-	p->se.sum_sleep_runtime		= 0;
-	p->se.sleep_start		= 0;
-	p->se.block_start		= 0;
-	p->se.sleep_max			= 0;
-	p->se.block_max			= 0;
-	p->se.exec_max			= 0;
-	p->se.slice_max			= 0;
-	p->se.wait_max			= 0;
+	p->se.wait_start			= 0;
+	p->se.wait_max				= 0;
+	p->se.wait_count			= 0;
+	p->se.wait_sum				= 0;
+
+	p->se.sleep_start			= 0;
+	p->se.sleep_max				= 0;
+	p->se.sum_sleep_runtime			= 0;
+
+	p->se.block_start			= 0;
+	p->se.block_max				= 0;
+	p->se.exec_max				= 0;
+	p->se.slice_max				= 0;
+
+	p->se.nr_migrations_cold		= 0;
+	p->se.nr_failed_migrations_affine	= 0;
+	p->se.nr_failed_migrations_running	= 0;
+	p->se.nr_failed_migrations_hot		= 0;
+	p->se.nr_forced_migrations		= 0;
+	p->se.nr_forced2_migrations		= 0;
+
+	p->se.nr_wakeups			= 0;
+	p->se.nr_wakeups_sync			= 0;
+	p->se.nr_wakeups_migrate		= 0;
+	p->se.nr_wakeups_local			= 0;
+	p->se.nr_wakeups_remote			= 0;
+	p->se.nr_wakeups_affine			= 0;
+	p->se.nr_wakeups_affine_attempts	= 0;
+	p->se.nr_wakeups_passive		= 0;
+	p->se.nr_wakeups_idle			= 0;
+
 #endif

 	INIT_LIST_HEAD(&p->rt.run_list);

From c20b08e3986c2dbfa6df1e880bf4f7159994d199 Mon Sep 17 00:00:00 2001
From: Fabio Checconi
Date: Mon, 15 Jun 2009 20:56:38 +0200
Subject: [PATCH 4/5] sched: Fix rt_rq->pushable_tasks initialization in init_rt_rq()

init_rt_rq() initializes only rq->rt.pushable_tasks, and not the
pushable_tasks field of the passed rt_rq. The plist is not used
uninitialized since the only pushable_tasks plists used are the ones
of root rt_rqs; anyway reinitializing the list on every group creation
corrupts the root plist, losing its previous contents.

Signed-off-by: Fabio Checconi
Signed-off-by: Peter Zijlstra
LKML-Reference: <20090615185638.GK21741@gandalf.sssup.it>
CC: Gregory Haskins
Signed-off-by: Ingo Molnar
---
 kernel/sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index c4549bd7e174..efecfdad1b5f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9093,7 +9093,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 #ifdef CONFIG_SMP
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
-	plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
+	plist_head_init(&rt_rq->pushable_tasks, &rq->lock);
 #endif

 	rt_rq->rt_time = 0;

From d07387b490b1c43bfcb9f3900faf96f2dafb2630 Mon Sep 17 00:00:00 2001
From: Paul Turner
Date: Fri, 10 Jul 2009 17:05:16 -0700
Subject: [PATCH 5/5] sched: Fix bug in SCHED_IDLE interaction with group scheduling

One of the isolation modifications for SCHED_IDLE is the unitization
of sleeper credit. However the check for this assumes that the
sched_entity we're placing always belongs to a task.

This is potentially not true with group scheduling and leaves us
rummaging randomly when we try to pull the policy.
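
For illustration, a minimal stand-alone sketch of the problem (reduced
structures, not the kernel code): task_of() is a container_of()-style
cast, so applying it to a group's sched_entity reads memory that is not a
task_struct at all; the fix guards the policy check with entity_is_task().

#include <stddef.h>
#include <stdio.h>

#define SCHED_IDLE 5

struct cfs_rq;				/* opaque here */

struct sched_entity {
	struct cfs_rq *my_q;		/* non-NULL only for group entities */
};

struct task_struct {
	int policy;
	struct sched_entity se;
};

/* A sched_entity represents a task iff it does not own a runqueue. */
static int entity_is_task(const struct sched_entity *se)
{
	return se->my_q == NULL;
}

/* container_of()-style cast: only meaningful when the entity is a task. */
static struct task_struct *task_of(struct sched_entity *se)
{
	return (struct task_struct *)((char *)se - offsetof(struct task_struct, se));
}

/* Mirrors the entity check from the fixed condition in place_entity():
 * group entities and non-SCHED_IDLE tasks take the weight-scaled sleeper
 * credit, SCHED_IDLE tasks do not. */
static int normalize_sleeper_credit(struct sched_entity *se)
{
	return !entity_is_task(se) || task_of(se)->policy != SCHED_IDLE;
}

int main(void)
{
	struct task_struct idle_task = { .policy = SCHED_IDLE };
	struct sched_entity group_se = { .my_q = (struct cfs_rq *)&group_se };

	printf("SCHED_IDLE task: %d\n", normalize_sleeper_credit(&idle_task.se)); /* 0 */
	printf("group entity:    %d\n", normalize_sleeper_credit(&group_se));     /* 1 */
	return 0;
}
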
Signed-off-by: Paul Turner
Cc: peterz@infradead.org
LKML-Reference:
Signed-off-by: Ingo Molnar
---
 kernel/sched_fair.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ba7fd6e9556f..7c248dc30f41 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -687,7 +687,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 		 * all of which have the same weight.
 		 */
 		if (sched_feat(NORMALIZED_SLEEPER) &&
-				task_of(se)->policy != SCHED_IDLE)
+				(!entity_is_task(se) ||
+				 task_of(se)->policy != SCHED_IDLE))
 			thresh = calc_delta_fair(thresh, se);

 		vruntime -= thresh;