From d931ed7fc8d6728204d36d31a18d4c8b60593821 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:40 +0200
Subject: [PATCH 01/16] sched/fair: Add cfs_rq::avg_vruntime

In order to move to an eligibility based scheduling policy, we need
to have a better approximation of the ideal scheduler.

--
2.42.0

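[ Illustration, not part of the series: a minimal userspace sketch of
  the bookkeeping this patch introduces. The kernel keeps
  \Sum w_i * (v_i - min_vruntime) and \Sum w_i so the weighted average
  V can be read in O(1); the names mirror the patch, but the code
  below is only a sketch. ]

#include <stdint.h>

struct cfs_avg {
	int64_t  avg_vruntime;	/* \Sum (v_i - min_vruntime) * w_i */
	uint64_t avg_load;	/* \Sum w_i */
	uint64_t min_vruntime;
};

static void avg_enqueue(struct cfs_avg *a, uint64_t vruntime, uint64_t weight)
{
	int64_t key = (int64_t)(vruntime - a->min_vruntime);

	a->avg_vruntime += key * (int64_t)weight;
	a->avg_load += weight;
}

static void avg_dequeue(struct cfs_avg *a, uint64_t vruntime, uint64_t weight)
{
	int64_t key = (int64_t)(vruntime - a->min_vruntime);

	a->avg_vruntime -= key * (int64_t)weight;
	a->avg_load -= weight;
}

/* V = min_vruntime + avg_vruntime / avg_load */
static uint64_t cfs_avg_vruntime(const struct cfs_avg *a)
{
	int64_t avg = a->avg_vruntime;

	if (a->avg_load)
		avg /= (int64_t)a->avg_load;	/* rounding: see patch 14 */

	return a->min_vruntime + (uint64_t)avg;
}
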
From 4e5d4ab816239fc30595a76ffcd41c323bdd4996 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:41 +0200
Subject: [PATCH 02/16] sched/fair: Remove sched_feat(START_DEBIT)

With the introduction of avg_vruntime() there is no need to use worse
approximations. Take the 0-lag point as starting point for inserting
entities in the tree.

--
2.42.0

From 49ba3e84689bb047d9411e8a3a6ae99020070f37 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:42 +0200
Subject: [PATCH 03/16] sched/fair: Add lag based placement

With the introduction of avg_vruntime, it is possible to approximate
lag (the entire purpose of introducing it in fact). Use this to do lag
based placement over sleep+wake.

 	u64			nr_migrations;

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e8f73ff12..acb9d9ff3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4501,6 +4501,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)

--
2.42.0

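[ Illustration, not part of the series: the relation being exploited.
  With V from the avg_vruntime() sketch above, an entity's virtual lag
  and the lag-preserving placement reduce to two one-liners; the
  helper names are made up for the example. ]

#include <stdint.h>

/* Positive lag: the entity is owed service; negative: it overran. */
static int64_t entity_vlag(uint64_t V, uint64_t vruntime)
{
	return (int64_t)(V - vruntime);
}

/* Insert a waking entity at V - lag so its lag survives sleep+wake. */
static uint64_t place_vruntime(uint64_t V, int64_t vlag)
{
	return V - (uint64_t)vlag;
}
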
From 31462b52019e938357395e7bd0f630fcd550e27c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:43 +0200
Subject: [PATCH 04/16] rbtree: Add rb_add_augmented_cached() helper

While slightly sub-optimal, updating the augmented data while going
down the tree during lookup would be faster -- alas the augment
interface does not currently allow for that, provide a generic helper
to add a node to an augmented cached tree.

--
2.42.0

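[ Illustration, not part of the series: the shape such a helper takes,
  reconstructed from memory of the upstream commit -- treat the exact
  signature and body as a sketch rather than the authoritative code.
  The node is walked down with less(), the augmented data is fixed up
  with one propagate pass, then the node is inserted and rebalanced. ]

static __always_inline struct rb_node *
rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree,
			bool (*less)(struct rb_node *, const struct rb_node *),
			const struct rb_augment_callbacks *augment)
{
	struct rb_node **link = &tree->rb_root.rb_node;
	struct rb_node *parent = NULL;
	bool leftmost = true;

	while (*link) {
		parent = *link;
		if (less(node, parent)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(node, parent, link);
	augment->propagate(parent, NULL); /* suboptimal */
	rb_insert_augmented_cached(node, tree, leftmost, augment);

	return leftmost ? node : NULL;
}
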
From e8c55c05618756cf090470c355f2864dafe0a618 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:44 +0200
Subject: [PATCH 05/16] sched/fair: Implement an EEVDF-like scheduling policy

Where CFS is currently a WFQ based scheduler with only a single knob,
the weight. The addition of a second, latency oriented parameter,
makes something like WF2Q or EEVDF possible.

 	u64			nr_migrations;

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index acb9d9ff3..427d694ff 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4502,6 +4502,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)

--
2.42.0

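[ Illustration, not part of the series: the two predicates EEVDF
  layers on top of the WFQ machinery, in terms of the sketches above.
  The kernel's own helpers are entity_eligible() and a deadline-ordered
  rbtree; the functions below are simplified stand-ins. ]

#include <stdbool.h>
#include <stdint.h>

/* Eligible: non-negative lag, i.e. vruntime at or left of the average V. */
static bool eligible(uint64_t V, uint64_t vruntime)
{
	return (int64_t)(V - vruntime) >= 0;
}

/*
 * Virtual deadline: eligible time plus the request (slice) scaled by
 * weight; pick the eligible entity with the earliest such deadline.
 */
static uint64_t virtual_deadline(uint64_t vruntime, uint64_t slice,
				 uint64_t weight, uint64_t nice0_weight)
{
	return vruntime + slice * nice0_weight / weight;
}
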
From 6aa7145ce28656863846e7f67ad98e3ed89473f3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:45 +0200
Subject: [PATCH 06/16] sched/fair: Commit to lag based placement

Removes the FAIR_SLEEPERS code in favour of the new LAG based
placement.

--
2.42.0

From 12c67a50f08fe4b97fda8f13302e2574e10351c7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:46 +0200
Subject: [PATCH 07/16] sched/smp: Use lag to simplify cross-runqueue placement

Using lag is both more correct and simpler when moving between
runqueues.

--
2.42.0

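[ Illustration, not part of the series: what "use lag" means for
  migration. Record vlag against the old queue's average at dequeue
  and re-apply it against the new queue's average at enqueue; the
  struct and helper names are made up for the example. ]

#include <stdint.h>

struct entity {
	uint64_t vruntime;
	int64_t vlag;
};

static void dequeue_record_lag(struct entity *se, uint64_t V_old)
{
	se->vlag = (int64_t)(V_old - se->vruntime);
}

static void enqueue_apply_lag(struct entity *se, uint64_t V_new)
{
	se->vruntime = V_new - (uint64_t)se->vlag;
}
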
From 8e2fcd5cb320987439faec8442f7f73ccb234875 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:47 +0200
Subject: [PATCH 08/16] sched/fair: Commit to EEVDF

EEVDF is a better defined scheduling policy, as a result it has less
heuristics/tunables. There is no compelling reason to keep CFS around.

--
2.42.0

From 55aa8349238fbe34a1f8198d56210a5e773851f1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:48 +0200
Subject: [PATCH 09/16] sched/debug: Rename sysctl_sched_min_granularity to
 sysctl_sched_base_slice

EEVDF uses this tunable as the base request/slice -- make sure the
name reflects this.

Link: https://lore.kernel.org/r/20230531124604.205287511@infradead.org
---
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 427d694ff..be77d999d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4502,7 +4502,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)

--
2.42.0

From d059ffad9f9729ec63ad32fc3840a1a308cbd8a7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:49 +0200
Subject: [PATCH 10/16] sched/fair: Propagate enqueue flags into place_entity()

This allows place_entity() to consider ENQUEUE_WAKEUP and
ENQUEUE_MIGRATED.

--
2.42.0

From 80cdbd469974a44e5150be88f5c696ec241f6087 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 16 Aug 2023 15:40:59 +0200
Subject: [PATCH 11/16] sched/eevdf: Curb wakeup-preemption

Mike and others noticed that EEVDF does like to over-schedule quite a
bit -- which does hurt performance of a number of benchmarks /
workloads.

--
2.42.0

From 7d5bf4ed3cc74835a55db18eead11af61557a795 Mon Sep 17 00:00:00 2001
From: Shrikanth Hegde <sshegde@linux.vnet.ibm.com>
Date: Thu, 24 Aug 2023 13:33:42 +0530
Subject: [PATCH 12/16] sched/eevdf/doc: Modify the documented knob to
 base_slice_ns as well

After committing the scheduler to EEVDF, we renamed the 'min_granularity_ns'
sysctl to 'base_slice_ns'; update the documentation to match.

--
2.42.0

From bff784de63e9a8567d91b630e8f2bf842aef894b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 15 Sep 2023 00:48:55 +0200
Subject: [PATCH 13/16] sched/eevdf: Also update slice on placement

Tasks that never consume their full slice would not update their slice value.
This means that tasks that are spawned before the sysctl scaling keep their
original (UP) slice length.

Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230915124822.847197830@noisy.programming.kicks-ass.net
---
 kernel/sched/fair.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1cdc95725..efbcdc69c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4918,10 +4918,12 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	u64 vslice = calc_delta_fair(se->slice, se);
-	u64 vruntime = avg_vruntime(cfs_rq);
+	u64 vslice, vruntime = avg_vruntime(cfs_rq);
 	s64 lag = 0;
 
+	se->slice = sysctl_sched_base_slice;
+	vslice = calc_delta_fair(se->slice, se);
+
 	/*
 	 * Due to how V is constructed as the weighted average of entities,
 	 * adding tasks with positive lag, or removing tasks with negative lag
--
2.42.0

From 163619e41993d6e481a745466c05cc0dfb3dcda8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 26 Sep 2023 14:29:50 +0200
Subject: [PATCH 14/16] sched/eevdf: Fix avg_vruntime()

The expectation is that placing a task at avg_vruntime() makes it
eligible. Turns out there is a corner case where this is not the case.

Specifically, avg_vruntime() relies on the fact that integer division
is a flooring function (eg. it discards the remainder). By this
property the value returned is slightly left of the true average.

However! when the average is a negative (relative to min_vruntime) the
effect is flipped and it becomes a ceil, with the result that the
returned value is just right of the average and thus not eligible.

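[ Illustration, not part of the series: C integer division truncates
  toward zero, which floors positive quotients but ceils negative
  ones; the hunk below compensates by subtracting (divisor - 1) from a
  negative sum before dividing. ]

#include <assert.h>

int main(void)
{
	assert(7 / 2 == 3);	/* floor for positive values ... */
	assert(-7 / 2 == -3);	/* ... but ceil for negative ones */

	long avg = -7, load = 2;
	if (avg < 0)
		avg -= (load - 1);
	assert(avg / load == -4);	/* floor(-3.5), left of the average */

	return 0;
}
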
Fixes: af4cf40470c2 ("sched/fair: Add cfs_rq::avg_vruntime")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/fair.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index efbcdc69c..9dbf3ce61 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -664,6 +664,10 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
 	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
 }
 
+/*
+ * Specifically: avg_runtime() + 0 must result in entity_eligible() := true
+ * For this to be so, the result of this function must have a left bias.
+ */
 u64 avg_vruntime(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
@@ -677,8 +681,12 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
 		load += weight;
 	}
 
-	if (load)
+	if (load) {
+		/* sign flips effective floor / ceil */
+		if (avg < 0)
+			avg -= (load - 1);
 		avg = div_s64(avg, load);
+	}
 
 	return cfs_rq->min_vruntime + avg;
 }
--
2.42.0

From 217895647edb558ce9b28d0e07418f66fdaf85bc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 6 Oct 2023 21:24:45 +0200
Subject: [PATCH 15/16] sched/eevdf: Fix min_deadline heap integrity

Marek and Biju reported instances of:

  "EEVDF scheduling fail, picking leftmost"

which Mike correlated with cgroup scheduling and the min_deadline heap
getting corrupted; some trace output confirms:

> And yeah, min_deadline is hosed somehow:
>
> validate_cfs_rq: --- /
> __print_se: ffff88845cf48080 w: 1024 ve: -58857638 lag: 870381 vd: -55861854 vmd: -66302085 E (11372/tr)
> __print_se: ffff88810d165800 w: 25 ve: -80323686 lag: 22336429 vd: -41496434 vmd: -66302085 E (-1//autogroup-31)
> __print_se: ffff888108379000 w: 25 ve: 0 lag: -57987257 vd: 114632828 vmd: 114632828 N (-1//autogroup-33)
> validate_cfs_rq: min_deadline: -55861854 avg_vruntime: -62278313462 / 1074 = -57987256

Turns out that reweight_entity(), which tries really hard to be fast,
does not do the normal dequeue+update+enqueue pattern but *does* scale
the deadline.

However, it then fails to propagate the updated deadline value up the
heap.

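[ Illustration, not part of the series: what "propagate" means for an
  augmented rbtree. After a node's deadline changes in place, every
  ancestor's cached subtree minimum may be stale and must be recomputed
  upward until a value stops changing. Node layout and names here are
  hypothetical, not the kernel's. ]

#include <stdint.h>

struct node {
	struct node *parent, *left, *right;
	uint64_t deadline;
	uint64_t subtree_min;	/* min deadline of this subtree */
};

static uint64_t min3(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t m = a < b ? a : b;
	return m < c ? m : c;
}

static void propagate(struct node *n)
{
	for (; n; n = n->parent) {
		uint64_t m = min3(n->deadline,
				  n->left ? n->left->subtree_min : UINT64_MAX,
				  n->right ? n->right->subtree_min : UINT64_MAX);
		if (n->subtree_min == m)
			break;	/* ancestors already consistent */
		n->subtree_min = m;
	}
}
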
Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reported-by: Biju Das <biju.das.jz@bp.renesas.com>
Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231006192445.GE743@noisy.programming.kicks-ass.net
---
 kernel/sched/fair.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9dbf3ce61..a0f1d9578 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3612,6 +3612,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	 */
 	deadline = div_s64(deadline * old_weight, weight);
 	se->deadline = se->vruntime + deadline;
+	min_deadline_cb_propagate(&se->run_node, NULL);
 }
 
 #ifdef CONFIG_SMP
--
2.42.0

From 71f1c08f8102e48a5235bb145af59edfa597cf72 Mon Sep 17 00:00:00 2001
From: Benjamin Segall <bsegall@google.com>
Date: Fri, 29 Sep 2023 17:09:30 -0700
Subject: [PATCH 16/16] sched/eevdf: Fix pick_eevdf()

The old pick_eevdf() could fail to find the actual earliest eligible
deadline when it descended to the right looking for min_deadline, but
it turned out that that min_deadline wasn't actually eligible. In that
case we need to go back and search through any left branches we
skipped looking for the actual best _eligible_ min_deadline.

This is more expensive, but still O(log n), and at worst should only
involve descending two branches of the rbtree.

I've run this through a userspace stress test (thank you
tools/lib/rbtree.c), so hopefully this implementation doesn't miss any
corner cases.

Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Signed-off-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/xm261qego72d.fsf_-_@google.com
---
 kernel/sched/fair.c | 72 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a0f1d9578..caec9b43c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -872,14 +872,16 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
  *
  * Which allows an EDF like search on (sub)trees.
  */
-static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
 	struct sched_entity *curr = cfs_rq->curr;
 	struct sched_entity *best = NULL;
+	struct sched_entity *best_left = NULL;
 
 	if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
 		curr = NULL;
+	best = curr;
 
 	/*
 	 * Once selected, run a task until it either becomes non-eligible or
@@ -900,33 +902,75 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
 		}
 
 		/*
-		 * If this entity has an earlier deadline than the previous
-		 * best, take this one. If it also has the earliest deadline
-		 * of its subtree, we're done.
+		 * Now we heap search eligible trees for the best (min_)deadline
 		 */
-		if (!best || deadline_gt(deadline, best, se)) {
+		if (!best || deadline_gt(deadline, best, se))
 			best = se;
-			if (best->deadline == best->min_deadline)
-				break;
-		}
 
 		/*
-		 * If the earlest deadline in this subtree is in the fully
-		 * eligible left half of our space, go there.
+		 * Every se in a left branch is eligible, keep track of the
+		 * branch with the best min_deadline
 		 */
+		if (node->rb_left) {
+			struct sched_entity *left = __node_2_se(node->rb_left);
+
+			if (!best_left || deadline_gt(min_deadline, best_left, left))
+				best_left = left;
+
+			/*
+			 * min_deadline is in the left branch. rb_left and all
+			 * descendants are eligible, so immediately switch to the second
+			 * loop.
+			 */
+			if (left->min_deadline == se->min_deadline)
+				break;
+		}
+
+		/* min_deadline is at this node, no need to look right */
+		if (se->deadline == se->min_deadline)
+			break;
+
+		/* else min_deadline is in the right branch. */
+		node = node->rb_right;
+	}
+
+	/*
+	 * We ran into an eligible node which is itself the best.
+	 * (Or nr_running == 0 and both are NULL)
+	 */
+	if (!best_left || (s64)(best_left->min_deadline - best->deadline) > 0)
+		return best;
+
+	/*
+	 * Now best_left and all of its children are eligible, and we are just
+	 * looking for deadline == min_deadline
+	 */
+	node = &best_left->run_node;
+	while (node) {
+		struct sched_entity *se = __node_2_se(node);
+
+		/* min_deadline is the current node */
+		if (se->deadline == se->min_deadline)
+			return se;
+
+		/* min_deadline is in the left branch */
 		if (node->rb_left &&
 		    __node_2_se(node->rb_left)->min_deadline == se->min_deadline) {
 			node = node->rb_left;
 			continue;
 		}
 
+		/* else min_deadline is in the right branch */
 		node = node->rb_right;
 	}
+	return NULL;
+}
 
-	if (!best || (curr && deadline_gt(deadline, best, curr)))
-		best = curr;
+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+{
+	struct sched_entity *se = __pick_eevdf(cfs_rq);
 
-	if (unlikely(!best)) {
+	if (!se) {
 		struct sched_entity *left = __pick_first_entity(cfs_rq);
 		if (left) {
 			pr_err("EEVDF scheduling fail, picking leftmost\n");
@@ -934,7 +978,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
 		}
 	}
 
-	return best;
+	return se;
 }
 
 #ifdef CONFIG_SCHED_DEBUG
--
2.42.0