diff --git a/linux-tkg-config/prepare b/linux-tkg-config/prepare
index bc9556f..ffc3300 100644
--- a/linux-tkg-config/prepare
+++ b/linux-tkg-config/prepare
@@ -859,11 +859,7 @@ _tkg_srcprep() {
     if [ "${_cpusched}" = "bore-eevdf" ]; then
       _msg="Applying BORE-EEVDF patch"
-      if [ "$_kver" != "605" ]; then
-        curl "https://raw.githubusercontent.com/CachyOS/kernel-patches/master/${_basekernel}/sched/0001-bore-eevdf.patch" > "$srcdir"/0001-bore-eevdf.patch
-      else
-        curl "https://raw.githubusercontent.com/sirlucjan/kernel-patches/master/${_basekernel}/bore-eevdf-patches-v2-sep/0016-linux6.5-bore3.1.3.patch" > "$srcdir"/0001-bore-eevdf.patch
-      fi
+      curl "https://raw.githubusercontent.com/CachyOS/kernel-patches/master/${_basekernel}/sched/0001-bore-eevdf.patch" > "$srcdir"/0001-bore-eevdf.patch
       tkgpatch="$srcdir/0001-bore-eevdf.patch" && _tkg_patcher
     fi
   fi
 
diff --git a/linux-tkg-patches/6.5/0003-eevdf.patch b/linux-tkg-patches/6.5/0003-eevdf.patch
index 2b7848c..844be38 100644
--- a/linux-tkg-patches/6.5/0003-eevdf.patch
+++ b/linux-tkg-patches/6.5/0003-eevdf.patch
@@ -2896,514 +2896,3 @@ index 03db55504..f68919800 100644
 --
 2.42.0
 
-
-From edbc7fe6658db891c80f244dc397f4e0247f6f3d Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra
-Date: Fri, 15 Sep 2023 00:48:55 +0200
-Subject: [PATCH 13/15] sched/eevdf: Also update slice on placement
-
-Tasks that never consume their full slice would not update their slice value.
-This means that tasks that are spawned before the sysctl scaling keep their
-original (UP) slice length.
-
-Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
-Signed-off-by: Peter Zijlstra (Intel)
----
- kernel/sched/fair.c | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index 1cdc95725..efbcdc69c 100644
---- a/kernel/sched/fair.c
-+++ b/kernel/sched/fair.c
-@@ -4918,10 +4918,12 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
- static void
- place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
- {
--	u64 vslice = calc_delta_fair(se->slice, se);
--	u64 vruntime = avg_vruntime(cfs_rq);
-+	u64 vslice, vruntime = avg_vruntime(cfs_rq);
- 	s64 lag = 0;
- 
-+	se->slice = sysctl_sched_base_slice;
-+	vslice = calc_delta_fair(se->slice, se);
-+
- 	/*
- 	 * Due to how V is constructed as the weighted average of entities,
- 	 * adding tasks with positive lag, or removing tasks with negative lag
---
-2.42.0
-
-
-From 0f1fadfb03ba9ba181e4631de8cd97ba765fae1d Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra
-Date: Fri, 15 Sep 2023 00:48:45 +0200
-Subject: [PATCH 14/15] sched/eevdf: Delay dequeue
-
-For tasks that have negative-lag (have received 'excess' service), delay the
-dequeue and keep them in the runnable tree until they're elegible again. Or
-rather, keep them until they're selected again, since finding their elegibility
-crossover point is expensive.
-
-The effect is a bit like sleeper bonus, the tasks keep contending for service
-until either they get a wakeup or until they're selected again and are really
-dequeued.
-
-This means that any actual dequeue happens with positive lag (serviced owed)
-and are more readily ran when wakeup.
-
-Signed-off-by: Peter Zijlstra (Intel)
----
- include/linux/sched.h   |  1 +
- kernel/sched/core.c     | 41 +++++++++++++++++++++++++++++++++++------
- kernel/sched/fair.c     |  9 +++++++++
- kernel/sched/features.h |  1 +
- kernel/sched/sched.h    |  3 ++-
- 5 files changed, 48 insertions(+), 7 deletions(-)
-
-diff --git a/include/linux/sched.h b/include/linux/sched.h
-index 35331c35f..d40d98313 100644
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -891,6 +891,7 @@ struct task_struct {
- 	unsigned			sched_reset_on_fork:1;
- 	unsigned			sched_contributes_to_load:1;
- 	unsigned			sched_migrated:1;
-+	unsigned			sched_delayed:1;
- 
- 	/* Force alignment to the next boundary: */
- 	unsigned			:0;
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index 8116ef56d..cfb0ffa69 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -6551,6 +6551,16 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
- # define SM_MASK_PREEMPT	SM_PREEMPT
- #endif
- 
-+static void __deschedule_task(struct rq *rq, struct task_struct *p)
-+{
-+	deactivate_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
-+
-+	if (p->in_iowait) {
-+		atomic_inc(&rq->nr_iowait);
-+		delayacct_blkio_start();
-+	}
-+}
-+
- /*
-  * __schedule() is the main scheduler function.
-  *
-@@ -6663,17 +6673,36 @@ static void __sched notrace __schedule(unsigned int sched_mode)
- 			 *
- 			 * After this, schedule() must not care about p->state any more.
- 			 */
--			deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
-+			if (!(sched_feat(DELAY_DEQUEUE) &&
-+			      prev->sched_class->eligible_task &&
-+			      !prev->sched_class->eligible_task(rq, prev)))
-+				__deschedule_task(rq, prev);
-+			else
-+				prev->sched_delayed = 1;
-+		}
-+		switch_count = &prev->nvcsw;
-+	}
-+
-+	for (struct task_struct *tmp = prev;;) {
- 
--		if (prev->in_iowait) {
--			atomic_inc(&rq->nr_iowait);
--			delayacct_blkio_start();
-+		next = pick_next_task(rq, tmp, &rf);
-+		if (unlikely(tmp != prev))
-+			finish_task(tmp);
-+
-+		if (sched_feat(DELAY_DEQUEUE) && unlikely(next->sched_delayed)) {
-+			next->sched_delayed = 0;
-+			if (READ_ONCE(next->__state)) {
-+				prepare_task(next);
-+				smp_wmb();
-+				__deschedule_task(rq, next);
-+				tmp = next;
-+				continue;
- 			}
- 		}
--		switch_count = &prev->nvcsw;
-+
-+		break;
- 	}
- 
--	next = pick_next_task(rq, prev, &rf);
- 	clear_tsk_need_resched(prev);
- 	clear_preempt_need_resched();
- #ifdef CONFIG_SCHED_DEBUG
-diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index efbcdc69c..729507e40 100644
---- a/kernel/sched/fair.c
-+++ b/kernel/sched/fair.c
-@@ -8174,6 +8174,14 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq)
- 	return pick_next_task_fair(rq, NULL, NULL);
- }
- 
-+static bool eligible_task_fair(struct rq *rq, struct task_struct *p)
-+{
-+	struct sched_entity *se = &p->se;
-+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-+
-+	return entity_eligible(cfs_rq, se);
-+}
-+
- /*
-  * Account for a descheduled task:
-  */
-@@ -12628,6 +12636,7 @@ DEFINE_SCHED_CLASS(fair) = {
- 
- 	.check_preempt_curr	= check_preempt_wakeup,
- 
-+	.eligible_task		= eligible_task_fair,
- 	.pick_next_task		= __pick_next_task_fair,
- 	.put_prev_task		= put_prev_task_fair,
- 	.set_next_task		= set_next_task_fair,
-diff --git a/kernel/sched/features.h b/kernel/sched/features.h
-index 546d212ef..5ae5a6f92 100644
---- a/kernel/sched/features.h
-+++ b/kernel/sched/features.h
-@@ -7,6 +7,7 @@
- SCHED_FEAT(PLACE_LAG, true)
- SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
- SCHED_FEAT(RUN_TO_PARITY, true)
-+SCHED_FEAT(DELAY_DEQUEUE, true)
- 
- /*
-  * Prefer to schedule the task we woke last (assuming it failed
-diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
-index 576d371c8..c18ab7c2f 100644
---- a/kernel/sched/sched.h
-+++ b/kernel/sched/sched.h
-@@ -2219,6 +2219,7 @@ struct sched_class {
- 
- 	void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
- 
-+	bool (*eligible_task)(struct rq *rq, struct task_struct *p);
- 	struct task_struct *(*pick_next_task)(struct rq *rq);
- 
- 	void (*put_prev_task)(struct rq *rq, struct task_struct *p);
-@@ -2272,7 +2273,7 @@ struct sched_class {
- 
- static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
- {
--	WARN_ON_ONCE(rq->curr != prev);
-+//	WARN_ON_ONCE(rq->curr != prev);
- 	prev->sched_class->put_prev_task(rq, prev);
- }
- 
---
-2.42.0
-
-
-From 4aba3e1c3bbe4a36d4b9e405be8a66d7c10d6495 Mon Sep 17 00:00:00 2001
-From: Peter Zijlstra
-Date: Mon, 22 May 2023 13:46:30 +0200
-Subject: [PATCH 15/15] sched/eevdf: Use sched_attr::sched_runtime to set
- request/slice suggestion
-
-Allow applications to directly set a suggested request/slice length using
-sched_attr::sched_runtime.
-
-The implementation clamps the value to: 0.1[ms] <= slice <= 100[ms]
-which is 1/10 the size of HZ=1000 and 10 times the size of HZ=100.
-
-Applications should strive to use their periodic runtime at a high
-confidence interval (95%+) as the target slice. Using a smaller slice
-will introduce undue preemptions, while using a larger value will
-increase latency.
-
-For all the following examples assume a scheduling quantum of 8, and for
-consistency all examples have W=4:
-
-  {A,B,C,D}(w=1,r=8):
-
-  ABCD...
-  +---+---+---+---
-
-  t=0, V=1.5			t=1, V=3.5
-  A |------<			A |------<
-  B |------<			B |------<
-  C |------<			C |------<
-  D |------<			D |------<
-  ---+*------+-------+---	---+--*----+-------+---
-
-  t=2, V=5.5			t=3, V=7.5
-  A |------<			A |------<
-  B |------<			B |------<
-  C |------<			C |------<
-  D |------<			D |------<
-  ---+----*--+-------+---	---+------*+-------+---
-
-Note: 4 identical tasks in FIFO order
-
-~~~
-
-  {A,B}(w=1,r=16) C(w=2,r=16)
-
-  AACCBBCC...
-  +---+---+---+---
-
-  t=0, V=1.25			t=2, V=5.25
-  A |--------------<		A |--------------<
-  B |--------------<		B |--------------<
-  C |------<			C |------<
-  ---+*------+-------+---	---+----*--+-------+---
-
-  t=4, V=8.25			t=6, V=12.25
-  A |--------------<		A |--------------<
-  B |--------------<		B |--------------<
-  C |------<			C |------<
-  ---+-------*-------+---	---+-------+---*---+---
-
-Note: 1 heavy task -- because q=8, double r such that the deadline of the w=2
-      task doesn't go below q.
-
-Note: observe the full schedule becomes: W*max(r_i/w_i) = 4*2q = 8q in length.
-
-Note: the period of the heavy task is half the full period at:
-      W*(r_i/w_i) = 4*(2q/2) = 4q
-
-~~~
-
-  {A,C,D}(w=1,r=16) B(w=1,r=8):
-
-  BAACCBDD...
-  +---+---+---+---
-
-  t=0, V=1.5			t=1, V=3.5
-  A |--------------<		A |---------------<
-  B |------<			B |------<
-  C |--------------<		C |--------------<
-  D |--------------<		D |--------------<
-  ---+*------+-------+---	---+--*----+-------+---
-
-  t=3, V=7.5			t=5, V=11.5
-  A |---------------<		A |---------------<
-  B |------<			B |------<
-  C |--------------<		C |--------------<
-  D |--------------<		D |--------------<
-  ---+------*+-------+---	---+-------+--*----+---
-
-  t=6, V=13.5
-  A |---------------<
-  B |------<
-  C |--------------<
-  D |--------------<
-  ---+-------+----*--+---
-
-Note: 1 short task -- again double r so that the deadline of the short task
-      won't be below q. Made B short because its not the leftmost task, but is
-      eligible with the 0,1,2,3 spread.
-
-Note: like with the heavy task, the period of the short task observes:
-      W*(r_i/w_i) = 4*(1q/1) = 4q
-
-~~~
-
-  A(w=1,r=16) B(w=1,r=8) C(w=2,r=16)
-
-  BCCAABCC...
-  +---+---+---+---
-
-  t=0, V=1.25			t=1, V=3.25
-  A |--------------<		A |--------------<
-  B |------<			B |------<
-  C |------<			C |------<
-  ---+*------+-------+---	---+--*----+-------+---
-
-  t=3, V=7.25			t=5, V=11.25
-  A |--------------<		A |--------------<
-  B |------<			B |------<
-  C |------<			C |------<
-  ---+------*+-------+---	---+-------+--*----+---
-
-  t=6, V=13.25
-  A |--------------<
-  B |------<
-  C |------<
-  ---+-------+----*--+---
-
-Note: 1 heavy and 1 short task -- combine them all.
-
-Note: both the short and heavy task end up with a period of 4q
-
-~~~
-
-  A(w=1,r=16) B(w=2,r=16) C(w=1,r=8)
-
-  BBCAABBC...
-  +---+---+---+---
-
-  t=0, V=1			t=2, V=5
-  A |--------------<		A |--------------<
-  B |------<			B |------<
-  C |------<			C |------<
-  ---+*------+-------+---	---+----*--+-------+---
-
-  t=3, V=7			t=5, V=11
-  A |--------------<		A |--------------<
-  B |------<			B |------<
-  C |------<			C |------<
-  ---+------*+-------+---	---+-------+--*----+---
-
-  t=7, V=15
-  A |--------------<
-  B |------<
-  C |------<
-  ---+-------+------*+---
-
-Note: as before but permuted
-
-~~~
-
-From all this it can be deduced that, for the steady state:
-
- - the total period (P) of a schedule is: W*max(r_i/w_i)
- - the average period of a task is: W*(r_i/w_i)
- - each task obtains the fair share: w_i/W of each full period P
-
-Signed-off-by: Peter Zijlstra (Intel)
----
- include/linux/sched.h |  3 +++
- kernel/sched/core.c   | 33 ++++++++++++++++++++++++++-------
- kernel/sched/fair.c   |  6 ++++--
- 3 files changed, 33 insertions(+), 9 deletions(-)
-
-diff --git a/include/linux/sched.h b/include/linux/sched.h
-index d40d98313..93c03b162 100644
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -555,6 +555,9 @@ struct sched_entity {
- 	struct list_head		group_node;
- 	unsigned int			on_rq;
- 
-+	unsigned int			custom_slice : 1;
-+					/* 31 bits hole */
-+
- 	u64				exec_start;
- 	u64				sum_exec_runtime;
- 	u64				prev_sum_exec_runtime;
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index cfb0ffa69..1ae5a8272 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -4502,7 +4502,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
- 	p->se.nr_migrations		= 0;
- 	p->se.vruntime			= 0;
- 	p->se.vlag			= 0;
--	p->se.slice			= sysctl_sched_base_slice;
- 	INIT_LIST_HEAD(&p->se.group_node);
- 
- #ifdef CONFIG_FAIR_GROUP_SCHED
-@@ -4756,6 +4755,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
- 
- 		p->prio = p->normal_prio = p->static_prio;
- 		set_load_weight(p, false);
-+		p->se.custom_slice = 0;
-+		p->se.slice = sysctl_sched_base_slice;
- 
- 		/*
- 		 * We don't need the reset flag anymore after the fork. It has
-@@ -7556,10 +7557,20 @@ static void __setscheduler_params(struct task_struct *p,
- 
- 	p->policy = policy;
- 
--	if (dl_policy(policy))
-+	if (dl_policy(policy)) {
- 		__setparam_dl(p, attr);
--	else if (fair_policy(policy))
-+	} else if (fair_policy(policy)) {
- 		p->static_prio = NICE_TO_PRIO(attr->sched_nice);
-+		if (attr->sched_runtime) {
-+			p->se.custom_slice = 1;
-+			p->se.slice = clamp_t(u64, attr->sched_runtime,
-+					      NSEC_PER_MSEC/10,   /* HZ=1000 * 10 */
-+					      NSEC_PER_MSEC*100); /* HZ=100 / 10 */
-+		} else {
-+			p->se.custom_slice = 0;
-+			p->se.slice = sysctl_sched_base_slice;
-+		}
-+	}
- 
- 	/*
- 	 * __sched_setscheduler() ensures attr->sched_priority == 0 when
-@@ -7744,7 +7755,9 @@ static int __sched_setscheduler(struct task_struct *p,
- 	 * but store a possible modification of reset_on_fork.
- 	 */
- 	if (unlikely(policy == p->policy)) {
--		if (fair_policy(policy) && attr->sched_nice != task_nice(p))
-+		if (fair_policy(policy) &&
-+		    (attr->sched_nice != task_nice(p) ||
-+		     (attr->sched_runtime && attr->sched_runtime != p->se.slice)))
- 			goto change;
- 		if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
- 			goto change;
-@@ -7890,6 +7903,9 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
- 		.sched_nice	= PRIO_TO_NICE(p->static_prio),
- 	};
- 
-+	if (p->se.custom_slice)
-+		attr.sched_runtime = p->se.slice;
-+
- 	/* Fixup the legacy SCHED_RESET_ON_FORK hack. */
- 	if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) {
- 		attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
-@@ -8066,12 +8082,14 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
- 
- static void get_params(struct task_struct *p, struct sched_attr *attr)
- {
--	if (task_has_dl_policy(p))
-+	if (task_has_dl_policy(p)) {
- 		__getparam_dl(p, attr);
--	else if (task_has_rt_policy(p))
-+	} else if (task_has_rt_policy(p)) {
- 		attr->sched_priority = p->rt_priority;
--	else
-+	} else {
- 		attr->sched_nice = task_nice(p);
-+		attr->sched_runtime = p->se.slice;
-+	}
- }
- 
- /**
-@@ -10090,6 +10108,7 @@ void __init sched_init(void)
- 	}
- 
- 	set_load_weight(&init_task, false);
-+	init_task.se.slice = sysctl_sched_base_slice,
- 
- 	/*
- 	 * The boot idle thread does lazy MMU switching as well:
-diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index 729507e40..51e19a1fb 100644
---- a/kernel/sched/fair.c
-+++ b/kernel/sched/fair.c
-@@ -973,7 +973,8 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
- 	 * nice) while the request time r_i is determined by
- 	 * sysctl_sched_base_slice.
- 	 */
--	se->slice = sysctl_sched_base_slice;
-+	if (!se->custom_slice)
-+		se->slice = sysctl_sched_base_slice;
- 
- 	/*
- 	 * EEVDF: vd_i = ve_i + r_i / w_i
-@@ -4921,7 +4922,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
- 	u64 vslice, vruntime = avg_vruntime(cfs_rq);
- 	s64 lag = 0;
- 
--	se->slice = sysctl_sched_base_slice;
-+	if (!se->custom_slice)
-+		se->slice = sysctl_sched_base_slice;
- 	vslice = calc_delta_fair(se->slice, se);
- 
- 	/*
---
-2.42.0
-