From 11e66a8709a3695029153e9ed48c291327fe52fd Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Thu, 18 Aug 2022 18:54:12 +0200 Subject: [PATCH] linux518/519: Fix Project C patchset for 5.19.2 and 5.18.18 - Thanks to openglfreak and torvic - https://gitlab.com/alfredchen/linux-prjc/-/issues/63 --- PKGBUILD | 6 +- .../5.18/0009-prjc_v5.18-r2.patch | 394 ++++++++++++++++++ .../5.19/0009-prjc_v5.19-r0.patch | 318 ++++++++++++++ 3 files changed, 715 insertions(+), 3 deletions(-) diff --git a/PKGBUILD b/PKGBUILD index 3e46612..cee1aef 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -59,7 +59,7 @@ else fi pkgname=("${pkgbase}" "${pkgbase}-headers") pkgver="${_basekernel}"."${_sub}" -pkgrel=268 +pkgrel=269 pkgdesc='Linux-tkg' arch=('x86_64') # no i686 in here url="https://www.kernel.org/" @@ -786,7 +786,7 @@ case $_basever in '12a241794cab82b851a11f4f5aea1e96e75d998935ed5f723040bed543eb9359' '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' 'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911' - 'bb8a2daf56a513701895f67dc0e6cbde153481fcd0557906af45523d24aa9f76' + '7f6b631bb02714904c3715e0f1c0d558e32b06d7a533e269276fb80777356623' '55f633da855b9346bfcfc5204f109eca6d0d7532ad42dec131ea0d64a0d4fd08' '428517fbcb161a640b53782000c16f797c2ad27cf2758e7e56133fc62d2d113b' '1b656ad96004f27e9dc63d7f430b50d5c48510d6d4cd595a81c24b21adb70313' @@ -838,7 +838,7 @@ case $_basever in '829631f803f11579972aa19f3f7f2ae11b0e380c01745a05776dd02b8e6c8855' '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' 'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911' - '6592ae4cadf0cfddd63fa77416a07131a926d3426df351c41490f361fc56495b' + 'fbf28a5bcf0ce0443ac2a621796ff2c2b1ade06b263d8f01356fae9a3035c585' '970819d5a6eca65b9eeb7c459a8bd487600a10b2b02fed323f7456f483c7d5ce' '213ecf1ba59dc87ed1844c3473d575b85ffe3a567f86735e8c6239c92dbbb493' '1b656ad96004f27e9dc63d7f430b50d5c48510d6d4cd595a81c24b21adb70313' diff --git a/linux-tkg-patches/5.18/0009-prjc_v5.18-r2.patch b/linux-tkg-patches/5.18/0009-prjc_v5.18-r2.patch index 6dba709..58d2f67 100644 --- a/linux-tkg-patches/5.18/0009-prjc_v5.18-r2.patch +++ b/linux-tkg-patches/5.18/0009-prjc_v5.18-r2.patch @@ -9914,3 +9914,397 @@ index abcadbe933bb..d4c778b0ab0e 100644 }; struct wakeup_test_data *x = data; +From 3728c383c5031dce5ae0f5ea53fc47afba71270f Mon Sep 17 00:00:00 2001 +From: Juuso Alasuutari +Date: Sun, 14 Aug 2022 18:19:09 +0300 +Subject: [PATCH 01/10] sched/alt: [Sync] sched/core: Always flush pending + blk_plug + +--- + kernel/sched/alt_core.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 588c7b983e3ba..8a6aa5b7279d3 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4663,8 +4663,12 @@ static inline void sched_submit_work(struct task_struct *tsk) + io_wq_worker_sleeping(tsk); + } + +- if (tsk_is_pi_blocked(tsk)) +- return; ++ /* ++ * spinlock and rwlock must not flush block requests. This will ++ * deadlock if the callback attempts to acquire a lock which is ++ * already acquired. 
++ */ ++ SCHED_WARN_ON(current->__state & TASK_RTLOCK_WAIT); + + /* + * If we are going to sleep and we have plugged IO queued, + +From 379df22366dfa47d021a6bfe149c10a02d39a59e Mon Sep 17 00:00:00 2001 +From: Juuso Alasuutari +Date: Sun, 14 Aug 2022 18:19:09 +0300 +Subject: [PATCH 02/10] sched/alt: [Sync] io_uring: move to separate directory + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 8a6aa5b7279d3..200d12b0ba6a9 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -43,7 +43,7 @@ + + #include "pelt.h" + +-#include "../../fs/io-wq.h" ++#include "../../io_uring/io-wq.h" + #include "../smpboot.h" + + /* + +From 289d4f9619656155c2d467f9ea9fa5258b4aacd0 Mon Sep 17 00:00:00 2001 +From: Juuso Alasuutari +Date: Sun, 14 Aug 2022 18:19:09 +0300 +Subject: [PATCH 03/10] sched/alt: [Sync] sched, cpuset: Fix dl_cpu_busy() + panic due to empty cs->cpus_allowed + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 200d12b0ba6a9..1aeb7a225d9bd 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6737,7 +6737,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, + } + + int task_can_attach(struct task_struct *p, +- const struct cpumask *cs_cpus_allowed) ++ const struct cpumask *cs_effective_cpus) + { + int ret = 0; + + +From 95e712f92034119e23b4157aba72e8ffb2d74fed Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Wed, 17 Aug 2022 21:44:18 +0200 +Subject: [PATCH 05/10] sched/alt: Transpose the sched_rq_watermark array + +This is not my work. +All credits go to Torge Matthies as in below link. 
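
Aside (illustration only, not part of the patch): the transposition below replaces the old per-priority array of cpumasks with one small priority bitmap per CPU, so a watermark update only touches that CPU's own words. The least obvious part of the hunk is the range fill in sched_rq_watermark_fill_downwards(); the following standalone C sketch shows the same index and mask arithmetic in userspace form, assuming plain unsigned long words instead of atomic_long_t, with invented helper names.

#include <stdio.h>
#include <limits.h>

#define WORD_BITS (sizeof(unsigned long) * CHAR_BIT)

/* Set or clear bits [start, end) of a multi-word bitmap, walking from the
 * highest affected word downwards, as the patch does for the watermark. */
static void fill_range(unsigned long *bits, unsigned int end,
		       unsigned int start, int set)
{
	unsigned int start_idx, start_bit, end_idx, end_bit, i;
	unsigned long mask;

	if (end == start)
		return;

	start_idx = start / WORD_BITS;
	start_bit = start % WORD_BITS;
	end_idx = (end - 1) / WORD_BITS;
	end_bit = (end - 1) % WORD_BITS;

	if (end_idx == start_idx) {
		/* Single word: touch bits start_bit..end_bit only. */
		mask = (~0UL >> (WORD_BITS - 1 - end_bit)) & (~0UL << start_bit);
		if (set)
			bits[start_idx] |= mask;
		else
			bits[start_idx] &= ~mask;
		return;
	}

	/* Partial top word: bits 0..end_bit of the highest affected word. */
	mask = ~0UL >> (WORD_BITS - 1 - end_bit);
	if (set)
		bits[end_idx] |= mask;
	else
		bits[end_idx] &= ~mask;

	/* Whole words in between. */
	for (i = end_idx - 1; i > start_idx; i--)
		bits[i] = set ? ~0UL : 0UL;

	/* Partial bottom word: bits start_bit and above. */
	mask = ~0UL << start_bit;
	if (set)
		bits[start_idx] |= mask;
	else
		bits[start_idx] &= ~mask;
}

int main(void)
{
	unsigned long map[3] = { 0, 0, 0 };

	fill_range(map, 100, 10, 1);	/* set bits 10..99 */
	fill_range(map, 70, 60, 0);	/* clear bits 60..69 again */
	printf("%016lx %016lx %016lx\n", map[2], map[1], map[0]);
	return 0;
}

In the patch itself each word is an atomic_long_t updated with atomic_long_or()/atomic_long_and(), but the index and mask computation is the same.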
+ +Link: https://gitlab.com/alfredchen/linux-prjc/-/merge_requests/11 +--- + kernel/sched/alt_core.c | 124 +++++++++++++++++++++++++++++++++------- + 1 file changed, 104 insertions(+), 20 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index cf71defb0e0be..7929b810ba74f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -147,7 +147,87 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); + #ifdef CONFIG_SCHED_SMT + static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; + #endif +-static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp; ++ ++#define BITS_PER_ATOMIC_LONG_T BITS_PER_LONG ++typedef struct sched_bitmask { ++ atomic_long_t bits[DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T)]; ++} sched_bitmask_t; ++static sched_bitmask_t sched_rq_watermark[NR_CPUS] ____cacheline_aligned_in_smp; ++ ++#define x(p, set, mask) \ ++ do { \ ++ if (set) \ ++ atomic_long_or((mask), (p)); \ ++ else \ ++ atomic_long_and(~(mask), (p)); \ ++ } while (0) ++ ++static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned int end, ++ unsigned int start, bool set) ++{ ++ unsigned int start_idx, start_bit; ++ unsigned int end_idx, end_bit; ++ atomic_long_t *p; ++ ++ if (end == start) { ++ return; ++ } ++ ++ start_idx = start / BITS_PER_ATOMIC_LONG_T; ++ start_bit = start % BITS_PER_ATOMIC_LONG_T; ++ end_idx = (end - 1) / BITS_PER_ATOMIC_LONG_T; ++ end_bit = (end - 1) % BITS_PER_ATOMIC_LONG_T; ++ p = &sched_rq_watermark[cpu].bits[end_idx]; ++ ++ if (end_idx == start_idx) { ++ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)) & (~0UL << start_bit)); ++ return; ++ } ++ ++ if (end_bit != BITS_PER_ATOMIC_LONG_T - 1) { ++ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit))); ++ p -= 1; ++ end_idx -= 1; ++ } ++ ++ while (end_idx != start_idx) { ++ atomic_long_set(p, set ? 
~0UL : 0); ++ p -= 1; ++ end_idx -= 1; ++ } ++ ++ x(p, set, ~0UL << start_bit); ++} ++ ++#undef x ++ ++static __always_inline bool sched_rq_watermark_and(cpumask_t *dstp, const cpumask_t *cpus, int prio, bool not) ++{ ++ int cpu; ++ bool ret = false; ++ int idx = prio / BITS_PER_ATOMIC_LONG_T; ++ int bit = prio % BITS_PER_ATOMIC_LONG_T; ++ ++ cpumask_clear(dstp); ++ for_each_cpu(cpu, cpus) ++ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not) { ++ __cpumask_set_cpu(cpu, dstp); ++ ret = true; ++ } ++ return ret; ++} ++ ++static __always_inline bool sched_rq_watermark_test(const cpumask_t *cpus, int prio, bool not) ++{ ++ int cpu; ++ int idx = prio / BITS_PER_ATOMIC_LONG_T; ++ int bit = prio % BITS_PER_ATOMIC_LONG_T; ++ ++ for_each_cpu(cpu, cpus) ++ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not) ++ return true; ++ return false; ++} + + /* sched_queue related functions */ + static inline void sched_queue_init(struct sched_queue *q) +@@ -176,7 +256,6 @@ static inline void update_sched_rq_watermark(struct rq *rq) + { + unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); + unsigned long last_wm = rq->watermark; +- unsigned long i; + int cpu; + + if (watermark == last_wm) +@@ -185,28 +264,25 @@ static inline void update_sched_rq_watermark(struct rq *rq) + rq->watermark = watermark; + cpu = cpu_of(rq); + if (watermark < last_wm) { +- for (i = last_wm; i > watermark; i--) +- cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); ++ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - watermark, SCHED_QUEUE_BITS - last_wm, false); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && +- IDLE_TASK_SCHED_PRIO == last_wm) ++ unlikely(IDLE_TASK_SCHED_PRIO == last_wm)) + cpumask_andnot(&sched_sg_idle_mask, + &sched_sg_idle_mask, cpu_smt_mask(cpu)); + #endif + return; + } + /* last_wm < watermark */ +- for (i = watermark; i > last_wm; i--) +- cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); ++ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - last_wm, SCHED_QUEUE_BITS - watermark, true); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && +- IDLE_TASK_SCHED_PRIO == watermark) { +- cpumask_t tmp; ++ unlikely(IDLE_TASK_SCHED_PRIO == watermark)) { ++ const cpumask_t *smt_mask = cpu_smt_mask(cpu); + +- cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark); +- if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) ++ if (!sched_rq_watermark_test(smt_mask, 0, true)) + cpumask_or(&sched_sg_idle_mask, +- &sched_sg_idle_mask, cpu_smt_mask(cpu)); ++ &sched_sg_idle_mask, smt_mask); + } + #endif + } +@@ -1903,9 +1979,9 @@ static inline int select_task_rq(struct task_struct *p) + #ifdef CONFIG_SCHED_SMT + cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || + #endif +- cpumask_and(&tmp, &chk_mask, sched_rq_watermark) || +- cpumask_and(&tmp, &chk_mask, +- sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p))) ++ sched_rq_watermark_and(&tmp, &chk_mask, 0, false) || ++ sched_rq_watermark_and(&tmp, &chk_mask, ++ SCHED_QUEUE_BITS - 1 - task_sched_prio(p), false)) + return best_mask_cpu(task_cpu(p), &tmp); + + return best_mask_cpu(task_cpu(p), &chk_mask); +@@ -3977,7 +4053,7 @@ static inline void sg_balance(struct rq *rq) + * find potential cpus which can migrate the current running task + */ + if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && +- cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) && ++ sched_rq_watermark_and(&chk, cpu_online_mask, 
0, true) && + cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) { + int i; + +@@ -4285,9 +4361,8 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) + #ifdef ALT_SCHED_DEBUG + void alt_sched_debug(void) + { +- printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", ++ printk(KERN_INFO "sched: pending: 0x%04lx, sg_idle: 0x%04lx\n", + sched_rq_pending_mask.bits[0], +- sched_rq_watermark[0].bits[0], + sched_sg_idle_mask.bits[0]); + } + #else +@@ -7285,8 +7360,17 @@ void __init sched_init(void) + wait_bit_init(); + + #ifdef CONFIG_SMP +- for (i = 0; i < SCHED_QUEUE_BITS; i++) +- cpumask_copy(sched_rq_watermark + i, cpu_present_mask); ++ for (i = 0; i < nr_cpu_ids; i++) { ++ long val = cpumask_test_cpu(i, cpu_present_mask) ? -1L : 0; ++ int j; ++ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++) ++ atomic_long_set(&sched_rq_watermark[i].bits[j], val); ++ } ++ for (i = nr_cpu_ids; i < NR_CPUS; i++) { ++ int j; ++ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++) ++ atomic_long_set(&sched_rq_watermark[i].bits[j], 0); ++ } + #endif + + #ifdef CONFIG_CGROUP_SCHED + +From 5b3b4b3d14c234196c807568905ee2e013565508 Mon Sep 17 00:00:00 2001 +From: Torge Matthies +Date: Tue, 15 Mar 2022 23:08:54 +0100 +Subject: [PATCH 06/10] sched/alt: Add memory barriers around atomics. + +--- + kernel/sched/alt_core.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 7929b810ba74f..b0cb6b772d5fa 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -156,10 +156,12 @@ static sched_bitmask_t sched_rq_watermark[NR_CPUS] ____cacheline_aligned_in_smp; + + #define x(p, set, mask) \ + do { \ ++ smp_mb__before_atomic(); \ + if (set) \ + atomic_long_or((mask), (p)); \ + else \ + atomic_long_and(~(mask), (p)); \ ++ smp_mb__after_atomic(); \ + } while (0) + + static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned int end, +@@ -191,7 +193,9 @@ static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned + } + + while (end_idx != start_idx) { ++ smp_mb__before_atomic(); + atomic_long_set(p, set ? ~0UL : 0); ++ smp_mb__after_atomic(); + p -= 1; + end_idx -= 1; + } + +From a587150a10f0c85e954b3e8d4d95eae59a4ccef9 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 14 Jul 2022 16:33:06 +0000 +Subject: [PATCH 08/10] sched/alt: [Sync] f886aab8291c sched: Fix + balance_push() vs __sched_setscheduler() + +--- + kernel/sched/alt_core.c | 39 ++++++++++++++++++++++++++++++++++----- + 1 file changed, 34 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 427a1f75631b3..bd92ea2c2923d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3362,26 +3362,55 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head) + + static void balance_push(struct rq *rq); + ++/* ++ * balance_push_callback is a right abuse of the callback interface and plays ++ * by significantly different rules. ++ * ++ * Where the normal balance_callback's purpose is to be ran in the same context ++ * that queued it (only later, when it's safe to drop rq->lock again), ++ * balance_push_callback is specifically targeted at __schedule(). ++ * ++ * This abuse is tolerated because it places all the unlikely/odd cases behind ++ * a single test, namely: rq->balance_callback == NULL. 
++ */ + struct callback_head balance_push_callback = { + .next = NULL, + .func = (void (*)(struct callback_head *))balance_push, + }; + +-static inline struct callback_head *splice_balance_callbacks(struct rq *rq) ++static inline struct callback_head * ++__splice_balance_callbacks(struct rq *rq, bool split) + { + struct callback_head *head = rq->balance_callback; + +- if (head) { +- lockdep_assert_held(&rq->lock); ++ if (likely(!head)) ++ return NULL; ++ ++ lockdep_assert_rq_held(rq); ++ /* ++ * Must not take balance_push_callback off the list when ++ * splice_balance_callbacks() and balance_callbacks() are not ++ * in the same rq->lock section. ++ * ++ * In that case it would be possible for __schedule() to interleave ++ * and observe the list empty. ++ */ ++ if (split && head == &balance_push_callback) ++ head = NULL; ++ else + rq->balance_callback = NULL; +- } + + return head; + } + ++static inline struct callback_head *splice_balance_callbacks(struct rq *rq) ++{ ++ return __splice_balance_callbacks(rq, true); ++} ++ + static void __balance_callbacks(struct rq *rq) + { +- do_balance_callbacks(rq, splice_balance_callbacks(rq)); ++ do_balance_callbacks(rq, __splice_balance_callbacks(rq, false)); + } + + static inline void balance_callbacks(struct rq *rq, struct callback_head *head) + diff --git a/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch b/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch index 610cfe8..25c71a6 100644 --- a/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch +++ b/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch @@ -9954,3 +9954,321 @@ index a2d301f58ced..2ccdede8585c 100644 }; struct wakeup_test_data *x = data; +From 3728c383c5031dce5ae0f5ea53fc47afba71270f Mon Sep 17 00:00:00 2001 +From: Juuso Alasuutari +Date: Sun, 14 Aug 2022 18:19:09 +0300 +Subject: [PATCH 01/10] sched/alt: [Sync] sched/core: Always flush pending + blk_plug + +--- + kernel/sched/alt_core.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 588c7b983e3ba..8a6aa5b7279d3 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4663,8 +4663,12 @@ static inline void sched_submit_work(struct task_struct *tsk) + io_wq_worker_sleeping(tsk); + } + +- if (tsk_is_pi_blocked(tsk)) +- return; ++ /* ++ * spinlock and rwlock must not flush block requests. This will ++ * deadlock if the callback attempts to acquire a lock which is ++ * already acquired. 
++ */ ++ SCHED_WARN_ON(current->__state & TASK_RTLOCK_WAIT); + + /* + * If we are going to sleep and we have plugged IO queued, + +From 379df22366dfa47d021a6bfe149c10a02d39a59e Mon Sep 17 00:00:00 2001 +From: Juuso Alasuutari +Date: Sun, 14 Aug 2022 18:19:09 +0300 +Subject: [PATCH 02/10] sched/alt: [Sync] io_uring: move to separate directory + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 8a6aa5b7279d3..200d12b0ba6a9 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -43,7 +43,7 @@ + + #include "pelt.h" + +-#include "../../fs/io-wq.h" ++#include "../../io_uring/io-wq.h" + #include "../smpboot.h" + + /* + +From 289d4f9619656155c2d467f9ea9fa5258b4aacd0 Mon Sep 17 00:00:00 2001 +From: Juuso Alasuutari +Date: Sun, 14 Aug 2022 18:19:09 +0300 +Subject: [PATCH 03/10] sched/alt: [Sync] sched, cpuset: Fix dl_cpu_busy() + panic due to empty cs->cpus_allowed + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 200d12b0ba6a9..1aeb7a225d9bd 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6737,7 +6737,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, + } + + int task_can_attach(struct task_struct *p, +- const struct cpumask *cs_cpus_allowed) ++ const struct cpumask *cs_effective_cpus) + { + int ret = 0; + + +From 95e712f92034119e23b4157aba72e8ffb2d74fed Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Wed, 17 Aug 2022 21:44:18 +0200 +Subject: [PATCH 05/10] sched/alt: Transpose the sched_rq_watermark array + +This is not my work. +All credits go to Torge Matthies as in below link. 
+ +Link: https://gitlab.com/alfredchen/linux-prjc/-/merge_requests/11 +--- + kernel/sched/alt_core.c | 124 +++++++++++++++++++++++++++++++++------- + 1 file changed, 104 insertions(+), 20 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index cf71defb0e0be..7929b810ba74f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -147,7 +147,87 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); + #ifdef CONFIG_SCHED_SMT + static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; + #endif +-static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp; ++ ++#define BITS_PER_ATOMIC_LONG_T BITS_PER_LONG ++typedef struct sched_bitmask { ++ atomic_long_t bits[DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T)]; ++} sched_bitmask_t; ++static sched_bitmask_t sched_rq_watermark[NR_CPUS] ____cacheline_aligned_in_smp; ++ ++#define x(p, set, mask) \ ++ do { \ ++ if (set) \ ++ atomic_long_or((mask), (p)); \ ++ else \ ++ atomic_long_and(~(mask), (p)); \ ++ } while (0) ++ ++static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned int end, ++ unsigned int start, bool set) ++{ ++ unsigned int start_idx, start_bit; ++ unsigned int end_idx, end_bit; ++ atomic_long_t *p; ++ ++ if (end == start) { ++ return; ++ } ++ ++ start_idx = start / BITS_PER_ATOMIC_LONG_T; ++ start_bit = start % BITS_PER_ATOMIC_LONG_T; ++ end_idx = (end - 1) / BITS_PER_ATOMIC_LONG_T; ++ end_bit = (end - 1) % BITS_PER_ATOMIC_LONG_T; ++ p = &sched_rq_watermark[cpu].bits[end_idx]; ++ ++ if (end_idx == start_idx) { ++ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)) & (~0UL << start_bit)); ++ return; ++ } ++ ++ if (end_bit != BITS_PER_ATOMIC_LONG_T - 1) { ++ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit))); ++ p -= 1; ++ end_idx -= 1; ++ } ++ ++ while (end_idx != start_idx) { ++ atomic_long_set(p, set ? 
~0UL : 0); ++ p -= 1; ++ end_idx -= 1; ++ } ++ ++ x(p, set, ~0UL << start_bit); ++} ++ ++#undef x ++ ++static __always_inline bool sched_rq_watermark_and(cpumask_t *dstp, const cpumask_t *cpus, int prio, bool not) ++{ ++ int cpu; ++ bool ret = false; ++ int idx = prio / BITS_PER_ATOMIC_LONG_T; ++ int bit = prio % BITS_PER_ATOMIC_LONG_T; ++ ++ cpumask_clear(dstp); ++ for_each_cpu(cpu, cpus) ++ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not) { ++ __cpumask_set_cpu(cpu, dstp); ++ ret = true; ++ } ++ return ret; ++} ++ ++static __always_inline bool sched_rq_watermark_test(const cpumask_t *cpus, int prio, bool not) ++{ ++ int cpu; ++ int idx = prio / BITS_PER_ATOMIC_LONG_T; ++ int bit = prio % BITS_PER_ATOMIC_LONG_T; ++ ++ for_each_cpu(cpu, cpus) ++ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not) ++ return true; ++ return false; ++} + + /* sched_queue related functions */ + static inline void sched_queue_init(struct sched_queue *q) +@@ -176,7 +256,6 @@ static inline void update_sched_rq_watermark(struct rq *rq) + { + unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); + unsigned long last_wm = rq->watermark; +- unsigned long i; + int cpu; + + if (watermark == last_wm) +@@ -185,28 +264,25 @@ static inline void update_sched_rq_watermark(struct rq *rq) + rq->watermark = watermark; + cpu = cpu_of(rq); + if (watermark < last_wm) { +- for (i = last_wm; i > watermark; i--) +- cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); ++ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - watermark, SCHED_QUEUE_BITS - last_wm, false); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && +- IDLE_TASK_SCHED_PRIO == last_wm) ++ unlikely(IDLE_TASK_SCHED_PRIO == last_wm)) + cpumask_andnot(&sched_sg_idle_mask, + &sched_sg_idle_mask, cpu_smt_mask(cpu)); + #endif + return; + } + /* last_wm < watermark */ +- for (i = watermark; i > last_wm; i--) +- cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); ++ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - last_wm, SCHED_QUEUE_BITS - watermark, true); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && +- IDLE_TASK_SCHED_PRIO == watermark) { +- cpumask_t tmp; ++ unlikely(IDLE_TASK_SCHED_PRIO == watermark)) { ++ const cpumask_t *smt_mask = cpu_smt_mask(cpu); + +- cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark); +- if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) ++ if (!sched_rq_watermark_test(smt_mask, 0, true)) + cpumask_or(&sched_sg_idle_mask, +- &sched_sg_idle_mask, cpu_smt_mask(cpu)); ++ &sched_sg_idle_mask, smt_mask); + } + #endif + } +@@ -1903,9 +1979,9 @@ static inline int select_task_rq(struct task_struct *p) + #ifdef CONFIG_SCHED_SMT + cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || + #endif +- cpumask_and(&tmp, &chk_mask, sched_rq_watermark) || +- cpumask_and(&tmp, &chk_mask, +- sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p))) ++ sched_rq_watermark_and(&tmp, &chk_mask, 0, false) || ++ sched_rq_watermark_and(&tmp, &chk_mask, ++ SCHED_QUEUE_BITS - 1 - task_sched_prio(p), false)) + return best_mask_cpu(task_cpu(p), &tmp); + + return best_mask_cpu(task_cpu(p), &chk_mask); +@@ -3977,7 +4053,7 @@ static inline void sg_balance(struct rq *rq) + * find potential cpus which can migrate the current running task + */ + if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && +- cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) && ++ sched_rq_watermark_and(&chk, cpu_online_mask, 
0, true) && + cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) { + int i; + +@@ -4285,9 +4361,8 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) + #ifdef ALT_SCHED_DEBUG + void alt_sched_debug(void) + { +- printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", ++ printk(KERN_INFO "sched: pending: 0x%04lx, sg_idle: 0x%04lx\n", + sched_rq_pending_mask.bits[0], +- sched_rq_watermark[0].bits[0], + sched_sg_idle_mask.bits[0]); + } + #else +@@ -7285,8 +7360,17 @@ void __init sched_init(void) + wait_bit_init(); + + #ifdef CONFIG_SMP +- for (i = 0; i < SCHED_QUEUE_BITS; i++) +- cpumask_copy(sched_rq_watermark + i, cpu_present_mask); ++ for (i = 0; i < nr_cpu_ids; i++) { ++ long val = cpumask_test_cpu(i, cpu_present_mask) ? -1L : 0; ++ int j; ++ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++) ++ atomic_long_set(&sched_rq_watermark[i].bits[j], val); ++ } ++ for (i = nr_cpu_ids; i < NR_CPUS; i++) { ++ int j; ++ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++) ++ atomic_long_set(&sched_rq_watermark[i].bits[j], 0); ++ } + #endif + + #ifdef CONFIG_CGROUP_SCHED + +From 5b3b4b3d14c234196c807568905ee2e013565508 Mon Sep 17 00:00:00 2001 +From: Torge Matthies +Date: Tue, 15 Mar 2022 23:08:54 +0100 +Subject: [PATCH 06/10] sched/alt: Add memory barriers around atomics. + +--- + kernel/sched/alt_core.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 7929b810ba74f..b0cb6b772d5fa 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -156,10 +156,12 @@ static sched_bitmask_t sched_rq_watermark[NR_CPUS] ____cacheline_aligned_in_smp; + + #define x(p, set, mask) \ + do { \ ++ smp_mb__before_atomic(); \ + if (set) \ + atomic_long_or((mask), (p)); \ + else \ + atomic_long_and(~(mask), (p)); \ ++ smp_mb__after_atomic(); \ + } while (0) + + static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned int end, +@@ -191,7 +193,9 @@ static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned + } + + while (end_idx != start_idx) { ++ smp_mb__before_atomic(); + atomic_long_set(p, set ? ~0UL : 0); ++ smp_mb__after_atomic(); + p -= 1; + end_idx -= 1; + } +
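
Aside (illustration only, not part of the patch): the memory-barrier change above brackets the atomic_long_or()/atomic_long_and()/atomic_long_set() watermark updates with smp_mb__before_atomic()/smp_mb__after_atomic(), but the log does not spell out what that buys. As a rough userspace analogue, assuming C11 atomics and invented names (the kernel memory model is not identical to C11), the pattern corresponds to turning an otherwise unordered atomic update into a fully ordered one:

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong watermark_word;

/* Relaxed RMW bracketed by full fences: roughly what
 * smp_mb__before_atomic(); atomic_long_or(); smp_mb__after_atomic();
 * provides in the hunks above. */
static void set_mask_fully_ordered(unsigned long mask)
{
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb__before_atomic() */
	atomic_fetch_or_explicit(&watermark_word, mask, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb__after_atomic() */
}

int main(void)
{
	set_mask_fully_ordered(0x3UL);
	printf("%lx\n",
	       atomic_load_explicit(&watermark_word, memory_order_relaxed));
	return 0;
}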