From c98ffc24b15f877e9a3d794bdffeef9d81c3c3de Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Sun, 28 Nov 2021 17:17:03 +0100 Subject: [PATCH] linux515-tkg: Add optimizations from openglfreak - https://gitlab.com/alfredchen/linux-prjc/-/merge_requests/8 --- PKGBUILD | 6 +- linux-tkg-config/prepare | 1 + .../5.15/0005-glitched-pds.patch | 65 +++++++++++++++++++ .../5.15/0009-glitched-bmq.patch | 65 +++++++++++++++++++ 4 files changed, 134 insertions(+), 3 deletions(-) diff --git a/PKGBUILD b/PKGBUILD index c26dab5..1a997f9 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -59,7 +59,7 @@ else fi pkgname=("${pkgbase}" "${pkgbase}-headers") pkgver="${_basekernel}"."${_sub}" -pkgrel=222 +pkgrel=223 pkgdesc='Linux-tkg' arch=('x86_64') # no i686 in here url="http://www.kernel.org/" @@ -645,7 +645,7 @@ case $_basever in '2e2c5c546fb2aabfa90f31310355324f58c6783d520b45b8898577bb7d1a5277' '5efd40c392ece498d2d43d5443e6537c2d9ef7cf9820d5ce80b6577fc5d1a4b2' 'e5ea0bb25ee294c655ac3cc30e1eea497799826108fbfb4ef3258c676c1e8a12' - 'fca63d15ca4502aebd73e76d7499b243d2c03db71ff5ab0bf5cf268b2e576320' + '0b73ec751187d899a4c347b9287c7a76d06523abaeca985a76d0f7ae167d4b1f' '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' '6c4f0099896f69e56ebd8c9eac266ac8ad993acecd50945e0e84ef6f95f9ddca' 'c8f7c50d9b1418ba22b5ca735c47111a162be416109714d26a674162e5b2cb97' @@ -653,7 +653,7 @@ case $_basever in 'a71ea523f0a7bcd24e2ad144ff12160aa03dc3f0c64daceac8dc1aae523d4491' '68659b54bd0c0539c22869feea8017faf947af6883d75c00089f2bfd9f265f8e' '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' - 'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911' + '978b197efa56781a1d5651a3649c3d8b926d55748b4b9063788dfe1a861fc1bc' '768239d739180c0199545b5c5cf2d78de6261aec769008e6a2b7e97c7477b756' '4cd39ae0f0cd218c32bc3d0ad14ff35a34851da71f0cf0116385ac501354ed16' '1b656ad96004f27e9dc63d7f430b50d5c48510d6d4cd595a81c24b21adb70313' diff --git a/linux-tkg-config/prepare b/linux-tkg-config/prepare index 4fea53a..1e31e4d 100644 --- a/linux-tkg-config/prepare +++ b/linux-tkg-config/prepare @@ -572,6 +572,7 @@ _tkg_srcprep() { _disable "DEBUG_FORCE_FUNCTION_ALIGN_64B" scripts/config --set-str "ZSWAP_COMPRESSOR_DEFAULT" "lz4" scripts/config --set-str "CMDLINE" "${_custom_commandline}" + scripts/config --set-val "NR_CPUS" "64" _disable "CMDLINE_OVERRIDE" "X86_P6_NOP" "CPU_FREQ_DEFAULT_GOV_ONDEMAND" "CPU_FREQ_DEFAULT_GOV_CONSERVATIVE" #echo "# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set" >> ./.config diff --git a/linux-tkg-patches/5.15/0005-glitched-pds.patch b/linux-tkg-patches/5.15/0005-glitched-pds.patch index 08c9ef3..1c4597f 100644 --- a/linux-tkg-patches/5.15/0005-glitched-pds.patch +++ b/linux-tkg-patches/5.15/0005-glitched-pds.patch @@ -88,3 +88,68 @@ index 9270a4370d54..30d01e647417 100644 static void set_task_reclaim_state(struct task_struct *task, struct reclaim_state *rs) +From f0438700352f5adebbdf122c00a3360bb0f5247c Mon Sep 17 00:00:00 2001 +From: Torge Matthies +Date: Sun, 21 Nov 2021 23:58:50 +0100 +Subject: [PATCH 1/2] sched/alt: Optimize loops in update_sched_rq_watermark. + +With the old code, gcc misses an optimization opportunity and compiles +the loops to five instructions each: + + 0x0000000000000ed3 <+83>: lock bts %rdi,(%rax) + 0x0000000000000ed8 <+88>: dec %rdx + 0x0000000000000edb <+91>: add $0x400,%rax + 0x0000000000000ee1 <+97>: cmp %rdx,%rsi + 0x0000000000000ee4 <+100>: jne 0xed3 + ... + 0x0000000000000f13 <+147>: lock btr %rdi,(%rax) + 0x0000000000000f18 <+152>: dec %rdx + 0x0000000000000f1b <+155>: add $0x400,%rax + 0x0000000000000f21 <+161>: cmp %rcx,%rdx + 0x0000000000000f24 <+164>: jne 0xf13 + +With this change, the loops get optimized to four instructions each: + + 0x0000000000000ed7 <+87>: lock bts %rsi,(%rdx) + 0x0000000000000edc <+92>: add $0x400,%rdx + 0x0000000000000ee3 <+99>: dec %rcx + 0x0000000000000ee6 <+102>: jne 0xed7 + ... + 0x0000000000000f1a <+154>: lock btr %rsi,(%rax) + 0x0000000000000f1f <+159>: add $0x400,%rax + 0x0000000000000f25 <+165>: dec %rdx + 0x0000000000000f28 <+168>: jne 0xf1a + +Signed-off-by: Torge Matthies +--- + kernel/sched/alt_core.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 8b0ddbdd24e4..7d926e8eab96 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -185,8 +185,8 @@ static inline void update_sched_rq_watermark(struct rq *rq) + rq->watermark = watermark; + cpu = cpu_of(rq); + if (watermark < last_wm) { +- for (i = last_wm; i > watermark; i--) +- cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); ++ for (i = last_wm - watermark; i > 0; i--) ++ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - (i + watermark)); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && + IDLE_TASK_SCHED_PRIO == last_wm) +@@ -196,8 +196,8 @@ static inline void update_sched_rq_watermark(struct rq *rq) + return; + } + /* last_wm < watermark */ +- for (i = watermark; i > last_wm; i--) +- cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); ++ for (i = watermark - last_wm; i > 0; i--) ++ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - (i + last_wm)); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && + IDLE_TASK_SCHED_PRIO == watermark) { +-- +GitLab diff --git a/linux-tkg-patches/5.15/0009-glitched-bmq.patch b/linux-tkg-patches/5.15/0009-glitched-bmq.patch index e42e522..2bbef6b 100644 --- a/linux-tkg-patches/5.15/0009-glitched-bmq.patch +++ b/linux-tkg-patches/5.15/0009-glitched-bmq.patch @@ -88,3 +88,68 @@ index 9270a4370d54..30d01e647417 100644 static void set_task_reclaim_state(struct task_struct *task, struct reclaim_state *rs) +From f0438700352f5adebbdf122c00a3360bb0f5247c Mon Sep 17 00:00:00 2001 +From: Torge Matthies +Date: Sun, 21 Nov 2021 23:58:50 +0100 +Subject: [PATCH 1/2] sched/alt: Optimize loops in update_sched_rq_watermark. + +With the old code, gcc misses an optimization opportunity and compiles +the loops to five instructions each: + + 0x0000000000000ed3 <+83>: lock bts %rdi,(%rax) + 0x0000000000000ed8 <+88>: dec %rdx + 0x0000000000000edb <+91>: add $0x400,%rax + 0x0000000000000ee1 <+97>: cmp %rdx,%rsi + 0x0000000000000ee4 <+100>: jne 0xed3 + ... + 0x0000000000000f13 <+147>: lock btr %rdi,(%rax) + 0x0000000000000f18 <+152>: dec %rdx + 0x0000000000000f1b <+155>: add $0x400,%rax + 0x0000000000000f21 <+161>: cmp %rcx,%rdx + 0x0000000000000f24 <+164>: jne 0xf13 + +With this change, the loops get optimized to four instructions each: + + 0x0000000000000ed7 <+87>: lock bts %rsi,(%rdx) + 0x0000000000000edc <+92>: add $0x400,%rdx + 0x0000000000000ee3 <+99>: dec %rcx + 0x0000000000000ee6 <+102>: jne 0xed7 + ... + 0x0000000000000f1a <+154>: lock btr %rsi,(%rax) + 0x0000000000000f1f <+159>: add $0x400,%rax + 0x0000000000000f25 <+165>: dec %rdx + 0x0000000000000f28 <+168>: jne 0xf1a + +Signed-off-by: Torge Matthies +--- + kernel/sched/alt_core.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 8b0ddbdd24e4..7d926e8eab96 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -185,8 +185,8 @@ static inline void update_sched_rq_watermark(struct rq *rq) + rq->watermark = watermark; + cpu = cpu_of(rq); + if (watermark < last_wm) { +- for (i = last_wm; i > watermark; i--) +- cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); ++ for (i = last_wm - watermark; i > 0; i--) ++ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - (i + watermark)); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && + IDLE_TASK_SCHED_PRIO == last_wm) +@@ -196,8 +196,8 @@ static inline void update_sched_rq_watermark(struct rq *rq) + return; + } + /* last_wm < watermark */ +- for (i = watermark; i > last_wm; i--) +- cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); ++ for (i = watermark - last_wm; i > 0; i--) ++ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - (i + last_wm)); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && + IDLE_TASK_SCHED_PRIO == watermark) { +-- +GitLab