linux515-tkg: Add optimizations from openglfreak - https://gitlab.com/alfredchen/linux-prjc/-/merge_requests/8

This commit is contained in:
Tk-Glitch
2021-11-28 17:17:03 +01:00
parent 711f49258a
commit c98ffc24b1
4 changed files with 134 additions and 3 deletions

View File

@@ -59,7 +59,7 @@ else
fi
pkgname=("${pkgbase}" "${pkgbase}-headers")
pkgver="${_basekernel}"."${_sub}"
pkgrel=222
pkgrel=223
pkgdesc='Linux-tkg'
arch=('x86_64') # no i686 in here
url="http://www.kernel.org/"
@@ -645,7 +645,7 @@ case $_basever in
'2e2c5c546fb2aabfa90f31310355324f58c6783d520b45b8898577bb7d1a5277'
'5efd40c392ece498d2d43d5443e6537c2d9ef7cf9820d5ce80b6577fc5d1a4b2'
'e5ea0bb25ee294c655ac3cc30e1eea497799826108fbfb4ef3258c676c1e8a12'
'fca63d15ca4502aebd73e76d7499b243d2c03db71ff5ab0bf5cf268b2e576320'
'0b73ec751187d899a4c347b9287c7a76d06523abaeca985a76d0f7ae167d4b1f'
'19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a'
'6c4f0099896f69e56ebd8c9eac266ac8ad993acecd50945e0e84ef6f95f9ddca'
'c8f7c50d9b1418ba22b5ca735c47111a162be416109714d26a674162e5b2cb97'
@@ -653,7 +653,7 @@ case $_basever in
'a71ea523f0a7bcd24e2ad144ff12160aa03dc3f0c64daceac8dc1aae523d4491'
'68659b54bd0c0539c22869feea8017faf947af6883d75c00089f2bfd9f265f8e'
'9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177'
'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911'
'978b197efa56781a1d5651a3649c3d8b926d55748b4b9063788dfe1a861fc1bc'
'768239d739180c0199545b5c5cf2d78de6261aec769008e6a2b7e97c7477b756'
'4cd39ae0f0cd218c32bc3d0ad14ff35a34851da71f0cf0116385ac501354ed16'
'1b656ad96004f27e9dc63d7f430b50d5c48510d6d4cd595a81c24b21adb70313'

View File

@@ -572,6 +572,7 @@ _tkg_srcprep() {
_disable "DEBUG_FORCE_FUNCTION_ALIGN_64B"
scripts/config --set-str "ZSWAP_COMPRESSOR_DEFAULT" "lz4"
scripts/config --set-str "CMDLINE" "${_custom_commandline}"
scripts/config --set-val "NR_CPUS" "64"
_disable "CMDLINE_OVERRIDE" "X86_P6_NOP" "CPU_FREQ_DEFAULT_GOV_ONDEMAND" "CPU_FREQ_DEFAULT_GOV_CONSERVATIVE"
#echo "# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set" >> ./.config

View File

@@ -88,3 +88,68 @@ index 9270a4370d54..30d01e647417 100644
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)
From f0438700352f5adebbdf122c00a3360bb0f5247c Mon Sep 17 00:00:00 2001
From: Torge Matthies <openglfreak@googlemail.com>
Date: Sun, 21 Nov 2021 23:58:50 +0100
Subject: [PATCH 1/2] sched/alt: Optimize loops in update_sched_rq_watermark.
With the old code, gcc misses an optimization opportunity and compiles
the loops to five instructions each:
0x0000000000000ed3 <+83>: lock bts %rdi,(%rax)
0x0000000000000ed8 <+88>: dec %rdx
0x0000000000000edb <+91>: add $0x400,%rax
0x0000000000000ee1 <+97>: cmp %rdx,%rsi
0x0000000000000ee4 <+100>: jne 0xed3 <update_sched_rq_watermark+83>
...
0x0000000000000f13 <+147>: lock btr %rdi,(%rax)
0x0000000000000f18 <+152>: dec %rdx
0x0000000000000f1b <+155>: add $0x400,%rax
0x0000000000000f21 <+161>: cmp %rcx,%rdx
0x0000000000000f24 <+164>: jne 0xf13 <update_sched_rq_watermark+147>
With this change, the loops get optimized to four instructions each:
0x0000000000000ed7 <+87>: lock bts %rsi,(%rdx)
0x0000000000000edc <+92>: add $0x400,%rdx
0x0000000000000ee3 <+99>: dec %rcx
0x0000000000000ee6 <+102>: jne 0xed7 <update_sched_rq_watermark+87>
...
0x0000000000000f1a <+154>: lock btr %rsi,(%rax)
0x0000000000000f1f <+159>: add $0x400,%rax
0x0000000000000f25 <+165>: dec %rdx
0x0000000000000f28 <+168>: jne 0xf1a <update_sched_rq_watermark+154>
Signed-off-by: Torge Matthies <openglfreak@googlemail.com>
---
kernel/sched/alt_core.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
index 8b0ddbdd24e4..7d926e8eab96 100644
--- a/kernel/sched/alt_core.c
+++ b/kernel/sched/alt_core.c
@@ -185,8 +185,8 @@ static inline void update_sched_rq_watermark(struct rq *rq)
rq->watermark = watermark;
cpu = cpu_of(rq);
if (watermark < last_wm) {
- for (i = last_wm; i > watermark; i--)
- cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i);
+ for (i = last_wm - watermark; i > 0; i--)
+ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - (i + watermark));
#ifdef CONFIG_SCHED_SMT
if (static_branch_likely(&sched_smt_present) &&
IDLE_TASK_SCHED_PRIO == last_wm)
@@ -196,8 +196,8 @@ static inline void update_sched_rq_watermark(struct rq *rq)
return;
}
/* last_wm < watermark */
- for (i = watermark; i > last_wm; i--)
- cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i);
+ for (i = watermark - last_wm; i > 0; i--)
+ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - (i + last_wm));
#ifdef CONFIG_SCHED_SMT
if (static_branch_likely(&sched_smt_present) &&
IDLE_TASK_SCHED_PRIO == watermark) {
--
GitLab

View File

@@ -88,3 +88,68 @@ index 9270a4370d54..30d01e647417 100644
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)
From f0438700352f5adebbdf122c00a3360bb0f5247c Mon Sep 17 00:00:00 2001
From: Torge Matthies <openglfreak@googlemail.com>
Date: Sun, 21 Nov 2021 23:58:50 +0100
Subject: [PATCH 1/2] sched/alt: Optimize loops in update_sched_rq_watermark.
With the old code, gcc misses an optimization opportunity and compiles
the loops to five instructions each:
0x0000000000000ed3 <+83>: lock bts %rdi,(%rax)
0x0000000000000ed8 <+88>: dec %rdx
0x0000000000000edb <+91>: add $0x400,%rax
0x0000000000000ee1 <+97>: cmp %rdx,%rsi
0x0000000000000ee4 <+100>: jne 0xed3 <update_sched_rq_watermark+83>
...
0x0000000000000f13 <+147>: lock btr %rdi,(%rax)
0x0000000000000f18 <+152>: dec %rdx
0x0000000000000f1b <+155>: add $0x400,%rax
0x0000000000000f21 <+161>: cmp %rcx,%rdx
0x0000000000000f24 <+164>: jne 0xf13 <update_sched_rq_watermark+147>
With this change, the loops get optimized to four instructions each:
0x0000000000000ed7 <+87>: lock bts %rsi,(%rdx)
0x0000000000000edc <+92>: add $0x400,%rdx
0x0000000000000ee3 <+99>: dec %rcx
0x0000000000000ee6 <+102>: jne 0xed7 <update_sched_rq_watermark+87>
...
0x0000000000000f1a <+154>: lock btr %rsi,(%rax)
0x0000000000000f1f <+159>: add $0x400,%rax
0x0000000000000f25 <+165>: dec %rdx
0x0000000000000f28 <+168>: jne 0xf1a <update_sched_rq_watermark+154>
Signed-off-by: Torge Matthies <openglfreak@googlemail.com>
---
kernel/sched/alt_core.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
index 8b0ddbdd24e4..7d926e8eab96 100644
--- a/kernel/sched/alt_core.c
+++ b/kernel/sched/alt_core.c
@@ -185,8 +185,8 @@ static inline void update_sched_rq_watermark(struct rq *rq)
rq->watermark = watermark;
cpu = cpu_of(rq);
if (watermark < last_wm) {
- for (i = last_wm; i > watermark; i--)
- cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i);
+ for (i = last_wm - watermark; i > 0; i--)
+ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - (i + watermark));
#ifdef CONFIG_SCHED_SMT
if (static_branch_likely(&sched_smt_present) &&
IDLE_TASK_SCHED_PRIO == last_wm)
@@ -196,8 +196,8 @@ static inline void update_sched_rq_watermark(struct rq *rq)
return;
}
/* last_wm < watermark */
- for (i = watermark; i > last_wm; i--)
- cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i);
+ for (i = watermark - last_wm; i > 0; i--)
+ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - (i + last_wm));
#ifdef CONFIG_SCHED_SMT
if (static_branch_likely(&sched_smt_present) &&
IDLE_TASK_SCHED_PRIO == watermark) {
--
GitLab