diff --git a/linux-tkg-patches/6.2/0002-clear-patches.patch b/linux-tkg-patches/6.2/0002-clear-patches.patch
index 22a32f5..da046bd 100644
--- a/linux-tkg-patches/6.2/0002-clear-patches.patch
+++ b/linux-tkg-patches/6.2/0002-clear-patches.patch
@@ -358,3 +358,263 @@ index c0cd1b9..af1e2fb 100644
 --
 https://clearlinux.org
+From 676c2dc63592f52b716515573a3a825582a371e9 Mon Sep 17 00:00:00 2001
+From: Arjan van de Ven
+Date: Sat, 8 Dec 2018 18:21:32 +0000
+Subject: [PATCH 1/9] x86/vdso: Use lfence instead of rep and nop
+
+Signed-off-by: Alexandre Frade
+---
+ arch/x86/include/asm/vdso/processor.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h
+index 57b1a7034c64..e2c45674f989 100644
+--- a/arch/x86/include/asm/vdso/processor.h
++++ b/arch/x86/include/asm/vdso/processor.h
+@@ -10,7 +10,7 @@
+ /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+ static __always_inline void rep_nop(void)
+ {
+-	asm volatile("rep; nop" ::: "memory");
++	asm volatile("lfence" ::: "memory");
+ }
+ 
+ static __always_inline void cpu_relax(void)
+--
+2.39.1
+
+From 48dc9669f8db68adc480ffc2698ed8204440e45b Mon Sep 17 00:00:00 2001
+From: Arjan van de Ven
+Date: Thu, 13 Dec 2018 01:00:49 +0000
+Subject: [PATCH 2/9] sched/wait: Do accept() in LIFO order for cache
+ efficiency
+
+Signed-off-by: Alexandre Frade
+---
+ include/linux/wait.h            |  2 ++
+ kernel/sched/wait.c             | 24 ++++++++++++++++++++++++
+ net/ipv4/inet_connection_sock.c |  2 +-
+ 3 files changed, 27 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/wait.h b/include/linux/wait.h
+index a0307b516b09..edc21128f387 100644
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -165,6 +165,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
+ 
+ extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+ extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
++extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+ extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+ extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+ 
+@@ -1192,6 +1193,7 @@ do { \
+  */
+ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+ bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
++void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+ long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+ void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+ long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
+diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
+index 133b74730738..1647fb8662eb 100644
+--- a/kernel/sched/wait.c
++++ b/kernel/sched/wait.c
+@@ -47,6 +47,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_
+ }
+ EXPORT_SYMBOL_GPL(add_wait_queue_priority);
+ 
++void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
++{
++	unsigned long flags;
++
++	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
++	spin_lock_irqsave(&wq_head->lock, flags);
++	__add_wait_queue(wq_head, wq_entry);
++	spin_unlock_irqrestore(&wq_head->lock, flags);
++}
++EXPORT_SYMBOL(add_wait_queue_exclusive_lifo);
++
+ void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+ {
+ 	unsigned long flags;
+@@ -293,6 +304,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent
+ }
+ EXPORT_SYMBOL(prepare_to_wait_exclusive);
+ 
++void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
++{
++	unsigned long flags;
++
++	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
++	spin_lock_irqsave(&wq_head->lock, flags);
++	if (list_empty(&wq_entry->entry))
++		__add_wait_queue(wq_head, wq_entry);
++	set_current_state(state);
++	spin_unlock_irqrestore(&wq_head->lock, flags);
++}
++EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo);
++
+ void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
+ {
+ 	wq_entry->flags = flags;
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index f2c43f67187d..9885bfb429a2 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -606,7 +606,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
+ 	 * having to remove and re-insert us on the wait queue.
+ 	 */
+ 	for (;;) {
+-		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
++		prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait,
+ 					  TASK_INTERRUPTIBLE);
+ 		release_sock(sk);
+ 		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
+--
+2.39.1
+
+
+
+From afa213811c5490906caf394b20bb4b616fc6f12a Mon Sep 17 00:00:00 2001
+From: Ricardo Neri
+Date: Thu, 25 Aug 2022 15:55:26 -0700
+Subject: [PATCH 3/9] sched/fair: Simplify asym_packing logic for SMT sched
+ groups
+
+When the destination CPU is an SMT sibling and idle, it can only help the
+busiest group if all of its other SMT siblings are also idle. Otherwise,
+there is not increase in throughput.
+
+It does not matter whether the busiest group has SMT siblings. Simply
+check if there are any tasks running on the local group before proceeding.
+
+Cc: Ben Segall
+Cc: Daniel Bristot de Oliveira
+Cc: Dietmar Eggemann
+Cc: Len Brown
+Cc: Mel Gorman
+Cc: Rafael J. Wysocki
+Cc: Srinivas Pandruvada
+Cc: Steven Rostedt
+Cc: Tim C. Chen
+Cc: Valentin Schneider
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Reviewed-by: Len Brown
+Signed-off-by: Ricardo Neri
+Signed-off-by: Alexandre Frade
+---
+ kernel/sched/fair.c | 29 +++++++++--------------------
+ 1 file changed, 9 insertions(+), 20 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 345cc5e9fa6e..60f9690a5626 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8921,12 +8921,10 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
+ 				    struct sched_group *sg)
+ {
+ #ifdef CONFIG_SCHED_SMT
+-	bool local_is_smt, sg_is_smt;
++	bool local_is_smt;
+ 	int sg_busy_cpus;
+ 
+ 	local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
+-	sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
+-
+ 	sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
+ 
+ 	if (!local_is_smt) {
+@@ -8947,25 +8945,16 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
+ 		return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+ 	}
+ 
+-	/* @dst_cpu has SMT siblings. */
+-
+-	if (sg_is_smt) {
+-		int local_busy_cpus = sds->local->group_weight -
+-				      sds->local_stat.idle_cpus;
+-		int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
+-
+-		if (busy_cpus_delta == 1)
+-			return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+-
+-		return false;
+-	}
+-
+ 	/*
+-	 * @sg does not have SMT siblings. Ensure that @sds::local does not end
+-	 * up with more than one busy SMT sibling and only pull tasks if there
+-	 * are not busy CPUs (i.e., no CPU has running tasks).
++	 * @dst_cpu has SMT siblings. When both @dst_cpu and the busiest core
++	 * have one or more busy siblings, moving tasks between them results
++	 * in the same throughput. Only if all the siblings of @dst_cpu are
++	 * idle throughput can increase.
++	 *
++	 * If the difference in the number of busy CPUs is two or more, let
++	 * find_busiest_group() take care of it.
+ 	 */
+-	if (!sds->local_stat.sum_nr_running)
++	if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
+ 		return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
+ 
+ 	return false;
+--
+2.39.1
+
+
+
+From a1f627fd10ced4f5eeae678bc4ba96ea7fa01d7e Mon Sep 17 00:00:00 2001
+From: Ricardo Neri
+Date: Thu, 25 Aug 2022 15:55:28 -0700
+Subject: [PATCH 4/9] sched/fair: Let lower-priority CPUs do active balancing
+
+When more than one SMT siblings of a physical core are busy, an idle CPU
+of lower priority can help.
+
+Indicate that the low priority CPU can do active balancing from the high-
+priority CPU only if they belong to separate cores.
+
+Cc: Ben Segall
+Cc: Daniel Bristot de Oliveira
+Cc: Dietmar Eggemann
+Cc: Len Brown
+Cc: Mel Gorman
+Cc: Rafael J. Wysocki
+Cc: Srinivas Pandruvada
+Cc: Steven Rostedt
+Cc: Tim C. Chen
+Cc: Valentin Schneider
+Cc: x86@kernel.org
+Cc: linux-kernel@vger.kernel.org
+Reviewed-by: Len Brown
+Signed-off-by: Ricardo Neri
+Signed-off-by: Alexandre Frade
+---
+ kernel/sched/fair.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 60f9690a5626..67b0eacad0e9 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -10176,9 +10176,14 @@ asym_active_balance(struct lb_env *env)
+ 	 * ASYM_PACKING needs to force migrate tasks from busy but
+ 	 * lower priority CPUs in order to pack all tasks in the
+ 	 * highest priority CPUs.
++	 *
++	 * If the busy CPU has higher priority but is an SMT sibling
++	 * in which other SMT siblings are also busy, a lower-priority
++	 * CPU in a separate core can help.
+ 	 */
+ 	return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
+-	       sched_asym_prefer(env->dst_cpu, env->src_cpu);
++	       (sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
++		!(env->sd->flags & SD_SHARE_CPUCAPACITY));
+ }
+ 
+ static inline bool
+--
+2.39.1