Compare commits

...

4 Commits

5 changed files with 593 additions and 15 deletions

View File

@@ -3,7 +3,7 @@
# Linux distribution you are using, options are "Arch", "Ubuntu", "Debian", "Fedora", "Suse", "Gentoo", "Generic".
# It is automatically set to "Arch" when using PKGBUILD.
# If left empty, the script will prompt
_distro=""
_distro="Arch"
# Kernel Version - Options are "5.4", and from "5.7" to "5.19"
# you can also set a specific kernel version, e.g. "6.0-rc4" or "5.10.51",
@@ -46,7 +46,7 @@ CUSTOM_GCC_PATH=""
CUSTOM_LLVM_PATH=""
# Set to true to bypass makepkg.conf and use all available threads for compilation. False will respect your makepkg.conf options.
_force_all_threads="true"
_force_all_threads="false"
# Set to true to prevent ccache from being used and set CONFIG_GCC_PLUGINS=y (which needs to be disabled for ccache to work properly)
_noccache="false"
@@ -60,10 +60,10 @@ _modprobeddb="false"
_modprobeddb_db_path=~/.config/modprobed.db
# Set to "1" to call make menuconfig, "2" to call make nconfig, "3" to call make xconfig, before building the kernel. Set to false to disable and skip the prompt.
_menunconfig=""
_menunconfig="false"
# Set to true to generate a kernel config fragment from your changes in menuconfig/nconfig. Set to false to disable and skip the prompt.
_diffconfig=""
_diffconfig="false"
# Set to the file name where the generated config fragment should be written to. Only used if _diffconfig is active.
_diffconfig_name=""
@@ -97,11 +97,11 @@ _STRIP="true"
# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME
# CPU scheduler - Options are "upds" (TkG's Undead PDS), "pds", "bmq", "muqss", "cacule", "tt", "bore" or "cfs" (kernel's default)
_cpusched=""
_cpusched="pds"
# Compiler to use - Options are "gcc" or "llvm".
# For advanced users.
_compiler=""
_compiler="gcc"
# Force the use of the LLVM Integrated Assembler whether using LLVM, LTO or not.
# Set to "1" to enable.
@@ -131,7 +131,7 @@ _preempt_rt_force=""
# For BMQ: 0: No yield.
# 1: Deboost and requeue task. (Default)
# 2: Set rq skip task.
_sched_yield_type=""
_sched_yield_type="0"
# Round Robin interval is the longest duration two tasks with the same nice level will be delayed for. When CPU time is requested by a task, it receives a time slice equal
# to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low value can help offset the disadvantages of rescheduling a process that has yielded.
@@ -139,7 +139,7 @@ _sched_yield_type=""
# PDS default: 4ms"
# BMQ default: 2ms"
# Set to "1" for 2ms, "2" for 4ms, "3" for 6ms, "4" for 8ms, or "default" to keep the chosen scheduler defaults.
_rr_interval=""
_rr_interval="2"
# Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false"
_ftracedisable="false"
@@ -154,10 +154,10 @@ _misc_adds="true"
# Full tickless can give higher performance when you isolate CPUs for specific tasks,
# but it only takes effect when using the nohz_full kernel parameter; otherwise it behaves like tickless idle.
# Tickless idle alone performs better on most platforms.
_tickless=""
_tickless="2"
# Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false"
_acs_override=""
_acs_override="false"
# Set to "true" to add Bcache filesystem support. You'll have to install bcachefs-tools-git from AUR for utilities - https://bcachefs.org/ - If in doubt, set to "false"
# This can be buggy and isn't recommended on a production machine. Enabling this option also prevents you from enabling MGLRU.
@@ -168,13 +168,13 @@ _bcachefs="false"
_winesync="false"
# Set to "true" to enable Binder and Ashmem, the kernel modules required to use the android emulator Anbox. ! This doesn't apply to 5.4.y !
_anbox=""
_anbox="false"
# A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true"
_zenify="true"
# compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "1"
_compileroptlevel="1"
_compileroptlevel="2"
# CPU compiler optimizations - Defaults to prompt at kernel config if left empty
# AMD CPUs : "k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver" "steamroller" "excavator" "zen" "zen2" "zen3" "zen4" (zen3 opt support depends on GCC11) (zen4 opt support depends on GCC13)
@@ -188,7 +188,7 @@ _compileroptlevel="1"
# - "generic_v2" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v2)
# - "generic_v3" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v3)
# - "generic_v4" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v4)
_processor_opt=""
_processor_opt="skylake"
# CacULE only - Enable Response Driven Balancer, an experimental load balancer for CacULE
_cacule_rdb="false"
@@ -201,13 +201,13 @@ _cacule_rdb_interval="19"
_tt_high_hz="false"
# MuQSS and PDS only - SMT (Hyperthreading) aware nice priority and policy support (SMT_NICE) - Kernel default is "true" - You can disable this on non-SMT/HT CPUs for lower overhead
_smt_nice=""
_smt_nice="true"
# Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false"
_random_trust_cpu="true"
# Timer frequency - "100" "250" "300" "500" "750" "1000" ("2000" is available for cacule cpusched only) - More options available in kernel config prompt when left empty depending on selected cpusched with the default option marked with a ">" (2000 for cacule, 100 for muqss and 1000 for other cpu schedulers)
_timer_freq=""
_timer_freq="500"
# Default CPU governor - "performance", "ondemand", "schedutil" or leave empty for default (schedutil)
_default_cpu_gov="ondemand"

View File

@@ -358,3 +358,263 @@ index c0cd1b9..af1e2fb 100644
--
https://clearlinux.org
From 676c2dc63592f52b716515573a3a825582a371e9 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 8 Dec 2018 18:21:32 +0000
Subject: [PATCH 1/9] x86/vdso: Use lfence instead of rep and nop
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
arch/x86/include/asm/vdso/processor.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h
index 57b1a7034c64..e2c45674f989 100644
--- a/arch/x86/include/asm/vdso/processor.h
+++ b/arch/x86/include/asm/vdso/processor.h
@@ -10,7 +10,7 @@
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static __always_inline void rep_nop(void)
{
- asm volatile("rep; nop" ::: "memory");
+ asm volatile("lfence" ::: "memory");
}
static __always_inline void cpu_relax(void)
--
2.39.1
From 48dc9669f8db68adc480ffc2698ed8204440e45b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 13 Dec 2018 01:00:49 +0000
Subject: [PATCH 2/9] sched/wait: Do accept() in LIFO order for cache
efficiency
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/wait.h | 2 ++
kernel/sched/wait.c | 24 ++++++++++++++++++++++++
net/ipv4/inet_connection_sock.c | 2 +-
3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a0307b516b09..edc21128f387 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -165,6 +165,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
@@ -1192,6 +1193,7 @@ do { \
*/
void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 133b74730738..1647fb8662eb 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -47,6 +47,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_
}
EXPORT_SYMBOL_GPL(add_wait_queue_priority);
+void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+{
+ unsigned long flags;
+
+ wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&wq_head->lock, flags);
+ __add_wait_queue(wq_head, wq_entry);
+ spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive_lifo);
+
void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
unsigned long flags;
@@ -293,6 +304,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
+{
+ unsigned long flags;
+
+ wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&wq_head->lock, flags);
+ if (list_empty(&wq_entry->entry))
+ __add_wait_queue(wq_head, wq_entry);
+ set_current_state(state);
+ spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo);
+
void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
wq_entry->flags = flags;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f2c43f67187d..9885bfb429a2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -606,7 +606,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
* having to remove and re-insert us on the wait queue.
*/
for (;;) {
- prepare_to_wait_exclusive(sk_sleep(sk), &wait,
+ prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
release_sock(sk);
if (reqsk_queue_empty(&icsk->icsk_accept_queue))
--
2.39.1
From afa213811c5490906caf394b20bb4b616fc6f12a Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 25 Aug 2022 15:55:26 -0700
Subject: [PATCH 3/9] sched/fair: Simplify asym_packing logic for SMT sched
groups
When the destination CPU is an SMT sibling and idle, it can only help the
busiest group if all of its other SMT siblings are also idle. Otherwise,
there is no increase in throughput.
It does not matter whether the busiest group has SMT siblings. Simply
check if there are any tasks running on the local group before proceeding.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
kernel/sched/fair.c | 29 +++++++++--------------------
1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 345cc5e9fa6e..60f9690a5626 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8921,12 +8921,10 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
struct sched_group *sg)
{
#ifdef CONFIG_SCHED_SMT
- bool local_is_smt, sg_is_smt;
+ bool local_is_smt;
int sg_busy_cpus;
local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
- sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
-
sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
if (!local_is_smt) {
@@ -8947,25 +8945,16 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
}
- /* @dst_cpu has SMT siblings. */
-
- if (sg_is_smt) {
- int local_busy_cpus = sds->local->group_weight -
- sds->local_stat.idle_cpus;
- int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
-
- if (busy_cpus_delta == 1)
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
-
- return false;
- }
-
/*
- * @sg does not have SMT siblings. Ensure that @sds::local does not end
- * up with more than one busy SMT sibling and only pull tasks if there
- * are not busy CPUs (i.e., no CPU has running tasks).
+ * @dst_cpu has SMT siblings. When both @dst_cpu and the busiest core
+ * have one or more busy siblings, moving tasks between them results
+ * in the same throughput. Only if all the siblings of @dst_cpu are
+ * idle throughput can increase.
+ *
+ * If the difference in the number of busy CPUs is two or more, let
+ * find_busiest_group() take care of it.
*/
- if (!sds->local_stat.sum_nr_running)
+ if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
return false;
--
2.39.1
From a1f627fd10ced4f5eeae678bc4ba96ea7fa01d7e Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 25 Aug 2022 15:55:28 -0700
Subject: [PATCH 4/9] sched/fair: Let lower-priority CPUs do active balancing
When more than one SMT sibling of a physical core is busy, an idle CPU
of lower priority can help.
Indicate that the low priority CPU can do active balancing from the high-
priority CPU only if they belong to separate cores.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
kernel/sched/fair.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 60f9690a5626..67b0eacad0e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10176,9 +10176,14 @@ asym_active_balance(struct lb_env *env)
* ASYM_PACKING needs to force migrate tasks from busy but
* lower priority CPUs in order to pack all tasks in the
* highest priority CPUs.
+ *
+ * If the busy CPU has higher priority but is an SMT sibling
+ * in which other SMT siblings are also busy, a lower-priority
+ * CPU in a separate core can help.
*/
return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
- sched_asym_prefer(env->dst_cpu, env->src_cpu);
+ (sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
+ !(env->sd->flags & SD_SHARE_CPUCAPACITY));
}
static inline bool
--
2.39.1

View File

@@ -820,3 +820,31 @@ index a0b0397e29ee4c..87a983a356530c 100644
spin_unlock(&zone->lock);
return allocated;
}
From 6329525a0fa10cd13f39b76948b1296150f75c95 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 29 Aug 2022 16:47:26 +0000
Subject: [PATCH 14/16] XANMOD: Makefile: Disable GCC vectorization on trees
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
Makefile | 3 +++
1 file changed, 3 insertions(+)
diff --git a/Makefile b/Makefile
index 3f6628780eb2..35a5ae1ede42 100644
--- a/Makefile
+++ b/Makefile
@@ -1069,6 +1069,9 @@ endif
KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
+# disable GCC vectorization on trees
+KBUILD_CFLAGS += $(call cc-option, -fno-tree-vectorize)
+
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += -fno-strict-overflow
--
2.39.1

View File

@@ -20,3 +20,265 @@ index 6b3b59cc51d6..2a0072192c3d 100644
int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
From 5d5b708e3731e135ea7ae168571ad78d883e63e8 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Wed, 1 Feb 2023 10:17:47 +0000
Subject: [PATCH 02/16] XANMOD: fair: Remove all energy efficiency functions
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
kernel/sched/fair.c | 224 +-------------------------------------------
1 file changed, 3 insertions(+), 221 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0f8736991427..345cc5e9fa6e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -19,6 +19,9 @@
*
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ * Remove energy efficiency functions by Alexandre Frade
+ * (C) 2021 Alexandre Frade <kernel@xanmod.org>
*/
#include <linux/energy_model.h>
#include <linux/mmap_lock.h>
@@ -7136,219 +7139,6 @@ eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
return min(max_util, eenv->cpu_cap);
}
-/*
- * compute_energy(): Use the Energy Model to estimate the energy that @pd would
- * consume for a given utilization landscape @eenv. When @dst_cpu < 0, the task
- * contribution is ignored.
- */
-static inline unsigned long
-compute_energy(struct energy_env *eenv, struct perf_domain *pd,
- struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
-{
- unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
- unsigned long busy_time = eenv->pd_busy_time;
-
- if (dst_cpu >= 0)
- busy_time = min(eenv->pd_cap, busy_time + eenv->task_busy_time);
-
- return em_cpu_energy(pd->em_pd, max_util, busy_time, eenv->cpu_cap);
-}
-
-/*
- * find_energy_efficient_cpu(): Find most energy-efficient target CPU for the
- * waking task. find_energy_efficient_cpu() looks for the CPU with maximum
- * spare capacity in each performance domain and uses it as a potential
- * candidate to execute the task. Then, it uses the Energy Model to figure
- * out which of the CPU candidates is the most energy-efficient.
- *
- * The rationale for this heuristic is as follows. In a performance domain,
- * all the most energy efficient CPU candidates (according to the Energy
- * Model) are those for which we'll request a low frequency. When there are
- * several CPUs for which the frequency request will be the same, we don't
- * have enough data to break the tie between them, because the Energy Model
- * only includes active power costs. With this model, if we assume that
- * frequency requests follow utilization (e.g. using schedutil), the CPU with
- * the maximum spare capacity in a performance domain is guaranteed to be among
- * the best candidates of the performance domain.
- *
- * In practice, it could be preferable from an energy standpoint to pack
- * small tasks on a CPU in order to let other CPUs go in deeper idle states,
- * but that could also hurt our chances to go cluster idle, and we have no
- * ways to tell with the current Energy Model if this is actually a good
- * idea or not. So, find_energy_efficient_cpu() basically favors
- * cluster-packing, and spreading inside a cluster. That should at least be
- * a good thing for latency, and this is consistent with the idea that most
- * of the energy savings of EAS come from the asymmetry of the system, and
- * not so much from breaking the tie between identical CPUs. That's also the
- * reason why EAS is enabled in the topology code only for systems where
- * SD_ASYM_CPUCAPACITY is set.
- *
- * NOTE: Forkees are not accepted in the energy-aware wake-up path because
- * they don't have any useful utilization data yet and it's not possible to
- * forecast their impact on energy consumption. Consequently, they will be
- * placed by find_idlest_cpu() on the least loaded CPU, which might turn out
- * to be energy-inefficient in some use-cases. The alternative would be to
- * bias new tasks towards specific types of CPUs first, or to try to infer
- * their util_avg from the parent task, but those heuristics could hurt
- * other use-cases too. So, until someone finds a better way to solve this,
- * let's keep things simple by re-using the existing slow path.
- */
-static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
-{
- struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
- unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
- unsigned long p_util_min = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MIN) : 0;
- unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024;
- struct root_domain *rd = this_rq()->rd;
- int cpu, best_energy_cpu, target = -1;
- struct sched_domain *sd;
- struct perf_domain *pd;
- struct energy_env eenv;
-
- rcu_read_lock();
- pd = rcu_dereference(rd->pd);
- if (!pd || READ_ONCE(rd->overutilized))
- goto unlock;
-
- /*
- * Energy-aware wake-up happens on the lowest sched_domain starting
- * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
- */
- sd = rcu_dereference(*this_cpu_ptr(&sd_asym_cpucapacity));
- while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
- sd = sd->parent;
- if (!sd)
- goto unlock;
-
- target = prev_cpu;
-
- sync_entity_load_avg(&p->se);
- if (!uclamp_task_util(p, p_util_min, p_util_max))
- goto unlock;
-
- eenv_task_busy_time(&eenv, p, prev_cpu);
-
- for (; pd; pd = pd->next) {
- unsigned long util_min = p_util_min, util_max = p_util_max;
- unsigned long cpu_cap, cpu_thermal_cap, util;
- unsigned long cur_delta, max_spare_cap = 0;
- unsigned long rq_util_min, rq_util_max;
- unsigned long prev_spare_cap = 0;
- int max_spare_cap_cpu = -1;
- unsigned long base_energy;
-
- cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);
-
- if (cpumask_empty(cpus))
- continue;
-
- /* Account thermal pressure for the energy estimation */
- cpu = cpumask_first(cpus);
- cpu_thermal_cap = arch_scale_cpu_capacity(cpu);
- cpu_thermal_cap -= arch_scale_thermal_pressure(cpu);
-
- eenv.cpu_cap = cpu_thermal_cap;
- eenv.pd_cap = 0;
-
- for_each_cpu(cpu, cpus) {
- struct rq *rq = cpu_rq(cpu);
-
- eenv.pd_cap += cpu_thermal_cap;
-
- if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
- continue;
-
- if (!cpumask_test_cpu(cpu, p->cpus_ptr))
- continue;
-
- util = cpu_util_next(cpu, p, cpu);
- cpu_cap = capacity_of(cpu);
-
- /*
- * Skip CPUs that cannot satisfy the capacity request.
- * IOW, placing the task there would make the CPU
- * overutilized. Take uclamp into account to see how
- * much capacity we can get out of the CPU; this is
- * aligned with sched_cpu_util().
- */
- if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
- /*
- * Open code uclamp_rq_util_with() except for
- * the clamp() part. Ie: apply max aggregation
- * only. util_fits_cpu() logic requires to
- * operate on non clamped util but must use the
- * max-aggregated uclamp_{min, max}.
- */
- rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
- rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
-
- util_min = max(rq_util_min, p_util_min);
- util_max = max(rq_util_max, p_util_max);
- }
- if (!util_fits_cpu(util, util_min, util_max, cpu))
- continue;
-
- lsub_positive(&cpu_cap, util);
-
- if (cpu == prev_cpu) {
- /* Always use prev_cpu as a candidate. */
- prev_spare_cap = cpu_cap;
- } else if (cpu_cap > max_spare_cap) {
- /*
- * Find the CPU with the maximum spare capacity
- * among the remaining CPUs in the performance
- * domain.
- */
- max_spare_cap = cpu_cap;
- max_spare_cap_cpu = cpu;
- }
- }
-
- if (max_spare_cap_cpu < 0 && prev_spare_cap == 0)
- continue;
-
- eenv_pd_busy_time(&eenv, cpus, p);
- /* Compute the 'base' energy of the pd, without @p */
- base_energy = compute_energy(&eenv, pd, cpus, p, -1);
-
- /* Evaluate the energy impact of using prev_cpu. */
- if (prev_spare_cap > 0) {
- prev_delta = compute_energy(&eenv, pd, cpus, p,
- prev_cpu);
- /* CPU utilization has changed */
- if (prev_delta < base_energy)
- goto unlock;
- prev_delta -= base_energy;
- best_delta = min(best_delta, prev_delta);
- }
-
- /* Evaluate the energy impact of using max_spare_cap_cpu. */
- if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
- cur_delta = compute_energy(&eenv, pd, cpus, p,
- max_spare_cap_cpu);
- /* CPU utilization has changed */
- if (cur_delta < base_energy)
- goto unlock;
- cur_delta -= base_energy;
- if (cur_delta < best_delta) {
- best_delta = cur_delta;
- best_energy_cpu = max_spare_cap_cpu;
- }
- }
- }
- rcu_read_unlock();
-
- if (best_delta < prev_delta)
- target = best_energy_cpu;
-
- return target;
-
-unlock:
- rcu_read_unlock();
-
- return target;
-}
-
/*
* select_task_rq_fair: Select target runqueue for the waking task in domains
* that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
@@ -7376,14 +7166,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
lockdep_assert_held(&p->pi_lock);
if (wake_flags & WF_TTWU) {
record_wakee(p);
-
- if (sched_energy_enabled()) {
- new_cpu = find_energy_efficient_cpu(p, prev_cpu);
- if (new_cpu >= 0)
- return new_cpu;
- new_cpu = prev_cpu;
- }
-
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
}
--
2.39.1

View File

@@ -88,3 +88,31 @@ index 6b423eebfd5d..61e3271675d6 100644
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
From cba31b19f8c38696b13ba48e0e8b6dbe747d6bae Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:31:25 +0000
Subject: [PATCH 10/16] XANMOD: mm/vmscan: vm_swappiness = 30 decreases the
amount of swapping
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
mm/vmscan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5b7b8d4f5297..549684b29418 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -190,7 +190,7 @@ struct scan_control {
/*
* From 0 .. 200. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 30;
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)
--
2.39.1