From ee6cc74e03bb4911d2a7a90be5ccd74bb21dbf4a Mon Sep 17 00:00:00 2001
From: Tk-Glitch
Date: Fri, 3 Mar 2023 13:20:55 +0100
Subject: [PATCH] linux 6.2.y: glitched-base: Add cherry picks from xanmod kernel

https://github.com/xanmod/linux-patches/tree/master/linux-6.2.y-xanmod/xanmod
---
 .../6.2/0003-glitched-base.patch | 318 ++++++++++++++++++
 1 file changed, 318 insertions(+)

diff --git a/linux-tkg-patches/6.2/0003-glitched-base.patch b/linux-tkg-patches/6.2/0003-glitched-base.patch
index f6eda96..1f77f75 100644
--- a/linux-tkg-patches/6.2/0003-glitched-base.patch
+++ b/linux-tkg-patches/6.2/0003-glitched-base.patch
@@ -820,3 +820,321 @@ index a0b0397e29ee4c..87a983a356530c 100644
 spin_unlock(&zone->lock);
 return allocated;
 }
+
+From 5d5b708e3731e135ea7ae168571ad78d883e63e8 Mon Sep 17 00:00:00 2001
+From: Alexandre Frade
+Date: Wed, 1 Feb 2023 10:17:47 +0000
+Subject: [PATCH 02/16] XANMOD: fair: Remove all energy efficiency functions
+
+Signed-off-by: Alexandre Frade
+---
+ kernel/sched/fair.c | 224 +-------------------------------------------
+ 1 file changed, 3 insertions(+), 221 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 0f8736991427..345cc5e9fa6e 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -19,6 +19,9 @@
+ *
+ * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
++ *
++ * Remove energy efficiency functions by Alexandre Frade
++ * (C) 2021 Alexandre Frade
+ */
+ #include
+ #include
+@@ -7136,219 +7139,6 @@ eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
+ return min(max_util, eenv->cpu_cap);
+ }
+
+-/*
+- * compute_energy(): Use the Energy Model to estimate the energy that @pd would
+- * consume for a given utilization landscape @eenv. When @dst_cpu < 0, the task
+- * contribution is ignored.
+- */
+-static inline unsigned long
+-compute_energy(struct energy_env *eenv, struct perf_domain *pd,
+- struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
+-{
+- unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
+- unsigned long busy_time = eenv->pd_busy_time;
+-
+- if (dst_cpu >= 0)
+- busy_time = min(eenv->pd_cap, busy_time + eenv->task_busy_time);
+-
+- return em_cpu_energy(pd->em_pd, max_util, busy_time, eenv->cpu_cap);
+-}
+-
+-/*
+- * find_energy_efficient_cpu(): Find most energy-efficient target CPU for the
+- * waking task. find_energy_efficient_cpu() looks for the CPU with maximum
+- * spare capacity in each performance domain and uses it as a potential
+- * candidate to execute the task. Then, it uses the Energy Model to figure
+- * out which of the CPU candidates is the most energy-efficient.
+- *
+- * The rationale for this heuristic is as follows. In a performance domain,
+- * all the most energy efficient CPU candidates (according to the Energy
+- * Model) are those for which we'll request a low frequency. When there are
+- * several CPUs for which the frequency request will be the same, we don't
+- * have enough data to break the tie between them, because the Energy Model
+- * only includes active power costs. With this model, if we assume that
+- * frequency requests follow utilization (e.g. using schedutil), the CPU with
+- * the maximum spare capacity in a performance domain is guaranteed to be among
+- * the best candidates of the performance domain.
+- *
+- * In practice, it could be preferable from an energy standpoint to pack
+- * small tasks on a CPU in order to let other CPUs go in deeper idle states,
+- * but that could also hurt our chances to go cluster idle, and we have no
+- * ways to tell with the current Energy Model if this is actually a good
+- * idea or not. So, find_energy_efficient_cpu() basically favors
+- * cluster-packing, and spreading inside a cluster. That should at least be
+- * a good thing for latency, and this is consistent with the idea that most
+- * of the energy savings of EAS come from the asymmetry of the system, and
+- * not so much from breaking the tie between identical CPUs. That's also the
+- * reason why EAS is enabled in the topology code only for systems where
+- * SD_ASYM_CPUCAPACITY is set.
+- *
+- * NOTE: Forkees are not accepted in the energy-aware wake-up path because
+- * they don't have any useful utilization data yet and it's not possible to
+- * forecast their impact on energy consumption. Consequently, they will be
+- * placed by find_idlest_cpu() on the least loaded CPU, which might turn out
+- * to be energy-inefficient in some use-cases. The alternative would be to
+- * bias new tasks towards specific types of CPUs first, or to try to infer
+- * their util_avg from the parent task, but those heuristics could hurt
+- * other use-cases too. So, until someone finds a better way to solve this,
+- * let's keep things simple by re-using the existing slow path.
+- */
+-static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
+-{
+- struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
+- unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
+- unsigned long p_util_min = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MIN) : 0;
+- unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024;
+- struct root_domain *rd = this_rq()->rd;
+- int cpu, best_energy_cpu, target = -1;
+- struct sched_domain *sd;
+- struct perf_domain *pd;
+- struct energy_env eenv;
+-
+- rcu_read_lock();
+- pd = rcu_dereference(rd->pd);
+- if (!pd || READ_ONCE(rd->overutilized))
+- goto unlock;
+-
+- /*
+- * Energy-aware wake-up happens on the lowest sched_domain starting
+- * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
+- */
+- sd = rcu_dereference(*this_cpu_ptr(&sd_asym_cpucapacity));
+- while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
+- sd = sd->parent;
+- if (!sd)
+- goto unlock;
+-
+- target = prev_cpu;
+-
+- sync_entity_load_avg(&p->se);
+- if (!uclamp_task_util(p, p_util_min, p_util_max))
+- goto unlock;
+-
+- eenv_task_busy_time(&eenv, p, prev_cpu);
+-
+- for (; pd; pd = pd->next) {
+- unsigned long util_min = p_util_min, util_max = p_util_max;
+- unsigned long cpu_cap, cpu_thermal_cap, util;
+- unsigned long cur_delta, max_spare_cap = 0;
+- unsigned long rq_util_min, rq_util_max;
+- unsigned long prev_spare_cap = 0;
+- int max_spare_cap_cpu = -1;
+- unsigned long base_energy;
+-
+- cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);
+-
+- if (cpumask_empty(cpus))
+- continue;
+-
+- /* Account thermal pressure for the energy estimation */
+- cpu = cpumask_first(cpus);
+- cpu_thermal_cap = arch_scale_cpu_capacity(cpu);
+- cpu_thermal_cap -= arch_scale_thermal_pressure(cpu);
+-
+- eenv.cpu_cap = cpu_thermal_cap;
+- eenv.pd_cap = 0;
+-
+- for_each_cpu(cpu, cpus) {
+- struct rq *rq = cpu_rq(cpu);
+-
+- eenv.pd_cap += cpu_thermal_cap;
+-
+- if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
+- continue;
+-
+- if (!cpumask_test_cpu(cpu, p->cpus_ptr))
+- continue;
+-
+- util = cpu_util_next(cpu, p, cpu);
+- cpu_cap = capacity_of(cpu);
+-
+- /*
+- * Skip CPUs that cannot satisfy the capacity request.
+- * IOW, placing the task there would make the CPU
+- * overutilized. Take uclamp into account to see how
+- * much capacity we can get out of the CPU; this is
+- * aligned with sched_cpu_util().
+- */
+- if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
+- /*
+- * Open code uclamp_rq_util_with() except for
+- * the clamp() part. Ie: apply max aggregation
+- * only. util_fits_cpu() logic requires to
+- * operate on non clamped util but must use the
+- * max-aggregated uclamp_{min, max}.
+- */
+- rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
+- rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
+-
+- util_min = max(rq_util_min, p_util_min);
+- util_max = max(rq_util_max, p_util_max);
+- }
+- if (!util_fits_cpu(util, util_min, util_max, cpu))
+- continue;
+-
+- lsub_positive(&cpu_cap, util);
+-
+- if (cpu == prev_cpu) {
+- /* Always use prev_cpu as a candidate. */
+- prev_spare_cap = cpu_cap;
+- } else if (cpu_cap > max_spare_cap) {
+- /*
+- * Find the CPU with the maximum spare capacity
+- * among the remaining CPUs in the performance
+- * domain.
+- */
+- max_spare_cap = cpu_cap;
+- max_spare_cap_cpu = cpu;
+- }
+- }
+-
+- if (max_spare_cap_cpu < 0 && prev_spare_cap == 0)
+- continue;
+-
+- eenv_pd_busy_time(&eenv, cpus, p);
+- /* Compute the 'base' energy of the pd, without @p */
+- base_energy = compute_energy(&eenv, pd, cpus, p, -1);
+-
+- /* Evaluate the energy impact of using prev_cpu. */
+- if (prev_spare_cap > 0) {
+- prev_delta = compute_energy(&eenv, pd, cpus, p,
+- prev_cpu);
+- /* CPU utilization has changed */
+- if (prev_delta < base_energy)
+- goto unlock;
+- prev_delta -= base_energy;
+- best_delta = min(best_delta, prev_delta);
+- }
+-
+- /* Evaluate the energy impact of using max_spare_cap_cpu. */
+- if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
+- cur_delta = compute_energy(&eenv, pd, cpus, p,
+- max_spare_cap_cpu);
+- /* CPU utilization has changed */
+- if (cur_delta < base_energy)
+- goto unlock;
+- cur_delta -= base_energy;
+- if (cur_delta < best_delta) {
+- best_delta = cur_delta;
+- best_energy_cpu = max_spare_cap_cpu;
+- }
+- }
+- }
+- rcu_read_unlock();
+-
+- if (best_delta < prev_delta)
+- target = best_energy_cpu;
+-
+- return target;
+-
+-unlock:
+- rcu_read_unlock();
+-
+- return target;
+-}
+-
+ /*
+ * select_task_rq_fair: Select target runqueue for the waking task in domains
+ * that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
+@@ -7376,14 +7166,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
+ lockdep_assert_held(&p->pi_lock);
+ if (wake_flags & WF_TTWU) {
+ record_wakee(p);
+-
+- if (sched_energy_enabled()) {
+- new_cpu = find_energy_efficient_cpu(p, prev_cpu);
+- if (new_cpu >= 0)
+- return new_cpu;
+- new_cpu = prev_cpu;
+- }
+-
+ want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
+ }
+
+-- 
+2.39.1
+
+From cba31b19f8c38696b13ba48e0e8b6dbe747d6bae Mon Sep 17 00:00:00 2001
+From: Alexandre Frade
+Date: Mon, 29 Jan 2018 17:31:25 +0000
+Subject: [PATCH 10/16] XANMOD: mm/vmscan: vm_swappiness = 30 decreases the
+ amount of swapping
+
+Signed-off-by: Alexandre Frade
+Signed-off-by: Alexandre Frade
+---
+ mm/vmscan.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 5b7b8d4f5297..549684b29418 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -190,7 +190,7 @@ struct scan_control {
+ /*
+ * From 0 .. 200. Higher means more swappy.
+ */
+-int vm_swappiness = 60;
++int vm_swappiness = 30;
+
+ static void set_task_reclaim_state(struct task_struct *task,
+ struct reclaim_state *rs)
+-- 
+2.39.1
+
+From 6329525a0fa10cd13f39b76948b1296150f75c95 Mon Sep 17 00:00:00 2001
+From: Alexandre Frade
+Date: Mon, 29 Aug 2022 16:47:26 +0000
+Subject: [PATCH 14/16] XANMOD: Makefile: Disable GCC vectorization on trees
+
+Signed-off-by: Alexandre Frade
+---
+ Makefile | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/Makefile b/Makefile
+index 3f6628780eb2..35a5ae1ede42 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1069,6 +1069,9 @@ endif
+ KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
+ KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
+
++# disable GCC vectorization on trees
++KBUILD_CFLAGS += $(call cc-option, -fno-tree-vectorize)
++
+ # disable invalid "can't wrap" optimizations for signed / pointers
+ KBUILD_CFLAGS += -fno-strict-overflow
+
+-- 
+2.39.1
+
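
Editor's note (not part of the cherry-picked patches above): the vm_swappiness hunk only changes the kernel's compiled-in default; a sysctl.d entry or init script can still override it at boot. A minimal user-space check, assuming only the standard procfs sysctl interface at /proc/sys/vm/swappiness, might look like this:

    /*
     * Illustrative sketch only -- not part of the patch series above.
     * Reads the effective vm.swappiness value through the standard procfs
     * sysctl interface. A kernel carrying the XANMOD default should report
     * 30, a stock 6.2 kernel 60, unless userspace has overridden it.
     */
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/vm/swappiness", "r");
            int swappiness;

            if (!f) {
                    perror("fopen(/proc/sys/vm/swappiness)");
                    return 1;
            }
            if (fscanf(f, "%d", &swappiness) != 1) {
                    fprintf(stderr, "unexpected /proc/sys/vm/swappiness format\n");
                    fclose(f);
                    return 1;
            }
            fclose(f);
            printf("vm.swappiness = %d\n", swappiness);
            return 0;
    }

The same file is writable by root, so writing 60 back to /proc/sys/vm/swappiness restores the upstream behaviour at runtime without rebuilding the kernel.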