Docs, prepare: EEVDFfy (#844)

* Docs, prepare: EEVDFfy

* 6.6: add glitched eevdf additions

* apply comments

* more comments

* comments
This commit is contained in:
kylon
2023-11-14 10:12:40 +01:00
committed by GitHub
parent 562a12e778
commit f92d4ed9ca
5 changed files with 126 additions and 157 deletions

View File

@@ -15,21 +15,24 @@ This repository provides scripts to automatically download, patch and compile th
### Customization options
#### Alternative CPU schedulers
[CFS](https://en.wikipedia.org/wiki/Completely_Fair_Scheduler) is the only CPU scheduler available in the "vanilla" kernel sources. Its current implementation doesn't allow for injecting additional schedulers, and requires replacing it. Only one scheduler can be patched in at a time.
[CFS](https://en.wikipedia.org/wiki/Completely_Fair_Scheduler) is the only CPU scheduler available in the "vanilla" kernel sources ≤ 6.5.
[EEVDF](https://lwn.net/Articles/925371/) is the only CPU scheduler available in the "vanilla" kernel sources ≥ 6.6.
The kernel's built-in scheduler implementation doesn't allow for injecting additional schedulers alongside it, so using an alternative scheduler requires replacing it. Only one scheduler can be patched in at a time.
Alternative schedulers are available to you in linux-tkg:
- Project C / PDS & BMQ by Alfred Chen: [blog](http://cchalpha.blogspot.com/), [code repository](https://gitlab.com/alfredchen/projectc)
- MuQSS by Con Kolivas : [blog](http://ck-hack.blogspot.com/), [code repository](https://github.com/ckolivas/linux)
- CacULE by Hamad Marri - CFS based : [code repository](https://github.com/hamadmarri/cacule-cpu-scheduler)
- Task Type (TT) by Hamad Marri - CFS based : [code repository](https://github.com/hamadmarri/TT-CPU-Scheduler)
- BORE (Burst-Oriented Response Enhancer) by Masahito Suzuki - CFS based : [code repository](https://github.com/firelzrd/bore-scheduler)
- BORE (Burst-Oriented Response Enhancer) by Masahito Suzuki - CFS/EEVDF based : [code repository](https://github.com/firelzrd/bore-scheduler)
- Undead PDS : TkG's port of the pre-Project C "PDS-mq" scheduler by Alfred Chen. PDS-mq got dropped with kernel 5.1 in favor of its BMQ evolution/rework, but BMQ wasn't on par with PDS-mq in gaming. "U" PDS still performed better in some cases than other schedulers, so it's been kept undead for a while.
These alternative schedulers can offer a better performance/latency ratio for gaming and desktop use. The availability of each scheduler depends on the chosen Kernel version: the script will display what's available on a per-version basis.
#### Default tweaks
- Memory management and swapping tweaks
- Scheduling tweaks
- `CFS` tweaks
- `CFS/EEVDF` tweaks
- Using the ["Cake"](https://www.bufferbloat.net/projects/codel/wiki/CakeTechnical/) network queue management system
- Using `vm.max_map_count=16777216` by default
- Cherry-picked patches from [Clear Linux's patchset](https://github.com/clearlinux-pkgs/linux)

View File

@@ -102,7 +102,7 @@ _STRIP="true"
# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME
# CPU scheduler - Options are "pds", "bmq", "cacule", "tt", "bore", "bore-eevdf", "eevdf" or "cfs" (kernel's default)
# CPU scheduler - Options are "pds", "bmq", "cacule", "tt", "bore", "bore-eevdf", "cfs" (kernel's default on linux 6.5 and older) or "eevdf" (kernel's default on linux 6.6+)
# "upds" (TkG's Undead PDS) and "muqss" are also available on legacy kernel revisions
_cpusched=""

View File

@@ -179,7 +179,15 @@ _set_kver_internal_vars() {
# examples: "600", "515", "504" we use this variable to have proper comparisons
_kver="${_kver_split[0]}${_kver_split[1]}"
echo -e "_basekernel='$_basekernel'\n_basever='$_basever'\n_sub='$_sub'\n_kver='$_kver'" > "$_where"/BIG_UGLY_FROGMINER
if [ $_kver -le 605 ]; then
# chosen kernel <= 6.5
_default_cpu_sched="cfs"
else
# chosen kernel >= 6.6
_default_cpu_sched="eevdf"
fi
echo -e "_basekernel='$_basekernel'\n_basever='$_basever'\n_sub='$_sub'\n_kver='$_kver'\n_default_cpu_sched='$_default_cpu_sched'" > "$_where"/BIG_UGLY_FROGMINER
}
_set_kernel_version() {
@@ -252,14 +260,14 @@ _set_cpu_scheduler() {
_sched_description_array=(
["pds"]="Project C / PDS"
["bmq"]="Project C / BMQ"
["cfs"]="CFS (Completely Fair Scheduler) Linux kernel's default"
["eevdf"]="EEVDF (Earliest Eligible Virtual Deadline First scheduler) Linux kernel's default for ≥ 6.6"
["cfs"]="CFS (Completely Fair Scheduler) Linux kernel's default for ≤ 6.5"
["muqss"]="MuQSS (Multiple Queue Skiplist Scheduler)"
["upds"]="Undead PDS (TkG)"
["cacule"]="CacULE"
["tt"]="TT (TaskType)"
["bore"]="BORE (Burst-Oriented Response Enhancer) CPU Scheduler"
["bore-eevdf"]="BORE (Burst-Oriented Response Enhancer - EEVDF variant) CPU Scheduler"
["eevdf"]="Earliest Eligible Virtual Deadline First (EEVDF) scheduler"
)
# CPU SCHED selector - _projectc_unoff=1 sets unofficial Project C revision flag for a given version
@@ -304,7 +312,7 @@ _set_cpu_scheduler() {
elif [ "$_kver" = "606" ]; then
_avail_cpu_scheds=("eevdf" "bore")
else
_avail_cpu_scheds=("cfs")
_avail_cpu_scheds=("$_default_cpu_sched")
fi
if [ "${_preempt_rt}" = "1" ]; then
@@ -348,7 +356,7 @@ _set_cpu_scheduler() {
if [ -z "$_cpusched" ]; then
msg2 "Which CPU sched variant do you want to build/install?"
msg2 "Project C (pds) / BMQ (bmq) is usually a good balance for gaming."
msg2 "Select \"cfs\" (linux kernel's default) if unsure."
msg2 "Select \"$_default_cpu_sched\" (linux kernel's default) if unsure."
_default_index="0"
_prompt_from_array "${_avail_cpu_scheds_text[@]}"
@@ -854,16 +862,23 @@ _tkg_srcprep() {
_msg="Applying Glitched CFS additions patch"
tkgpatch="$srcdir/0003-glitched-cfs-additions.patch" && _tkg_patcher
elif [[ "${_cpusched}" =~ "eevdf" ]]; then
_msg="Applying Earliest Eligible Virtual Deadline First (EEVDF) scheduler patch"
tkgpatch="$srcdir/0003-eevdf.patch" && _tkg_patcher
if [[ $_kver == "604" || $_kver == "605" ]]; then
_msg="Applying Earliest Eligible Virtual Deadline First (EEVDF) scheduler patch"
tkgpatch="$srcdir/0003-eevdf.patch" && _tkg_patcher
if [[ "${_cpusched}" != "bore-eevdf" ]]; then
_msg="Applying eevdf-Disable-DELAY_DEQUEUE patch"
tkgpatch="$srcdir/0004-eevdf-Disable-DELAY_DEQUEUE.patch" && _tkg_patcher
fi
elif [[ $_kver -ge 606 ]]; then
_msg="Applying Glitched EEVDF additions patch"
tkgpatch="$srcdir/0003-glitched-eevdf-additions.patch" && _tkg_patcher
fi
if [ "${_cpusched}" = "bore-eevdf" ]; then
_msg="Applying BORE-EEVDF patch"
curl "https://raw.githubusercontent.com/CachyOS/kernel-patches/master/${_basekernel}/sched/0001-bore-eevdf.patch" > "$srcdir"/0001-bore-eevdf.patch
tkgpatch="$srcdir/0001-bore-eevdf.patch" && _tkg_patcher
else
_msg="Applying eevdf-Disable-DELAY_DEQUEUE patch"
tkgpatch="$srcdir/0004-eevdf-Disable-DELAY_DEQUEUE.patch" && _tkg_patcher
fi
fi
@@ -1110,7 +1125,7 @@ _tkg_srcprep() {
fi
if [[ "${_cpusched}" =~ ^(muqss|pds|bmq|upds)$ ]]; then
# Disable CFS
# Disable CFS/EEVDF
_disable "FAIR_GROUP_SCHED"
_disable "CFS_BANDWIDTH"

View File

@@ -1,142 +0,0 @@
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
*
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_wakeup_granularity = 500000UL;
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL;
+
+const_debug unsigned int sysctl_sched_migration_cost = 50000UL;
+#else
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#endif
int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 051aaf65c..705df5511 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -208,7 +208,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
DEFINE_STATIC_KEY_FALSE(sched_energy_present);
-static unsigned int sysctl_sched_energy_aware = 1;
+static unsigned int sysctl_sched_energy_aware = 0;
static DEFINE_MUTEX(sched_energy_mutex);
static bool sched_energy_update;
From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001
From: Etienne Juvigny <Ti3noU@gmail.com>
Date: Mon, 3 Sep 2018 17:36:25 +0200
Subject: [PATCH] Zenify & stuff
---
kernel/sched/fair.c | 25 +++++++++++++++++++++++++
mm/page-writeback.c | 8 ++++++++
2 files changed, 33 insertions(+)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -37,8 +37,13 @@
*
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_latency = 3000000ULL;
+static unsigned int normalized_sysctl_sched_latency = 3000000ULL;
+#else
unsigned int sysctl_sched_latency = 6000000ULL;
static unsigned int normalized_sysctl_sched_latency = 6000000ULL;
+#endif
/*
* The initial- and re-scaling of tunables is configurable
@@ -58,21 +63,34 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
*
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_min_granularity = 300000ULL;
+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_min_granularity = 750000ULL;
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
+#endif
/*
* Minimal preemption granularity for CPU-bound SCHED_IDLE tasks.
* Applies only when SCHED_IDLE tasks compete with normal tasks.
*
* (default: 0.75 msec)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_idle_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_idle_min_granularity = 750000ULL;
+#endif
/*
* This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sched_nr_latency = 10;
+#else
static unsigned int sched_nr_latency = 8;
+#endif
/*
* After fork, child runs first. If set to 0 (default) then
@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu)
*
* (default: 5 msec, units: microseconds)
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
+#else
static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
+#endif
#ifdef CONFIG_SYSCTL
static struct ctl_table sched_fair_sysctls[] = {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 28b3e7a67565..01a1aef2b9b1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
/*
* Start background writeback (via writeback threads) at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int dirty_background_ratio = 20;
+#else
static int dirty_background_ratio = 10;
+#endif
/*
* dirty_background_bytes starts at 0 (disabled) so that it is a function of
@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable;
/*
* The generator of dirty data starts writeback at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int vm_dirty_ratio = 50;
+#else
static int vm_dirty_ratio = 20;
+#endif
/*
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of
--
2.28.0

View File

@@ -0,0 +1,93 @@
diff -Naur vlinux-6.6.1/kernel/sched/fair.c linux-6.6.1/kernel/sched/fair.c
--- vlinux-6.6.1/kernel/sched/fair.c 2023-11-08 11:56:25.000000000 +0100
+++ linux-6.6.1/kernel/sched/fair.c 2023-11-11 15:51:09.630279108 +0100
@@ -75,16 +75,24 @@
*
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_base_slice = 400000ULL;
+static unsigned int normalized_sysctl_sched_base_slice = 400000ULL;
+#else
unsigned int sysctl_sched_base_slice = 750000ULL;
static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
-
+#endif
/*
* After fork, child runs first. If set to 0 (default) then
* parent will (try to) run first.
*/
unsigned int sysctl_sched_child_runs_first __read_mostly;
+#ifdef CONFIG_ZENIFY
+const_debug unsigned int sysctl_sched_migration_cost = 250000UL;
+#else
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#endif
int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
@@ -135,8 +143,12 @@
*
* (default: 5 msec, units: microseconds)
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
+#else
static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
+#endif
#ifdef CONFIG_NUMA_BALANCING
/* Restrict the NUMA promotion throughput (MB/s) for each target node. */
diff -Naur vlinux-6.6.1/kernel/sched/sched.h linux-6.6.1/kernel/sched/sched.h
--- vlinux-6.6.1/kernel/sched/sched.h 2023-11-08 11:56:25.000000000 +0100
+++ linux-6.6.1/kernel/sched/sched.h 2023-11-11 15:52:03.241725632 +0100
@@ -2515,7 +2515,7 @@
extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
-#ifdef CONFIG_PREEMPT_RT
+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_ZENIFY)
#define SCHED_NR_MIGRATE_BREAK 8
#else
#define SCHED_NR_MIGRATE_BREAK 32
diff -Naur vlinux-6.6.1/kernel/sched/topology.c linux-6.6.1/kernel/sched/topology.c
--- vlinux-6.6.1/kernel/sched/topology.c 2023-11-08 11:56:25.000000000 +0100
+++ linux-6.6.1/kernel/sched/topology.c 2023-11-11 15:56:54.602473894 +0100
@@ -208,7 +208,7 @@
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
DEFINE_STATIC_KEY_FALSE(sched_energy_present);
-static unsigned int sysctl_sched_energy_aware = 1;
+static unsigned int sysctl_sched_energy_aware = 0;
static DEFINE_MUTEX(sched_energy_mutex);
static bool sched_energy_update;
diff -Naur vlinux-6.6.1/mm/page-writeback.c linux-6.6.1/mm/page-writeback.c
--- vlinux-6.6.1/mm/page-writeback.c 2023-11-08 11:56:25.000000000 +0100
+++ linux-6.6.1/mm/page-writeback.c 2023-11-11 16:07:22.214222902 +0100
@@ -71,7 +71,11 @@
/*
* Start background writeback (via writeback threads) at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int dirty_background_ratio = 20;
+#else
static int dirty_background_ratio = 10;
+#endif
/*
* dirty_background_bytes starts at 0 (disabled) so that it is a function of
@@ -88,7 +92,11 @@
/*
* The generator of dirty data starts writeback at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int vm_dirty_ratio = 50;
+#else
static int vm_dirty_ratio = 20;
+#endif
/*
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of