From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: [PATCH 01/17] glitched

---
init/Makefile | 2 +-
1 file changed, 1 insertions(+), 1 deletions(-)

diff --git a/init/Makefile b/init/Makefile
index baf3ab8d9d49..854e32e6aec7 100755
--- a/init/Makefile
+++ b/init/Makefile
@@ -19,7 +19,7 @@ else

# Maximum length of UTS_VERSION is 64 chars
filechk_uts_version = \
- utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "$(build-timestamp)" | cut -b -64); \
+ utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "TKG" "$(build-timestamp)" | cut -b -64); \
echo '$(pound)'define UTS_VERSION \""$${utsver}"\"

#
--
2.28.0

From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 16:59:22 +0000
Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which
VFS caches are reclaimed

Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
fs/dcache.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 361ea7ab30ea..0c5cf69b241a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -71,7 +71,7 @@
* If no ancestor relationship:
* arbitrary, since it's serialized on rename_lock
*/
-int sysctl_vfs_cache_pressure __read_mostly = 100;
+int sysctl_vfs_cache_pressure __read_mostly = 50;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
--
2.28.0

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f788cd61df21..2bfbb4213707 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -15,9 +15,9 @@ __read_mostly int scheduler_running;

/*
* part of the period that we allow rt tasks to run in us.
- * default: 0.95s
+ * XanMod default: 0.98s
*/
-int sysctl_sched_rt_runtime = 950000;
+int sysctl_sched_rt_runtime = 980000;

#ifdef CONFIG_SYSCTL
static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
--
2.28.0

From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:41:29 +0000
Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo

Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
scripts/setlocalversion | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 20f2efd57b11..0552d8b9f582 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -54,7 +54,7 @@ scm_version()
# If only the short version is requested, don't bother
# running further git commands
if $short; then
- echo "+"
+ # echo "+"
return
fi
# If we are past a tagged commit (like
--
2.28.0

From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Fri, 26 Oct 2018 11:22:33 +0100
Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3
inlining

---
drivers/infiniband/core/addr.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 3a98439bba83..6efc4f907f58 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
union {
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
+ struct sockaddr_ib _sockaddr_ib;
} sgid_addr, dgid_addr;
int ret;

--
2.28.0

From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001
From: Etienne Juvigny <Ti3noU@gmail.com>
Date: Mon, 3 Sep 2018 17:36:25 +0200
Subject: [PATCH 07/17] Zenify & stuff

---
init/Kconfig | 32 ++++++++++++++++++++++++++++++++
kernel/sched/fair.c | 25 +++++++++++++++++++++++++
mm/page-writeback.c | 8 ++++++++
3 files changed, 65 insertions(+)

diff --git a/init/Kconfig b/init/Kconfig
index 3ae8678e1145..da708eed0f1e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK

menu "General setup"

+config ZENIFY
+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience"
+ default y
+ help
+ Tunes the kernel for responsiveness at the cost of throughput and power usage.
+
+ --- Virtual Memory Subsystem ---------------------------
+
+ Mem dirty before bg writeback..: 10 % -> 20 %
+ Mem dirty before sync writeback: 20 % -> 50 %
+
+ --- Block Layer ----------------------------------------
+
+ Queue depth...............: 128 -> 512
+ Default MQ scheduler......: mq-deadline -> bfq
+
+ --- CFS CPU Scheduler ----------------------------------
+
+ Scheduling latency.............: 6 -> 3 ms
+ Minimal granularity............: 0.75 -> 0.3 ms
+ Wakeup granularity.............: 1 -> 0.5 ms
+ CPU migration cost.............: 0.5 -> 0.25 ms
+ Bandwidth slice size...........: 5 -> 3 ms
+ Ondemand fine upscaling limit..: 95 % -> 85 %
+
+ --- MuQSS CPU Scheduler --------------------------------
+
+ Scheduling interval............: 6 -> 3 ms
+ ISO task max realtime use......: 70 % -> 25 %
+ Ondemand coarse upscaling limit: 80 % -> 45 %
+ Ondemand fine upscaling limit..: 95 % -> 45 %
+
config BROKEN
bool

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -37,8 +37,13 @@
*
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_latency = 3000000ULL;
+static unsigned int normalized_sysctl_sched_latency = 3000000ULL;
+#else
unsigned int sysctl_sched_latency = 6000000ULL;
static unsigned int normalized_sysctl_sched_latency = 6000000ULL;
+#endif

/*
* The initial- and re-scaling of tunables is configurable
@@ -58,21 +63,34 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
*
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_min_granularity = 300000ULL;
+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_min_granularity = 750000ULL;
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
+#endif

/*
* Minimal preemption granularity for CPU-bound SCHED_IDLE tasks.
* Applies only when SCHED_IDLE tasks compete with normal tasks.
*
* (default: 0.75 msec)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_idle_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_idle_min_granularity = 750000ULL;
+#endif

/*
* This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sched_nr_latency = 10;
+#else
static unsigned int sched_nr_latency = 8;
+#endif

/*
* After fork, child runs first. If set to 0 (default) then
@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu)
*
* (default: 5 msec, units: microseconds)
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
+#else
static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
+#endif

#ifdef CONFIG_SYSCTL
static struct ctl_table sched_fair_sysctls[] = {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 28b3e7a67565..01a1aef2b9b1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
/*
* Start background writeback (via writeback threads) at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int dirty_background_ratio = 20;
+#else
static int dirty_background_ratio = 10;
+#endif

/*
* dirty_background_bytes starts at 0 (disabled) so that it is a function of
@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable;
/*
* The generator of dirty data starts writeback at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int vm_dirty_ratio = 50;
+#else
static int vm_dirty_ratio = 20;
+#endif

/*
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of
--
2.28.0

From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001
From: Steven Barrett <damentz@liquorix.net>
Date: Sun, 16 Jan 2011 18:57:32 -0600
Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control

4.4: In my tests YeAH dramatically slowed down transfers over a WLAN,
reducing throughput from ~65Mbps (CUBIC) to ~7MBps (YeAH) over 10
seconds (netperf TCP_STREAM) including long stalls.

Be careful when choosing this. ~heftig
---
net/ipv4/Kconfig | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e64e59b536d3..bfb55ef7ebbe 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -691,6 +691,9 @@ choice
config DEFAULT_VEGAS
bool "Vegas" if TCP_CONG_VEGAS=y

+ config DEFAULT_YEAH
+ bool "YeAH" if TCP_CONG_YEAH=y
+
config DEFAULT_VENO
bool "Veno" if TCP_CONG_VENO=y

@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG
default "htcp" if DEFAULT_HTCP
default "hybla" if DEFAULT_HYBLA
default "vegas" if DEFAULT_VEGAS
+ default "yeah" if DEFAULT_YEAH
default "westwood" if DEFAULT_WESTWOOD
default "veno" if DEFAULT_VENO
default "reno" if DEFAULT_RENO
--
2.28.0

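[Editor's note, not part of the patch series] DEFAULT_TCP_CONG above only selects the system-wide default; an application can still pick its own congestion control per socket through the standard TCP_CONGESTION socket option. A minimal sketch, assuming the chosen module (e.g. tcp_yeah) is available on the running kernel:

    #include <stdio.h>
    #include <string.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    int main(void)
    {
            char want[16] = "yeah";   /* name of any algorithm the kernel has loaded */
            char cur[16] = { 0 };
            socklen_t len = sizeof(cur);
            int fd = socket(AF_INET, SOCK_STREAM, 0);

            if (fd < 0)
                    return 1;
            /* Override the compiled-in default for this socket only. */
            if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, want, strlen(want)) < 0)
                    perror("setsockopt(TCP_CONGESTION)");
            if (getsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, cur, &len) == 0)
                    printf("congestion control in use: %s\n", cur);
            return 0;
    }

If the setsockopt() fails the algorithm is simply not available, which mirrors heftig's warning above about choosing YeAH with care.
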
From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 28 Nov 2018 19:01:27 -0600
Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag
strategy

For some reason, the default strategy to respond to THP fault fallbacks
is still just madvise, meaning stall if the program wants transparent
hugepages, but don't trigger a background reclaim / compaction if THP
begins to fail allocations. This creates a snowball effect where we
still use the THP code paths, but we almost always fail once a system
has been active and busy for a while.

The option "defer" was created for interactive systems where THP can
still improve performance. If we have to fall back to a regular page due
to an allocation failure or anything else, we will trigger a background
reclaim and compaction so future THP attempts succeed and previous
attempts eventually have their smaller pages combined without stalling
running applications.

We still want madvise to stall applications that explicitly want THP,
so defer+madvise _does_ make a ton of sense. Make it the default for
interactive systems, especially if the kernel maintainer left
transparent hugepages on "always".

Reasoning and details in the original patch: https://lwn.net/Articles/711248/
---
mm/huge_memory.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74300e337c3c..9277f22c10a7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly =
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif
+#ifdef CONFIG_ZENIFY
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
+#else
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
+#endif
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);

--
2.28.0

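[Editor's note, not part of the patch series] The "madvise" half of defer+madvise only matters for mappings that explicitly opt into THP. A minimal userspace sketch of such an opt-in, assuming a THP-enabled kernel:

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            size_t len = 16UL << 20;  /* 16 MiB, a multiple of the 2 MiB huge page size */
            void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED)
                    return 1;
            /* Explicitly request transparent hugepages for this range; under
             * defer+madvise only ranges like this may still stall in direct
             * reclaim, everything else defers to kswapd/kcompactd. */
            if (madvise(p, len, MADV_HUGEPAGE) != 0)
                    perror("madvise(MADV_HUGEPAGE)");
            munmap(p, len);
            return 0;
    }
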
From 2b65a1329cb220b43c19c4d0de5833fae9e2b22d Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Wed, 24 Oct 2018 16:58:52 -0300
Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default

Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
net/sched/Kconfig | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 84badf00647e..6a922bca9f39 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -471,6 +471,9 @@ choice
config DEFAULT_SFQ
bool "Stochastic Fair Queue" if NET_SCH_SFQ

+ config DEFAULT_CAKE
+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE
+
config DEFAULT_PFIFO_FAST
bool "Priority FIFO Fast"
endchoice
@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH
default "fq" if DEFAULT_FQ
default "fq_codel" if DEFAULT_FQ_CODEL
default "sfq" if DEFAULT_SFQ
+ default "cake" if DEFAULT_CAKE
default "pfifo_fast"
endif

--
2.28.0

From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Fri, 19 Apr 2019 12:33:38 +0200
Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default

The value is still pretty low, and the AMD64 ABI and ELF extended numbering
support that, so we should be fine on modern x86 systems.

This fixes crashes in some applications using more than 65535 vmas (it also
affects some Windows games running in Wine, such as Star Citizen).
---
include/linux/mm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc05c3588aa3..b0cefe94920d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define MAPCOUNT_ELF_CORE_MARGIN (5)
-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+#define DEFAULT_MAX_MAP_COUNT (262144)

extern int sysctl_max_map_count;

--
2.28.0

From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Mon, 27 Jul 2020 00:19:18 +0200
Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT

Some games such as Detroit: Become Human tend to be very crash prone with
lower values.
---
include/linux/mm.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index b0cefe94920d..890165099b07 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define DEFAULT_MAX_MAP_COUNT (262144)
+#define DEFAULT_MAX_MAP_COUNT (16777216)

extern int sysctl_max_map_count;

--
2.28.0

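[Editor's note, not part of the patch series] DEFAULT_MAX_MAP_COUNT only seeds the vm.max_map_count sysctl; the effective limit stays tunable at runtime, and every distinct, non-mergeable mmap() costs one entry against it, which is how VMA-hungry Wine games exhausted the old ~65535 ceiling. A minimal sketch that reads the current limit, assuming procfs is mounted:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/vm/max_map_count", "r");
            long limit;

            if (!f)
                    return 1;
            if (fscanf(f, "%ld", &limit) == 1)
                    printf("vm.max_map_count = %ld\n", limit);
            fclose(f);
            return 0;
    }
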
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 25 Nov 2019 15:13:06 -0300
Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq

Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
block/elevator.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/elevator.c b/block/elevator.c
index 4eab3d70e880..79669aa39d79 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -623,19 +623,19 @@ static inline bool elv_support_iosched(struct request_queue *q)
}

/*
- * For single queue devices, default to using mq-deadline. If we have multiple
- * queues or mq-deadline is not available, default to "none".
+ * For single queue devices, default to using bfq. If we have multiple
+ * queues or bfq is not available, default to "none".
*/
static struct elevator_type *elevator_get_default(struct request_queue *q)
{
if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
return NULL;

if (q->nr_hw_queues != 1 &&
!blk_mq_is_shared_tags(q->tag_set->flags))
return NULL;

- return elevator_find_get(q, "mq-deadline");
+ return elevator_find_get(q, "bfq");
}

/*
--
2.28.0

From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 3 Aug 2020 17:05:04 +0000
Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file
read-ahead pages size

Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/pagemap.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cf2468da68e9..007dea784451 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);

-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE)

void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
struct file *, pgoff_t index, unsigned long req_count);
--
2.28.0

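[Editor's note, not part of the patch series] VM_READAHEAD_PAGES above only raises the kernel's default readahead window; an application can still hint its own access pattern per file descriptor. A minimal sketch using posix_fadvise(), with "/etc/hostname" as a stand-in path:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char *argv[])
    {
            const char *path = argc > 1 ? argv[1] : "/etc/hostname";
            int fd = open(path, O_RDONLY);
            int err;

            if (fd < 0)
                    return 1;
            /* Hint sequential access; the kernel typically widens the readahead
             * window (now derived from the 2 MiB default) for this descriptor. */
            err = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
            if (err)
                    fprintf(stderr, "posix_fadvise: error %d\n", err);
            close(fd);
            return 0;
    }
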
From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 15 Jan 2020 20:43:56 -0600
Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter

If intel-pstate is compiled into the kernel, it will preempt the loading
of acpi-cpufreq so you can take advantage of hardware p-states without
any friction.

However, intel-pstate is not completely superior to cpufreq's ondemand
for one reason. There's no concept of an up_threshold property.

In ondemand, up_threshold essentially reduces the maximum utilization to
compare against, allowing you to hit max frequencies and turbo boost
from a much lower core utilization.

With intel-pstate, you have the concept of minimum and maximum
performance, but no tunable that lets you define that maximum frequency
means 50% core utilization. For just this oversight, there are reasons
you may want ondemand.

Let's support setting "enable" in kernel boot parameters. This lets
kernel maintainers include "intel_pstate=disable" in the static boot
parameters, but lets users of the kernel override this selection.
---
Documentation/admin-guide/kernel-parameters.txt | 3 +++
drivers/cpufreq/intel_pstate.c | 2 ++
2 files changed, 5 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb95fad81c79..3e92fee81e33 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1857,6 +1857,9 @@
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
+ enable
+ Enable intel_pstate in-case "disable" was passed
+ previously in the kernel boot parameters
passive
Use intel_pstate as a scaling driver, but configure it
to work with generic cpufreq governors (instead of
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 36a469150ff9..aee891c9b78a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str)
if (!strcmp(str, "no_hwp"))
no_hwp = 1;

+ if (!strcmp(str, "enable"))
+ no_load = 0;
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))
--
2.28.0

From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001
From: Kenny Levinsen <kl@kl.wtf>
Date: Sun, 27 Dec 2020 14:43:13 +0000
Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client

Significant time was spent on synchronize_rcu in evdev_detach_client
when applications closed evdev devices. Switching VT away from a
graphical environment commonly leads to mass input device closures,
which could lead to noticeable delays on systems with many input devices.

Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
client struct till after the RCU grace period instead of blocking the
calling application.

While this does not solve all slow evdev fd closures, it takes care of a
good portion of them, including this simple test:

#include <fcntl.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
int idx, fd;
const char *path = "/dev/input/event0";
for (idx = 0; idx < 1000; idx++) {
if ((fd = open(path, O_RDWR)) == -1) {
return -1;
}
close(fd);
}
return 0;
}

Time to completion of above test when run locally:

Before: 0m27.111s
After: 0m0.018s

Signed-off-by: Kenny Levinsen <kl@kl.wtf>
---
drivers/input/evdev.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 95f90699d2b17b..2b10fe29d2c8d9 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -46,6 +46,7 @@ struct evdev_client {
struct fasync_struct *fasync;
struct evdev *evdev;
struct list_head node;
+ struct rcu_head rcu;
enum input_clock_type clk_type;
bool revoked;
unsigned long *evmasks[EV_CNT];
@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
spin_unlock(&evdev->client_lock);
}

+static void evdev_reclaim_client(struct rcu_head *rp)
+{
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
+ unsigned int i;
+ for (i = 0; i < EV_CNT; ++i)
+ bitmap_free(client->evmasks[i]);
+ kvfree(client);
+}
+
static void evdev_detach_client(struct evdev *evdev,
struct evdev_client *client)
{
spin_lock(&evdev->client_lock);
list_del_rcu(&client->node);
spin_unlock(&evdev->client_lock);
- synchronize_rcu();
+ call_rcu(&client->rcu, evdev_reclaim_client);
}

static int evdev_open_device(struct evdev *evdev)
@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
- unsigned int i;

mutex_lock(&evdev->mutex);

@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)

evdev_detach_client(evdev, client);

- for (i = 0; i < EV_CNT; ++i)
- bitmap_free(client->evmasks[i]);
-
- kvfree(client);
-
evdev_close_device(evdev);

return 0;
@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)

err_free_client:
evdev_detach_client(evdev, client);
- kvfree(client);
return error;
}

From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:11 -0700
Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
scheduling delays

The page allocator processes free pages in groups of pageblocks, where
the size of a pageblock is typically quite large (1024 pages without
hugetlbpage support). Pageblocks are processed atomically with the zone
lock held, which can cause severe scheduling delays on both the CPU
going through the pageblock and any other CPUs waiting to acquire the
zone lock. A frequent offender is move_freepages_block(), which is used
by rmqueue() for page allocation.

As it turns out, there's no requirement for pageblocks to be so large,
so the pageblock order can simply be reduced to ease the scheduling
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
reasonable setting to ensure non-costly page allocation requests can
still be serviced without always needing to free up more than one
pageblock's worth of pages at a time.

This has a noticeable effect on overall system latency when memory
pressure is elevated. The various mm functions which operate on
pageblocks no longer appear in the preemptoff tracer, where previously
they would spend up to 100 ms on a mobile arm64 CPU processing a
pageblock with preemption disabled and the zone lock held.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
include/linux/pageblock-flags.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 5f1ae07d724b88..97cda629c9e909 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;

#else /* CONFIG_HUGETLB_PAGE */

/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
-#define pageblock_order (MAX_ORDER-1)
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER

#endif /* CONFIG_HUGETLB_PAGE */

From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:32 -0700
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()

There is noticeable scheduling latency and heavy zone lock contention
stemming from rmqueue_bulk's single hold of the zone lock while doing
its work, as seen with the preemptoff tracer. There's no actual need for
rmqueue_bulk() to hold the zone lock the entire time; it only does so
for supposed efficiency. As such, we can relax the zone lock and even
reschedule when IRQs are enabled in order to keep the scheduling delays
and zone lock contention at bay. Forward progress is still guaranteed,
as the zone lock can only be relaxed after page removal.

With this change, rmqueue_bulk() no longer appears as a serious offender
in the preemptoff tracer, and system latency is noticeably improved.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
mm/page_alloc.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0b0397e29ee4c..87a983a356530c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3118,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
}

/*
- * Obtain a specified number of elements from the buddy allocator, all under
- * a single hold of the lock, for efficiency. Add them to the supplied list.
- * Returns the number of new pages which were placed at *list.
+ * Obtain a specified number of elements from the buddy allocator, and relax the
+ * zone lock when needed. Add them to the supplied list. Returns the number of
+ * new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
unsigned long flags;
- int i, allocated = 0;
+ const bool can_resched = !preempt_count() && !irqs_disabled();
+ int i, allocated = 0, last_mod = 0;

/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
spin_lock(&zone->lock);
@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
if (unlikely(page == NULL))
break;

+ /* Reschedule and ease the contention on the lock if needed */
+ if (i + 1 < count && ((can_resched && need_resched()) ||
+ spin_needbreak(&zone->lock))) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
+ -((i + 1 - last_mod) << order));
+ last_mod = i + 1;
+ spin_unlock(&zone->lock);
+ if (can_resched)
+ cond_resched();
+ spin_lock(&zone->lock);
+ }
+
if (unlikely(check_pcp_refill(page, order)))
continue;

@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* on i. Do not confuse with 'allocated' which is the number of
* pages added to the pcp list.
*/
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
spin_unlock(&zone->lock);
return allocated;
}

From 5d5b708e3731e135ea7ae168571ad78d883e63e8 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Wed, 1 Feb 2023 10:17:47 +0000
Subject: [PATCH 02/16] XANMOD: fair: Remove all energy efficiency functions

Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
kernel/sched/fair.c | 224 +-------------------------------------------
1 file changed, 3 insertions(+), 221 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0f8736991427..345cc5e9fa6e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -19,6 +19,9 @@
*
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ * Remove energy efficiency functions by Alexandre Frade
+ * (C) 2021 Alexandre Frade <kernel@xanmod.org>
*/
#include <linux/energy_model.h>
#include <linux/mmap_lock.h>
@@ -7136,219 +7139,6 @@ eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
return min(max_util, eenv->cpu_cap);
}

-/*
- * compute_energy(): Use the Energy Model to estimate the energy that @pd would
- * consume for a given utilization landscape @eenv. When @dst_cpu < 0, the task
- * contribution is ignored.
- */
-static inline unsigned long
-compute_energy(struct energy_env *eenv, struct perf_domain *pd,
- struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
-{
- unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
- unsigned long busy_time = eenv->pd_busy_time;
-
- if (dst_cpu >= 0)
- busy_time = min(eenv->pd_cap, busy_time + eenv->task_busy_time);
-
- return em_cpu_energy(pd->em_pd, max_util, busy_time, eenv->cpu_cap);
-}
-
-/*
- * find_energy_efficient_cpu(): Find most energy-efficient target CPU for the
- * waking task. find_energy_efficient_cpu() looks for the CPU with maximum
- * spare capacity in each performance domain and uses it as a potential
- * candidate to execute the task. Then, it uses the Energy Model to figure
- * out which of the CPU candidates is the most energy-efficient.
- *
- * The rationale for this heuristic is as follows. In a performance domain,
- * all the most energy efficient CPU candidates (according to the Energy
- * Model) are those for which we'll request a low frequency. When there are
- * several CPUs for which the frequency request will be the same, we don't
- * have enough data to break the tie between them, because the Energy Model
- * only includes active power costs. With this model, if we assume that
- * frequency requests follow utilization (e.g. using schedutil), the CPU with
- * the maximum spare capacity in a performance domain is guaranteed to be among
- * the best candidates of the performance domain.
- *
- * In practice, it could be preferable from an energy standpoint to pack
- * small tasks on a CPU in order to let other CPUs go in deeper idle states,
- * but that could also hurt our chances to go cluster idle, and we have no
- * ways to tell with the current Energy Model if this is actually a good
- * idea or not. So, find_energy_efficient_cpu() basically favors
- * cluster-packing, and spreading inside a cluster. That should at least be
- * a good thing for latency, and this is consistent with the idea that most
- * of the energy savings of EAS come from the asymmetry of the system, and
- * not so much from breaking the tie between identical CPUs. That's also the
- * reason why EAS is enabled in the topology code only for systems where
- * SD_ASYM_CPUCAPACITY is set.
- *
- * NOTE: Forkees are not accepted in the energy-aware wake-up path because
- * they don't have any useful utilization data yet and it's not possible to
- * forecast their impact on energy consumption. Consequently, they will be
- * placed by find_idlest_cpu() on the least loaded CPU, which might turn out
- * to be energy-inefficient in some use-cases. The alternative would be to
- * bias new tasks towards specific types of CPUs first, or to try to infer
- * their util_avg from the parent task, but those heuristics could hurt
- * other use-cases too. So, until someone finds a better way to solve this,
- * let's keep things simple by re-using the existing slow path.
- */
-static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
-{
- struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
- unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
- unsigned long p_util_min = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MIN) : 0;
- unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024;
- struct root_domain *rd = this_rq()->rd;
- int cpu, best_energy_cpu, target = -1;
- struct sched_domain *sd;
- struct perf_domain *pd;
- struct energy_env eenv;
-
- rcu_read_lock();
- pd = rcu_dereference(rd->pd);
- if (!pd || READ_ONCE(rd->overutilized))
- goto unlock;
-
- /*
- * Energy-aware wake-up happens on the lowest sched_domain starting
- * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
- */
- sd = rcu_dereference(*this_cpu_ptr(&sd_asym_cpucapacity));
- while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
- sd = sd->parent;
- if (!sd)
- goto unlock;
-
- target = prev_cpu;
-
- sync_entity_load_avg(&p->se);
- if (!uclamp_task_util(p, p_util_min, p_util_max))
- goto unlock;
-
- eenv_task_busy_time(&eenv, p, prev_cpu);
-
- for (; pd; pd = pd->next) {
- unsigned long util_min = p_util_min, util_max = p_util_max;
- unsigned long cpu_cap, cpu_thermal_cap, util;
- unsigned long cur_delta, max_spare_cap = 0;
- unsigned long rq_util_min, rq_util_max;
- unsigned long prev_spare_cap = 0;
- int max_spare_cap_cpu = -1;
- unsigned long base_energy;
-
- cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);
-
- if (cpumask_empty(cpus))
- continue;
-
- /* Account thermal pressure for the energy estimation */
- cpu = cpumask_first(cpus);
- cpu_thermal_cap = arch_scale_cpu_capacity(cpu);
- cpu_thermal_cap -= arch_scale_thermal_pressure(cpu);
-
- eenv.cpu_cap = cpu_thermal_cap;
- eenv.pd_cap = 0;
-
- for_each_cpu(cpu, cpus) {
- struct rq *rq = cpu_rq(cpu);
-
- eenv.pd_cap += cpu_thermal_cap;
-
- if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
- continue;
-
- if (!cpumask_test_cpu(cpu, p->cpus_ptr))
- continue;
-
- util = cpu_util_next(cpu, p, cpu);
- cpu_cap = capacity_of(cpu);
-
- /*
- * Skip CPUs that cannot satisfy the capacity request.
- * IOW, placing the task there would make the CPU
- * overutilized. Take uclamp into account to see how
- * much capacity we can get out of the CPU; this is
- * aligned with sched_cpu_util().
- */
- if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
- /*
- * Open code uclamp_rq_util_with() except for
- * the clamp() part. Ie: apply max aggregation
- * only. util_fits_cpu() logic requires to
- * operate on non clamped util but must use the
- * max-aggregated uclamp_{min, max}.
- */
- rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
- rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
-
- util_min = max(rq_util_min, p_util_min);
- util_max = max(rq_util_max, p_util_max);
- }
- if (!util_fits_cpu(util, util_min, util_max, cpu))
- continue;
-
- lsub_positive(&cpu_cap, util);
-
- if (cpu == prev_cpu) {
- /* Always use prev_cpu as a candidate. */
- prev_spare_cap = cpu_cap;
- } else if (cpu_cap > max_spare_cap) {
- /*
- * Find the CPU with the maximum spare capacity
- * among the remaining CPUs in the performance
- * domain.
- */
- max_spare_cap = cpu_cap;
- max_spare_cap_cpu = cpu;
- }
- }
-
- if (max_spare_cap_cpu < 0 && prev_spare_cap == 0)
- continue;
-
- eenv_pd_busy_time(&eenv, cpus, p);
- /* Compute the 'base' energy of the pd, without @p */
- base_energy = compute_energy(&eenv, pd, cpus, p, -1);
-
- /* Evaluate the energy impact of using prev_cpu. */
- if (prev_spare_cap > 0) {
- prev_delta = compute_energy(&eenv, pd, cpus, p,
- prev_cpu);
- /* CPU utilization has changed */
- if (prev_delta < base_energy)
- goto unlock;
- prev_delta -= base_energy;
- best_delta = min(best_delta, prev_delta);
- }
-
- /* Evaluate the energy impact of using max_spare_cap_cpu. */
- if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
- cur_delta = compute_energy(&eenv, pd, cpus, p,
- max_spare_cap_cpu);
- /* CPU utilization has changed */
- if (cur_delta < base_energy)
- goto unlock;
- cur_delta -= base_energy;
- if (cur_delta < best_delta) {
- best_delta = cur_delta;
- best_energy_cpu = max_spare_cap_cpu;
- }
- }
- }
- rcu_read_unlock();
-
- if (best_delta < prev_delta)
- target = best_energy_cpu;
-
- return target;
-
-unlock:
- rcu_read_unlock();
-
- return target;
-}
-
/*
* select_task_rq_fair: Select target runqueue for the waking task in domains
* that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
@@ -7376,14 +7166,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
lockdep_assert_held(&p->pi_lock);
if (wake_flags & WF_TTWU) {
record_wakee(p);
-
- if (sched_energy_enabled()) {
- new_cpu = find_energy_efficient_cpu(p, prev_cpu);
- if (new_cpu >= 0)
- return new_cpu;
- new_cpu = prev_cpu;
- }
-
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
}

--
2.39.1

From cba31b19f8c38696b13ba48e0e8b6dbe747d6bae Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:31:25 +0000
Subject: [PATCH 10/16] XANMOD: mm/vmscan: vm_swappiness = 30 decreases the
amount of swapping

Signed-off-by: Alexandre Frade <admfrade@gmail.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
mm/vmscan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5b7b8d4f5297..549684b29418 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -190,7 +190,7 @@ struct scan_control {
/*
* From 0 .. 200. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 30;

static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)
--
2.39.1

From 6329525a0fa10cd13f39b76948b1296150f75c95 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 29 Aug 2022 16:47:26 +0000
Subject: [PATCH 14/16] XANMOD: Makefile: Disable GCC vectorization on trees

Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/Makefile b/Makefile
index 3f6628780eb2..35a5ae1ede42 100644
--- a/Makefile
+++ b/Makefile
@@ -1069,6 +1069,9 @@ endif
KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)

+# disable GCC vectorization on trees
+KBUILD_CFLAGS += $(call cc-option, -fno-tree-vectorize)
+
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += -fno-strict-overflow

--
2.39.1