From d0d6f0294428bb8625a74c49132439f35ef5f3b1 Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Fri, 16 Dec 2022 13:47:34 +0100 Subject: [PATCH] linux 6.1.y: base: Remove our `watermark_scale_factor` patch, which doesn't feel needed anymore, and add a selection of patches from Zen: `Input: evdev - use call_rcu when detaching client`, `mm: Lower the non-hugetlbpage pageblock size to reduce scheduling delays` and `mm: Don't hog the CPU and zone lock in rmqueue_bulk()`. https://github.com/zen-kernel/zen-kernel/commit/379cbab18b5c75c622b93e2c5abdfac141fe9654 https://github.com/zen-kernel/zen-kernel/commit/2aafb56f20e4b63d8c4af172fe9d017c64bc4129 https://github.com/zen-kernel/zen-kernel/commit/f22bc56be85e69c71c8e36041193856bb8b01525 --- .../6.1/0003-glitched-base.patch | 258 ++++++++++++++++-- 1 file changed, 229 insertions(+), 29 deletions(-) diff --git a/linux-tkg-patches/6.1/0003-glitched-base.patch b/linux-tkg-patches/6.1/0003-glitched-base.patch index 93b6917..51fcafe 100644 --- a/linux-tkg-patches/6.1/0003-glitched-base.patch +++ b/linux-tkg-patches/6.1/0003-glitched-base.patch @@ -403,34 +403,6 @@ index 84badf00647e..6a922bca9f39 100644 2.28.0 -From 816ee502759e954304693813bd03d94986b28dba Mon Sep 17 00:00:00 2001 -From: Tk-Glitch -Date: Mon, 18 Feb 2019 17:40:57 +0100 -Subject: [PATCH 11/17] mm: Set watermark_scale_factor to 200 (from 10) - -Multiple users have reported it's helping reducing/eliminating stuttering -with DXVK. ---- - mm/page_alloc.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 898ff44f2c7b..e72074034793 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -330,7 +330,7 @@ int watermark_boost_factor __read_mostly; - int min_free_kbytes = 1024; - int user_min_free_kbytes = -1; - int watermark_boost_factor __read_mostly = 15000; --int watermark_scale_factor = 10; -+int watermark_scale_factor = 200; - - static unsigned long nr_kernel_pages __initdata; - static unsigned long nr_all_pages __initdata; --- -2.28.0 - - From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Fri, 19 Apr 2019 12:33:38 +0200 @@ -490,7 +462,6 @@ index b0cefe94920d..890165099b07 100644 -- 2.28.0 - From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001 From: Alexandre Frade Date: Mon, 25 Nov 2019 15:13:06 -0300 @@ -619,3 +590,232 @@ index 36a469150ff9..aee891c9b78a 100644 -- 2.28.0 +From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001 +From: Kenny Levinsen +Date: Sun, 27 Dec 2020 14:43:13 +0000 +Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client + +Significant time was spent on synchronize_rcu in evdev_detach_client +when applications closed evdev devices. Switching VT away from a +graphical environment commonly leads to mass input device closures, +which could lead to noticable delays on systems with many input devices. + +Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev +client struct till after the RCU grace period instead of blocking the +calling application. 
+
+While this does not solve all slow evdev fd closures, it takes care of a
+good portion of them, including this simple test:
+
+	#include <fcntl.h>
+	#include <unistd.h>
+
+	int main(int argc, char *argv[])
+	{
+		int idx, fd;
+		const char *path = "/dev/input/event0";
+		for (idx = 0; idx < 1000; idx++) {
+			if ((fd = open(path, O_RDWR)) == -1) {
+				return -1;
+			}
+			close(fd);
+		}
+		return 0;
+	}
+
+Time to completion of above test when run locally:
+
+	Before: 0m27.111s
+	After: 0m0.018s
+
+Signed-off-by: Kenny Levinsen
+---
+ drivers/input/evdev.c | 19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
+index 95f90699d2b17b..2b10fe29d2c8d9 100644
+--- a/drivers/input/evdev.c
++++ b/drivers/input/evdev.c
+@@ -46,6 +46,7 @@ struct evdev_client {
+ 	struct fasync_struct *fasync;
+ 	struct evdev *evdev;
+ 	struct list_head node;
++	struct rcu_head rcu;
+ 	enum input_clock_type clk_type;
+ 	bool revoked;
+ 	unsigned long *evmasks[EV_CNT];
+@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
+ 	spin_unlock(&evdev->client_lock);
+ }
+ 
++static void evdev_reclaim_client(struct rcu_head *rp)
++{
++	struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
++	unsigned int i;
++	for (i = 0; i < EV_CNT; ++i)
++		bitmap_free(client->evmasks[i]);
++	kvfree(client);
++}
++
+ static void evdev_detach_client(struct evdev *evdev,
+ 				struct evdev_client *client)
+ {
+ 	spin_lock(&evdev->client_lock);
+ 	list_del_rcu(&client->node);
+ 	spin_unlock(&evdev->client_lock);
+-	synchronize_rcu();
++	call_rcu(&client->rcu, evdev_reclaim_client);
+ }
+ 
+ static int evdev_open_device(struct evdev *evdev)
+@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
+ {
+ 	struct evdev_client *client = file->private_data;
+ 	struct evdev *evdev = client->evdev;
+-	unsigned int i;
+ 
+ 	mutex_lock(&evdev->mutex);
+ 
+@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
+ 
+ 	evdev_detach_client(evdev, client);
+ 
+-	for (i = 0; i < EV_CNT; ++i)
+-		bitmap_free(client->evmasks[i]);
+-
+-	kvfree(client);
+-
+ 	evdev_close_device(evdev);
+ 
+ 	return 0;
+@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
+ 
+ err_free_client:
+ 	evdev_detach_client(evdev, client);
+-	kvfree(client);
+ 	return error;
+ }
+ 
+
+From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
+From: Sultan Alsawaf
+Date: Wed, 20 Oct 2021 20:50:11 -0700
+Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
+ scheduling delays
+
+The page allocator processes free pages in groups of pageblocks, where
+the size of a pageblock is typically quite large (1024 pages without
+hugetlbpage support). Pageblocks are processed atomically with the zone
+lock held, which can cause severe scheduling delays on both the CPU
+going through the pageblock and any other CPUs waiting to acquire the
+zone lock. A frequent offender is move_freepages_block(), which is used
+by rmqueue() for page allocation.
+
+As it turns out, there's no requirement for pageblocks to be so large,
+so the pageblock order can simply be reduced to ease the scheduling
+delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
+reasonable setting to ensure non-costly page allocation requests can
+still be serviced without always needing to free up more than one
+pageblock's worth of pages at a time.
+
+This has a noticeable effect on overall system latency when memory
+pressure is elevated. The various mm functions which operate on
+pageblocks no longer appear in the preemptoff tracer, where previously
+they would spend up to 100 ms on a mobile arm64 CPU processing a
+pageblock with preemption disabled and the zone lock held.
+
+Signed-off-by: Sultan Alsawaf
+---
+ include/linux/pageblock-flags.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
+index 5f1ae07d724b88..97cda629c9e909 100644
+--- a/include/linux/pageblock-flags.h
++++ b/include/linux/pageblock-flags.h
+@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;
+ #else /* CONFIG_HUGETLB_PAGE */
+ 
+ /* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
+-#define pageblock_order	(MAX_ORDER-1)
++#define pageblock_order	PAGE_ALLOC_COSTLY_ORDER
+ 
+ #endif /* CONFIG_HUGETLB_PAGE */
+ 
+
+From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
+From: Sultan Alsawaf
+Date: Wed, 20 Oct 2021 20:50:32 -0700
+Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()
+
+There is noticeable scheduling latency and heavy zone lock contention
+stemming from rmqueue_bulk's single hold of the zone lock while doing
+its work, as seen with the preemptoff tracer. There's no actual need for
+rmqueue_bulk() to hold the zone lock the entire time; it only does so
+for supposed efficiency. As such, we can relax the zone lock and even
+reschedule when IRQs are enabled in order to keep the scheduling delays
+and zone lock contention at bay. Forward progress is still guaranteed,
+as the zone lock can only be relaxed after page removal.
+
+With this change, rmqueue_bulk() no longer appears as a serious offender
+in the preemptoff tracer, and system latency is noticeably improved.
+
+Signed-off-by: Sultan Alsawaf
+---
+ mm/page_alloc.c | 23 ++++++++++++++++++-----
+ 1 file changed, 18 insertions(+), 5 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index a0b0397e29ee4c..87a983a356530c 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3119,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
+ }
+ 
+ /*
+- * Obtain a specified number of elements from the buddy allocator, all under
+- * a single hold of the lock, for efficiency. Add them to the supplied list.
+- * Returns the number of new pages which were placed at *list.
++ * Obtain a specified number of elements from the buddy allocator, and relax the
++ * zone lock when needed. Add them to the supplied list. Returns the number of
++ * new pages which were placed at *list.
+  */
+ static int rmqueue_bulk(struct zone *zone, unsigned int order,
+ 			unsigned long count, struct list_head *list,
+ 			int migratetype, unsigned int alloc_flags)
+ {
+-	int i, allocated = 0;
++	const bool can_resched = !preempt_count() && !irqs_disabled();
++	int i, allocated = 0, last_mod = 0;
+ 
+ 	/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
+ 	spin_lock(&zone->lock);
+@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
+ 		if (unlikely(page == NULL))
+ 			break;
+ 
++		/* Reschedule and ease the contention on the lock if needed */
++		if (i + 1 < count && ((can_resched && need_resched()) ||
++		     spin_needbreak(&zone->lock))) {
++			__mod_zone_page_state(zone, NR_FREE_PAGES,
++					      -((i + 1 - last_mod) << order));
++			last_mod = i + 1;
++			spin_unlock(&zone->lock);
++			if (can_resched)
++				cond_resched();
++			spin_lock(&zone->lock);
++		}
++
+ 		if (unlikely(check_pcp_refill(page, order)))
+ 			continue;
+ 
+@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
+ 	 * on i. Do not confuse with 'allocated' which is the number of
+ 	 * pages added to the pcp list.
+ 	 */
+-	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
++	__mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
+ 	spin_unlock(&zone->lock);
+ 	return allocated;
+ }
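
Side note on the first Zen patch above: it is an application of the standard RCU
deferred-reclaim idiom, trading a blocking synchronize_rcu() for a call_rcu()
callback. The following is only a minimal sketch of that idiom, not code taken
from the patch; my_client, my_client_detach() and my_client_reclaim() are
made-up stand-ins for the evdev_client plumbing.

	#include <linux/list.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct my_client {
		struct list_head node;	/* lives on a list walked under rcu_read_lock() */
		struct rcu_head rcu;	/* storage for the deferred callback */
	};

	static LIST_HEAD(my_clients);
	static DEFINE_SPINLOCK(my_clients_lock);

	/* Runs after a grace period, once no reader can still see the node. */
	static void my_client_reclaim(struct rcu_head *rp)
	{
		struct my_client *client = container_of(rp, struct my_client, rcu);

		kfree(client);
	}

	static void my_client_detach(struct my_client *client)
	{
		spin_lock(&my_clients_lock);
		list_del_rcu(&client->node);
		spin_unlock(&my_clients_lock);

		/*
		 * synchronize_rcu() would block here for a full grace period;
		 * call_rcu() returns immediately and reclaims the object later.
		 */
		call_rcu(&client->rcu, my_client_reclaim);
	}

The cost is that the object outlives the detach call until the grace period
ends, which is why the evdev patch also moves the evmasks bitmap_free() calls
and the kvfree() into the RCU callback instead of leaving them in
evdev_release().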
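
For scale on the second patch: with 4 KiB pages, pageblock_order = MAX_ORDER - 1 = 10
means 1024-page (4 MiB) pageblocks, while PAGE_ALLOC_COSTLY_ORDER is 3, i.e.
8-page (32 KiB) pageblocks, so far less work is done per zone-lock hold. The
third patch follows the generic "back off a contended spinlock" loop shape. The
sketch below shows that shape in isolation; process_items() and take_one_item()
are hypothetical placeholders, not page allocator code, and the NR_FREE_PAGES
bookkeeping from the real patch is deliberately omitted.

	#include <linux/preempt.h>
	#include <linux/sched.h>
	#include <linux/spinlock.h>

	/* Placeholder for the real work done while the lock is held. */
	static bool take_one_item(void)
	{
		return true;
	}

	static int process_items(spinlock_t *lock, int count)
	{
		/* As in the patch: only reschedule from a sleepable context. */
		const bool can_resched = !preempt_count() && !irqs_disabled();
		int i;

		spin_lock(lock);
		for (i = 0; i < count; i++) {
			if (!take_one_item())
				break;

			/* Drop the lock if we should yield or someone is spinning on it. */
			if (i + 1 < count && ((can_resched && need_resched()) ||
					      spin_needbreak(lock))) {
				spin_unlock(lock);
				if (can_resched)
					cond_resched();
				spin_lock(lock);
			}
		}
		spin_unlock(lock);

		return i;
	}

Forward progress is preserved for the same reason the commit message gives: the
lock is only released after at least one item has already been taken off the
shared structure.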