Compare commits

1 commit: 6051318ac7

PKGBUILD (5)
PKGBUILD:

@@ -31,6 +31,7 @@ _distro="Arch"
 declare -p -x > current_env
 
 source "$_where"/customization.cfg # load default configuration from file
+source "$_where"/linux-tkg-config/prepare
 
 if [ -e "$_EXT_CONFIG_PATH" ]; then
   msg2 "External configuration file $_EXT_CONFIG_PATH will be used and will override customization.cfg values."

@@ -39,8 +40,6 @@ fi
 
 source current_env
 
-source "$_where"/linux-tkg-config/prepare
-
 # Make sure we're in a clean state
 if [ ! -e "$_where"/BIG_UGLY_FROGMINER ]; then
   _tkg_initscript

@@ -57,7 +56,7 @@ else
 fi
 pkgname=("${pkgbase}" "${pkgbase}-headers")
 pkgver="${_basekernel}"."${_sub}"
-pkgrel=272
+pkgrel=271
 pkgdesc='Linux-tkg'
 arch=('x86_64') # no i686 in here
 url="https://www.kernel.org/"
customization.cfg:

@@ -20,7 +20,7 @@ _EXT_CONFIG_PATH=~/.config/frogminer/linux-tkg.cfg
 # Default is "true".
 _NUKR="true"
 
-# Git mirror to use to get the kernel sources, possible values are "kernel.org", "googlesource.com", "github.com" and "torvalds"
+# Git mirror to use to get the kernel sources, possible values are "kernel.org", "googlesource.com" and "github.com"
 _git_mirror="kernel.org"
 
 # Custom compiler root dirs - Leave empty to use system compilers

@@ -144,9 +144,7 @@ _numadisable="false"
 _misc_adds="true"
 
 # Set to "0" for periodic ticks, "1" to use CattaRappa mode (enabling full tickless) and "2" for tickless idle only.
-# Full tickless can give higher performances in case you use isolation of CPUs for tasks
-# and it works only when using the nohz_full kernel parameter, otherwise behaves like idle.
-# Just tickless idle perform better for most platforms.
+# Full tickless can give higher performances in various cases but, depending on hardware, lower consistency. Just tickless idle can perform better on some platforms (mostly AMD based).
 _tickless="2"
 
 # Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false"

@@ -218,7 +216,7 @@ _tt_high_hz="false"
 _smt_nice="true"
 
 # Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false"
-_random_trust_cpu="true"
+_random_trust_cpu="false"
 
 # MuQSS only - CPU scheduler runqueue sharing - No sharing (RQ_NONE), SMT (hyperthread) siblings (RQ_SMT), Multicore siblings (RQ_MC), Symmetric Multi-Processing (RQ_SMP), NUMA (RQ_ALL)
 # Valid values are "none", "smt", "mc", "mc-llc"(for zen), "smp", "all" - Kernel default is "smt"
@@ -44,12 +44,6 @@ plain() {
 
-declare -p -x > current_env
 source customization.cfg
-
 if [ -e "$_EXT_CONFIG_PATH" ]; then
   msg2 "External configuration file $_EXT_CONFIG_PATH will be used and will override customization.cfg values."
   source "$_EXT_CONFIG_PATH"
 fi
-
-. current_env
-
-source linux-tkg-config/prepare
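The `declare -p -x > current_env` / `source current_env` sequence that appears in these scripts snapshots all exported shell variables before the configuration files are sourced, then replays the snapshot afterwards, so values the user exported on the command line keep priority over customization.cfg defaults. A minimal standalone sketch of the idea, assuming the same `current_env` scratch file; the `_example_option` variable and script name are hypothetical:

    #!/bin/bash
    # Save every exported variable as a re-runnable "declare -x ..." statement.
    declare -p -x > current_env

    # Sourcing a config file may overwrite an option the user exported,
    # e.g. when invoked as:  _example_option=custom ./this_script.sh
    _example_option="default-from-config"

    # Replay the snapshot: user-exported values take priority again.
    source current_env

    echo "_example_option=${_example_option}"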
@@ -1,6 +1,6 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux/x86 6.0.5-arch1 Kernel Configuration
+# Linux/x86 6.0.3-arch2 Kernel Configuration
 #
 CONFIG_CC_VERSION_TEXT="gcc (GCC) 12.2.0"
 CONFIG_CC_IS_GCC=y

@@ -17,7 +17,7 @@ CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
 CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
 CONFIG_CC_HAS_ASM_INLINE=y
 CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y
-CONFIG_PAHOLE_VERSION=124
+CONFIG_PAHOLE_VERSION=123
 CONFIG_IRQ_WORK=y
 CONFIG_BUILDTIME_TABLE_SORT=y
 CONFIG_THREAD_INFO_IN_TASK=y
linux-tkg-config/prepare:

@@ -11,7 +11,6 @@ _kernel_git_remotes=(
   ["kernel.org"]="https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git"
   ["googlesource.com"]="https://kernel.googlesource.com/pub/scm/linux/kernel/git/stable/linux-stable"
   ["github.com"]="https://github.com/gregkh/linux.git"
-  ["torvalds"]="https://github.com/torvalds/linux.git"
 )
 
 _git_remote_names=( "${!_kernel_git_remotes[@]}" )

@@ -36,35 +35,53 @@ for _key in "${_current_kernels[@]}" "${_eol_kernels[@]}"; do
   _kver_latest_tags_map[$_key]=$(echo "$_kernel_tags" | grep -F "v$_key" | tail -1 | cut -c1-)
 done
 
+# only used by workflow trigger script
+typeset -Ag _kver_subver_map
+_kver_subver_map=(
+  ["5.4"]="214"
+  ["5.7"]="19"
+  ["5.8"]="18"
+  ["5.9"]="16"
+  ["5.10"]="144"
+  ["5.11"]="22"
+  ["5.12"]="19"
+  ["5.13"]="19"
+  ["5.14"]="21"
+  ["5.15"]="69"
+  ["5.16"]="20"
+  ["5.17"]="15"
+  ["5.18"]="19"
+  ["5.19"]="10"
+  ["6.0"]="rc6"
+)
+
+
 # PREEMPT_RT's supported kernel subversion
 typeset -Ag _rt_subver_map
 _rt_subver_map=(
-  ["5.4"]="209"
+  ["5.4"]="188"
   ["5.9"]="1"
-  ["5.10"]="153"
+  ["5.10"]="109"
   ["5.11"]="4"
   ["5.14"]="2"
-  ["5.15"]="76"
+  ["5.15"]="32"
   ["5.16"]="2"
   ["5.17"]="1"
-  ["6.0"]="5"
-  ["6.1"]="rc3"
 )
 
 # PREEMPT_RT's patch revision for the kernel
 # We separated this to allow for forcing the application of the patch when _preempt_rt_force=1 on version mismatch
 typeset -Ag _rt_rev_map
 _rt_rev_map=(
-  ["5.4"]="77"
+  ["5.4"]="73"
   ["5.9"]="20"
-  ["5.10"]="76"
+  ["5.10"]="65"
   ["5.11"]="11"
   ["5.14"]="21"
-  ["5.15"]="53"
+  ["5.15"]="39"
   ["5.16"]="19"
   ["5.17"]="17"
-  ["6.0"]="14"
-  ["6.1"]="2"
 )
 
 _undefine() {

@@ -270,7 +287,7 @@ _set_cpu_scheduler() {
   elif [ "$_kver" = "514" ]; then
     _avail_cpu_scheds=("pds" "bmq" "cacule" "cfs")
   elif [ "$_kver" = "515" ]; then
-    _avail_cpu_scheds=("pds" "bmq" "cacule" "tt" "bore" "cfs")
+    _avail_cpu_scheds=("pds" "bmq" "cacule" "tt" "cfs")
   elif [ "$_kver" = "516" ]; then
     _avail_cpu_scheds=("pds" "bmq" "cacule" "cfs")
   elif [ "$_kver" = "517" ]; then

@@ -280,31 +297,19 @@ _set_cpu_scheduler() {
   elif [ "$_kver" = "519" ]; then
     _avail_cpu_scheds=("cfs" "pds" "bmq" "cacule" "tt" "bore")
   elif [ "$_kver" = "600" ]; then
     _avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
     _projectc_unoff="1"
   elif [ "$_kver" = "601" ]; then
-    _avail_cpu_scheds=("cfs" "bore")
+    _avail_cpu_scheds=("cfs" "tt" "bore")
   else
     _avail_cpu_scheds=("cfs")
   fi
 
   if [ "${_preempt_rt}" = "1" ]; then
-    warning "! Since you have enabled _preempt_rt, incompatible cpu schedulers will not be available !"
-    if [[ "${_avail_cpu_scheds[*]}" =~ "bore" ]]; then
-      _avail_cpu_scheds=("cfs" "bore")
-    else
-      _avail_cpu_scheds=("cfs")
-    fi
+    _avail_cpu_scheds=("cfs")
   fi
 
   # Populate descriptions of the available CPU schedulers
   _avail_cpu_scheds_text=()
   for _sched in "${_avail_cpu_scheds[@]}"; do
-    if [ "$_sched" = "pds" ] || [ "$_sched" = "bmq" ] && [ "$_projectc_unoff" = "1" ]; then
-      _avail_cpu_scheds_text+=("Project C / ${_sched^^} (unofficial port - ! possibly unstable !)")
-    else
-      _avail_cpu_scheds_text+=("${_sched_description_array[$_sched]}")
-    fi
+    _avail_cpu_scheds_text+=("${_sched_description_array[$_sched]}")
   done
 
   if ! [[ ${_avail_cpu_scheds[*]} =~ "$_cpusched" ]]; then

@@ -427,7 +432,6 @@ _linux_git_branch_checkout() {
   msg2 "master branch doesn't locally exist, shallow cloning..."
   git remote set-branches --add kernel.org master
   git remote set-branches --add googlesource.com master
-  git remote set-branches --add torvalds master
   git fetch --depth=1 $_git_mirror master
   git fetch --depth 1 $_git_mirror tag "$_kernel_git_tag"
   git checkout -b master-${_git_mirror} ${_git_mirror}/master

@@ -445,7 +449,6 @@ _linux_git_branch_checkout() {
   msg2 "${_basekernel}.y branch doesn't locally exist, shallow cloning..."
   git remote set-branches --add kernel.org linux-${_basekernel}.y
   git remote set-branches --add googlesource.com linux-${_basekernel}.y
-  git remote set-branches --add torvalds linux-${_basekernel}.y
   git fetch --depth=1 $_git_mirror linux-${_basekernel}.y
   git fetch --depth=1 $_git_mirror tag "$_kernel_git_tag"
   git checkout -b linux-${_basekernel}-${_git_mirror} ${_git_mirror}/linux-${_basekernel}.y

@@ -647,13 +650,8 @@ _tkg_srcprep() {
   if [ ${_rt_subver_map[$_basekernel]+_} = "_" ]; then
     preempt_rt_ksubver="${_rt_subver_map[$_basekernel]}"
     # Check if subversion is supported, skip check if forced
-    if [ "${_preempt_rt_force}" = "1" ] || [ "${preempt_rt_ksubver}" = "${_sub}" ]; then
-      if [[ "${_sub}" == *rc* ]]; then
-        _separator="-"
-      else
-        _separator="."
-      fi
-      preempt_rt_file_gz="patch-${_basekernel}${_separator}${preempt_rt_ksubver}-rt${_rt_rev_map["$_basekernel"]}.patch.gz"
+    if [ "${_preempt_rt_force}" = "1" ] || [ "${preempt_rt_ksubver}" = "${_kver_subver_map[$_basekernel]}" ]; then
+      preempt_rt_file_gz="patch-${_basekernel}.${preempt_rt_ksubver}-rt${_rt_rev_map["$_basekernel"]}.patch.gz"
       preempt_rt_file=`basename ${preempt_rt_file_gz} .gz`
       curl "https://cdn.kernel.org/pub/linux/kernel/projects/rt/${_basekernel}/${preempt_rt_file_gz}" > "$srcdir"/"${preempt_rt_file_gz}"
       last_pwd=`pwd`

@@ -663,10 +661,10 @@ _tkg_srcprep() {
       tkgpatch="$srcdir/${preempt_rt_file}"
       _msg="Applying PREEMPT_RT patch" && _tkg_patcher
     else
-      warning "Skipping PREEMPT_RT patch for ${_basekernel}.${_sub} (last known good ${_basekernel}.${preempt_rt_ksubver})"
+      msg2 "Skipping PREEMPT_RT patch for ${_basekernel}.${_sub}"
     fi
   else
-    warning "Skipping PREEMPT_RT patch on unsupported kernel version"
+    msg2 "Skipping PREEMPT_RT patch on unsupported kernel version"
   fi
 fi

@@ -679,11 +677,6 @@ _tkg_srcprep() {
   tkgpatch="$srcdir/0003-glitched-base.patch"
   _msg="Applying glitched base patch" && _tkg_patcher
 
-  if [ "${_preempt_rt}" != "1" ]; then
-    tkgpatch="$srcdir/0003-glitched-base-nonrt.patch"
-    _msg="Applying glitched base non-rt additions patch" && _tkg_patcher
-  fi
-
   if [[ "$_distro" =~ ^(Fedora|Suse)$ ]]; then
     tkgpatch="$srcdir/0013-fedora-rpm.patch"
     _msg="RPM: fixing spec generator" && _tkg_patcher

@@ -1335,7 +1328,7 @@ _tkg_srcprep() {
   plain "Use CattaRappa mode (Tickless/Dynticks) ?"
   _tickless_array_text=(
     "No, use periodic ticks."
-    "Yes, full tickless baby!\n Full tickless can give higher performances in case you use isolation of CPUs for task, in other cases it behaves as Idle."
+    "Yes, full tickless baby!\n Can give higher performances in many cases but lower consistency on some hardware."
     "Just tickless idle plz.\n Just tickless idle can perform better with some platforms (mostly AMD) or CPU schedulers (mostly MuQSS)."
   )
   _default_index="2"
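Note the `[ ${_rt_subver_map[$_basekernel]+_} = "_" ]` test used by `_tkg_srcprep` above: `${map[key]+_}` expands to `_` when, and only when, the key is set in the associative array, so the test is a key-existence check that works even for empty values. A minimal sketch of the idiom; the map contents are taken from the diff above but are illustrative only:

    #!/bin/bash
    typeset -Ag _rt_subver_map
    _rt_subver_map=( ["5.15"]="32" ["5.10"]="109" )

    for _basekernel in 5.15 5.18; do
      if [ "${_rt_subver_map[$_basekernel]+_}" = "_" ]; then
        # Key exists: a PREEMPT_RT patch level is known for this kernel.
        echo "$_basekernel -> rt subversion ${_rt_subver_map[$_basekernel]}"
      else
        echo "$_basekernel -> no PREEMPT_RT mapping, patch would be skipped"
      fi
    done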
@@ -1,38 +0,0 @@
-From 28f32f59d9d55ac7ec3a20b79bdd02d2a0a5f7e1 Mon Sep 17 00:00:00 2001
-From: Alexandre Frade <admfrade@gmail.com>
-Date: Mon, 29 Jan 2018 18:29:13 +0000
-Subject: [PATCH 03/17] sched/core: nr_migrate = 128 increases number of tasks
- to iterate in a single balance run.
-
-Signed-off-by: Alexandre Frade <admfrade@gmail.com>
----
- kernel/sched/core.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index f788cd61df21..2bfbb4213707 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -59,7 +59,7 @@ const_debug unsigned int sysctl_sched_features =
-  * Number of tasks to iterate in a single balance run.
-  * Limited because this is done with IRQs disabled.
-  */
--const_debug unsigned int sysctl_sched_nr_migrate = 32;
-+const_debug unsigned int sysctl_sched_nr_migrate = 128;
- 
- /*
-  * period over which we measure -rt task CPU usage in us.
-@@ -71,9 +71,9 @@ __read_mostly int scheduler_running;
- 
- /*
-  * part of the period that we allow rt tasks to run in us.
-- * default: 0.95s
-+ * XanMod default: 0.98s
-  */
--int sysctl_sched_rt_runtime = 950000;
-+int sysctl_sched_rt_runtime = 980000;
- 
- /*
-  * __task_rq_lock - lock the rq @p resides on.
---
-2.28.0
@@ -54,6 +54,46 @@ index 361ea7ab30ea..0c5cf69b241a 100644
 2.28.0
 
 
+From 28f32f59d9d55ac7ec3a20b79bdd02d2a0a5f7e1 Mon Sep 17 00:00:00 2001
+From: Alexandre Frade <admfrade@gmail.com>
+Date: Mon, 29 Jan 2018 18:29:13 +0000
+Subject: [PATCH 03/17] sched/core: nr_migrate = 128 increases number of tasks
+ to iterate in a single balance run.
+
+Signed-off-by: Alexandre Frade <admfrade@gmail.com>
+---
+ kernel/sched/core.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index f788cd61df21..2bfbb4213707 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -59,7 +59,7 @@ const_debug unsigned int sysctl_sched_features =
+  * Number of tasks to iterate in a single balance run.
+  * Limited because this is done with IRQs disabled.
+  */
+-const_debug unsigned int sysctl_sched_nr_migrate = 32;
++const_debug unsigned int sysctl_sched_nr_migrate = 128;
+ 
+ /*
+  * period over which we measure -rt task CPU usage in us.
+@@ -71,9 +71,9 @@ __read_mostly int scheduler_running;
+ 
+ /*
+  * part of the period that we allow rt tasks to run in us.
+- * default: 0.95s
++ * XanMod default: 0.98s
+  */
+-int sysctl_sched_rt_runtime = 950000;
++int sysctl_sched_rt_runtime = 980000;
+ 
+ /*
+  * __task_rq_lock - lock the rq @p resides on.
+--
+2.28.0
+
+
 From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001
 From: Alexandre Frade <admfrade@gmail.com>
 Date: Mon, 29 Jan 2018 17:41:29 +0000
@@ -1,38 +0,0 @@
-From 28f32f59d9d55ac7ec3a20b79bdd02d2a0a5f7e1 Mon Sep 17 00:00:00 2001
-From: Alexandre Frade <admfrade@gmail.com>
-Date: Mon, 29 Jan 2018 18:29:13 +0000
-Subject: [PATCH 03/17] sched/core: nr_migrate = 128 increases number of tasks
- to iterate in a single balance run.
-
-Signed-off-by: Alexandre Frade <admfrade@gmail.com>
----
- kernel/sched/core.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index f788cd61df21..2bfbb4213707 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -59,7 +59,7 @@ const_debug unsigned int sysctl_sched_features =
-  * Number of tasks to iterate in a single balance run.
-  * Limited because this is done with IRQs disabled.
-  */
--const_debug unsigned int sysctl_sched_nr_migrate = 32;
-+const_debug unsigned int sysctl_sched_nr_migrate = 128;
- 
- /*
-  * period over which we measure -rt task CPU usage in us.
-@@ -71,9 +71,9 @@ __read_mostly int scheduler_running;
- 
- /*
-  * part of the period that we allow rt tasks to run in us.
-- * default: 0.95s
-+ * XanMod default: 0.98s
-  */
--int sysctl_sched_rt_runtime = 950000;
-+int sysctl_sched_rt_runtime = 980000;
- 
- /*
-  * __task_rq_lock - lock the rq @p resides on.
---
-2.28.0
@@ -54,6 +54,46 @@ index 361ea7ab30ea..0c5cf69b241a 100644
 2.28.0
 
 
+From 28f32f59d9d55ac7ec3a20b79bdd02d2a0a5f7e1 Mon Sep 17 00:00:00 2001
+From: Alexandre Frade <admfrade@gmail.com>
+Date: Mon, 29 Jan 2018 18:29:13 +0000
+Subject: [PATCH 03/17] sched/core: nr_migrate = 128 increases number of tasks
+ to iterate in a single balance run.
+
+Signed-off-by: Alexandre Frade <admfrade@gmail.com>
+---
+ kernel/sched/core.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index f788cd61df21..2bfbb4213707 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -59,7 +59,7 @@ const_debug unsigned int sysctl_sched_features =
+  * Number of tasks to iterate in a single balance run.
+  * Limited because this is done with IRQs disabled.
+  */
+-const_debug unsigned int sysctl_sched_nr_migrate = 32;
++const_debug unsigned int sysctl_sched_nr_migrate = 128;
+ 
+ /*
+  * period over which we measure -rt task CPU usage in us.
+@@ -71,9 +71,9 @@ __read_mostly int scheduler_running;
+ 
+ /*
+  * part of the period that we allow rt tasks to run in us.
+- * default: 0.95s
++ * XanMod default: 0.98s
+  */
+-int sysctl_sched_rt_runtime = 950000;
++int sysctl_sched_rt_runtime = 980000;
+ 
+ /*
+  * __task_rq_lock - lock the rq @p resides on.
+--
+2.28.0
+
+
 From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001
 From: Alexandre Frade <admfrade@gmail.com>
 Date: Mon, 29 Jan 2018 17:41:29 +0000
File diff suppressed because it is too large
@@ -1,38 +0,0 @@
-From 28f32f59d9d55ac7ec3a20b79bdd02d2a0a5f7e1 Mon Sep 17 00:00:00 2001
-From: Alexandre Frade <admfrade@gmail.com>
-Date: Mon, 29 Jan 2018 18:29:13 +0000
-Subject: [PATCH 03/17] sched/core: nr_migrate = 128 increases number of tasks
- to iterate in a single balance run.
-
-Signed-off-by: Alexandre Frade <admfrade@gmail.com>
----
- kernel/sched/core.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index f788cd61df21..2bfbb4213707 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -59,7 +59,7 @@ const_debug unsigned int sysctl_sched_features =
-  * Number of tasks to iterate in a single balance run.
-  * Limited because this is done with IRQs disabled.
-  */
--const_debug unsigned int sysctl_sched_nr_migrate = 32;
-+const_debug unsigned int sysctl_sched_nr_migrate = 128;
- 
- /*
-  * period over which we measure -rt task CPU usage in us.
-@@ -71,9 +71,9 @@ __read_mostly int scheduler_running;
- 
- /*
-  * part of the period that we allow rt tasks to run in us.
-- * default: 0.95s
-+ * XanMod default: 0.98s
-  */
--int sysctl_sched_rt_runtime = 950000;
-+int sysctl_sched_rt_runtime = 980000;
- 
- /*
-  * __task_rq_lock - lock the rq @p resides on.
---
-2.28.0
@@ -31,6 +31,31 @@ index 2acfc69878f5..3f1131431e06 100644
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 211890edf37e..37121563407d 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -41,7 +41,7 @@ const_debug unsigned int sysctl_sched_features =
+  * Number of tasks to iterate in a single balance run.
+  * Limited because this is done with IRQs disabled.
+  */
+-const_debug unsigned int sysctl_sched_nr_migrate = 32;
++const_debug unsigned int sysctl_sched_nr_migrate = 128;
+ 
+ /*
+  * period over which we average the RT time consumption, measured
+@@ -61,9 +61,9 @@ __read_mostly int scheduler_running;
+ 
+ /*
+  * part of the period that we allow rt tasks to run in us.
+- * default: 0.95s
++ * XanMod default: 0.98s
+  */
+-int sysctl_sched_rt_runtime = 950000;
++int sysctl_sched_rt_runtime = 980000;
+ 
+ /*
+  * __task_rq_lock - lock the rq @p resides on.
 diff --git a/lib/Kconfig b/lib/Kconfig
 index 5fe577673b98..c44c27cd6e05 100644
 --- a/lib/Kconfig
File diff suppressed because it is too large
@@ -1,6 +1,6 @@
-From 711a56e8f6314d77141b0f661e6c13c8a2c4dddf Mon Sep 17 00:00:00 2001
+From b53bf730e6bba71ebc0ec8452cc2ca399137090e Mon Sep 17 00:00:00 2001
 From: Tor Vic <torvic9@mailbox.org>
-Date: Wed, 16 Nov 2022 11:29:00 +0100
+Date: Mon, 3 Oct 2022 11:19:50 +0200
 Subject: [PATCH] Project-C 6.0-rc0-vd
 
 ---

@@ -22,7 +22,7 @@ Subject: [PATCH] Project-C 6.0-rc0-vd
  kernel/exit.c | 4 +-
  kernel/locking/rtmutex.c | 16 +-
  kernel/sched/Makefile | 5 +
- kernel/sched/alt_core.c | 7959 +++++++++++++++++
+ kernel/sched/alt_core.c | 7937 +++++++++++++++++
  kernel/sched/alt_debug.c | 31 +
  kernel/sched/alt_sched.h | 645 ++
  kernel/sched/bmq.h | 110 +

@@ -43,7 +43,7 @@ Subject: [PATCH] Project-C 6.0-rc0-vd
  kernel/time/hrtimer.c | 2 +
  kernel/time/posix-cpu-timers.c | 10 +-
  kernel/trace/trace_selftest.c | 5 +
- 39 files changed, 9267 insertions(+), 23 deletions(-)
+ 39 files changed, 9245 insertions(+), 23 deletions(-)
  create mode 100644 Documentation/scheduler/sched-BMQ.txt
  create mode 100644 kernel/sched/alt_core.c
  create mode 100644 kernel/sched/alt_debug.c

@@ -685,10 +685,10 @@ index 976092b7bd45..31d587c16ec1 100644
  obj-y += build_utility.o
 diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
 new file mode 100644
-index 000000000000..18dfee354f9b
+index 000000000000..f3bac14124c3
 --- /dev/null
 +++ b/kernel/sched/alt_core.c
-@@ -0,0 +1,7959 @@
+@@ -0,0 +1,7937 @@

@@ -3111,6 +3111,13 @@ index 000000000000..18dfee354f9b
 +	if (!llist)
 +		return;
 +
++	/*
++	 * rq::ttwu_pending racy indication of out-standing wakeups.
++	 * Races such that false-negatives are possible, since they
++	 * are shorter lived that false-positives would be.
++	 */
++	WRITE_ONCE(rq->ttwu_pending, 0);
++
 +	rq_lock_irqsave(rq, &rf);
 +	update_rq_clock(rq);
 +

@@ -3124,17 +3131,6 @@ index 000000000000..18dfee354f9b
 +		ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0);
 +	}
 +
-+	/*
-+	 * Must be after enqueueing at least once task such that
-+	 * idle_cpu() does not observe a false-negative -- if it does,
-+	 * it is possible for select_idle_siblings() to stack a number
-+	 * of tasks on this CPU during that window.
-+	 *
-+	 * It is ok to clear ttwu_pending when another task pending.
-+	 * We will receive IPI after local irq enabled and then enqueue it.
-+	 * Since now nr_running > 0, idle_cpu() will always get correct result.
-+	 */
-+	WRITE_ONCE(rq->ttwu_pending, 0);
 +	rq_unlock_irqrestore(rq, &rf);
 +}
 +

@@ -3605,40 +3601,6 @@ index 000000000000..18dfee354f9b
 +	return success;
 +}
 +
-+static bool __task_needs_rq_lock(struct task_struct *p)
-+{
-+	unsigned int state = READ_ONCE(p->__state);
-+
-+	/*
-+	 * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when
-+	 * the task is blocked. Make sure to check @state since ttwu() can drop
-+	 * locks at the end, see ttwu_queue_wakelist().
-+	 */
-+	if (state == TASK_RUNNING || state == TASK_WAKING)
-+		return true;
-+
-+	/*
-+	 * Ensure we load p->on_rq after p->__state, otherwise it would be
-+	 * possible to, falsely, observe p->on_rq == 0.
-+	 *
-+	 * See try_to_wake_up() for a longer comment.
-+	 */
-+	smp_rmb();
-+	if (p->on_rq)
-+		return true;
-+
-+#ifdef CONFIG_SMP
-+	/*
-+	 * Ensure the task has finished __schedule() and will not be referenced
-+	 * anymore. Again, see try_to_wake_up() for a longer comment.
-+	 */
-+	smp_rmb();
-+	smp_cond_load_acquire(&p->on_cpu, !VAL);
-+#endif
-+
-+	return false;
-+}
-+
 +/**
 + * task_call_func - Invoke a function on task in fixed state
 + * @p: Process for which the function is to be invoked, can be @current.

@@ -3656,12 +3618,28 @@ index 000000000000..18dfee354f9b
 +int task_call_func(struct task_struct *p, task_call_f func, void *arg)
 +{
 +	struct rq *rq = NULL;
++	unsigned int state;
 +	struct rq_flags rf;
 +	int ret;
 +
 +	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
 +
-+	if (__task_needs_rq_lock(p))
++	state = READ_ONCE(p->__state);
++
++	/*
++	 * Ensure we load p->on_rq after p->__state, otherwise it would be
++	 * possible to, falsely, observe p->on_rq == 0.
++	 *
++	 * See try_to_wake_up() for a longer comment.
++	 */
++	smp_rmb();
++
++	/*
++	 * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when
++	 * the task is blocked. Make sure to check @state since ttwu() can drop
++	 * locks at the end, see ttwu_queue_wakelist().
++	 */
++	if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq)
 +		rq = __task_rq_lock(p, &rf);
 +
 +	/*

@@ -10152,55 +10130,5 @@ index a2d301f58ced..2ccdede8585c 100644
 	struct wakeup_test_data *x = data;
 
 --
-2.38.1
+2.37.3
-
-diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
-index f3bac14124c3..27eafbccf23d 100644
---- a/kernel/sched/alt_core.c
-+++ b/kernel/sched/alt_core.c
-@@ -1448,11 +1448,13 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
- 
- 	WARN_ON_ONCE(is_migration_disabled(p));
- #endif
--	if (task_cpu(p) == new_cpu)
--		return;
-+
- 	trace_sched_migrate_task(p, new_cpu);
--	rseq_migrate(p);
--	perf_event_task_migrate(p);
-+
-+	if (task_cpu(p) != new_cpu) {
-+		rseq_migrate(p);
-+		perf_event_task_migrate(p);
-+	}
- 
- 	__set_task_cpu(p, new_cpu);
- }
-
-diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
-index f3bac14124c3..5678c247c0ab 100644
---- a/kernel/sched/alt_core.c
-+++ b/kernel/sched/alt_core.c
-@@ -810,8 +810,8 @@ unsigned long get_wchan(struct task_struct *p)
-  * Context: rq->lock
-  */
- #define __SCHED_DEQUEUE_TASK(p, rq, flags) \
--	psi_dequeue(p, flags & DEQUEUE_SLEEP); \
- 	sched_info_dequeue(rq, p); \
-+	psi_dequeue(p, flags & DEQUEUE_SLEEP); \
- \
- 	list_del(&p->sq_node); \
- 	if (list_empty(&rq->queue.heads[p->sq_idx])) \
-
-diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
-index f3bac14124c3..349a2c92d534 100644
---- a/kernel/sched/alt_core.c
-+++ b/kernel/sched/alt_core.c
-@@ -4404,8 +4404,8 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
- 
- /*
-  * Compile time debug macro
-- * #define ALT_SCHED_DEBUG
-  */
-+#define ALT_SCHED_DEBUG
- 
- #ifdef ALT_SCHED_DEBUG
- void alt_sched_debug(void)
@@ -64,6 +64,140 @@ index 2c7171e0b0010..85de313ddec29 100644
 	select CPU_FREQ_GOV_PERFORMANCE
 	help
 
+From 2535fbde890f14c78b750139fcf87d1143850626 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Tue, 2 Aug 2022 12:28:11 -0400
+Subject: [PATCH] mm: vmscan: fix extreme overreclaim and swap floods
+
+During proactive reclaim, we sometimes observe severe overreclaim, with
+several thousand times more pages reclaimed than requested.
+
+This trace was obtained from shrink_lruvec() during such an instance:
+
+    prio:0 anon_cost:1141521 file_cost:7767
+    nr_reclaimed:4387406 nr_to_reclaim:1047 (or_factor:4190)
+    nr=[7161123 345 578 1111]
+
+While the reclaimer requested 4M, vmscan reclaimed close to 16G, most of it
+by swapping. These requests take over a minute, during which the write()
+to memory.reclaim is unkillably stuck inside the kernel.
+
+Digging into the source, this is caused by the proportional reclaim
+bailout logic. This code tries to resolve a fundamental conflict: to
+reclaim roughly what was requested, while also aging all LRUs fairly and
+in accordance to their size, swappiness, refault rates etc. The way it
+attempts fairness is that once the reclaim goal has been reached, it stops
+scanning the LRUs with the smaller remaining scan targets, and adjusts the
+remainder of the bigger LRUs according to how much of the smaller LRUs was
+scanned. It then finishes scanning that remainder regardless of the
+reclaim goal.
+
+This works fine if priority levels are low and the LRU lists are
+comparable in size. However, in this instance, the cgroup that is
+targeted by proactive reclaim has almost no files left - they've already
+been squeezed out by proactive reclaim earlier - and the remaining anon
+pages are hot. Anon rotations cause the priority level to drop to 0,
+which results in reclaim targeting all of anon (a lot) and all of file
+(almost nothing). By the time reclaim decides to bail, it has scanned
+most or all of the file target, and therefor must also scan most or all of
+the enormous anon target. This target is thousands of times larger than
+the reclaim goal, thus causing the overreclaim.
+
+The bailout code hasn't changed in years, why is this failing now? The
+most likely explanations are two other recent changes in anon reclaim:
+
+1. Before the series starting with commit 5df741963d52 ("mm: fix LRU
+   balancing effect of new transparent huge pages"), the VM was
+   overall relatively reluctant to swap at all, even if swap was
+   configured. This means the LRU balancing code didn't come into play
+   as often as it does now, and mostly in high pressure situations
+   where pronounced swap activity wouldn't be as surprising.
+
+2. For historic reasons, shrink_lruvec() loops on the scan targets of
+   all LRU lists except the active anon one, meaning it would bail if
+   the only remaining pages to scan were active anon - even if there
+   were a lot of them.
+
+   Before the series starting with commit ccc5dc67340c ("mm/vmscan:
+   make active/inactive ratio as 1:1 for anon lru"), most anon pages
+   would live on the active LRU; the inactive one would contain only a
+   handful of preselected reclaim candidates. After the series, anon
+   gets aged similarly to file, and the inactive list is the default
+   for new anon pages as well, making it often the much bigger list.
+
+   As a result, the VM is now more likely to actually finish large
+   anon targets than before.
+
+Change the code such that only one SWAP_CLUSTER_MAX-sized nudge toward the
+larger LRU lists is made before bailing out on a met reclaim goal.
+
+This fixes the extreme overreclaim problem.
+
+Fairness is more subtle and harder to evaluate. No obvious misbehavior
+was observed on the test workload, in any case. Conceptually, fairness
+should primarily be a cumulative effect from regular, lower priority
+scans. Once the VM is in trouble and needs to escalate scan targets to
+make forward progress, fairness needs to take a backseat. This is also
+acknowledged by the myriad exceptions in get_scan_count(). This patch
+makes fairness decrease gradually, as it keeps fairness work static over
+increasing priority levels with growing scan targets. This should make
+more sense - although we may have to re-visit the exact values.
+
+Link: https://lkml.kernel.org/r/20220802162811.39216-1-hannes@cmpxchg.org
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Rik van Riel <riel@surriel.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+ mm/vmscan.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 382dbe97329f33..266eb8cfe93a67 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2955,8 +2955,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ 	enum lru_list lru;
+ 	unsigned long nr_reclaimed = 0;
+ 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
++	bool proportional_reclaim;
+ 	struct blk_plug plug;
+-	bool scan_adjusted;
+ 
+ 	get_scan_count(lruvec, sc, nr);
+ 
+@@ -2974,8 +2974,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ 	 * abort proportional reclaim if either the file or anon lru has already
+ 	 * dropped to zero at the first pass.
+ 	 */
+-	scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+-			 sc->priority == DEF_PRIORITY);
++	proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
++				sc->priority == DEF_PRIORITY);
+ 
+ 	blk_start_plug(&plug);
+ 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+@@ -2995,7 +2995,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ 
+ 		cond_resched();
+ 
+-		if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
++		if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
+ 			continue;
+ 
+ 		/*
+@@ -3046,8 +3046,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ 		nr_scanned = targets[lru] - nr[lru];
+ 		nr[lru] = targets[lru] * (100 - percentage) / 100;
+ 		nr[lru] -= min(nr[lru], nr_scanned);
+-
+-		scan_adjusted = true;
+ 	}
+ 	blk_finish_plug(&plug);
+ 	sc->nr_reclaimed += nr_reclaimed;
 
 From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001
 From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
 Date: Mon, 4 Oct 2021 14:07:34 -0400

@@ -423,3 +557,95 @@ index af6c1a93372d90..002bc26b525e87 100644
 
 	sdw_cdns_probe(cdns);
 
+From 785699dbc7041b99e0027bff27ffe17eba202e96 Mon Sep 17 00:00:00 2001
+From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
+Date: Tue, 4 Oct 2022 07:33:39 -0700
+Subject: [PATCH] drm/amdgpu: Fix VRAM BO swap issue
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+DRM buddy manager allocates the contiguous memory requests in
+a single block or multiple blocks. So for the ttm move operation
+(incase of low vram memory) we should consider all the blocks to
+compute the total memory size which compared with the struct
+ttm_resource num_pages in order to verify that the blocks are
+contiguous for the eviction process.
+
+v2: Added a Fixes tag
+v3: Rewrite the code to save a bit of calculations and
+ variables (Christian)
+
+Fixes: c9cad937c0c5 ("drm/amdgpu: add drm buddy support to amdgpu")
+Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+index 134575a3893c53..794062ab57fca4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+@@ -424,8 +424,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
+ 			       struct ttm_resource *mem)
+ {
+-	uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT;
++	u64 mem_size = (u64)mem->num_pages << PAGE_SHIFT;
+ 	struct amdgpu_res_cursor cursor;
++	u64 end;
+ 
+ 	if (mem->mem_type == TTM_PL_SYSTEM ||
+ 	    mem->mem_type == TTM_PL_TT)
+@@ -434,12 +435,18 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
+ 		return false;
+ 
+ 	amdgpu_res_first(mem, 0, mem_size, &cursor);
++	end = cursor.start + cursor.size;
++	while (cursor.remaining) {
++		amdgpu_res_next(&cursor, cursor.size);
+ 
+-	/* ttm_resource_ioremap only supports contiguous memory */
+-	if (cursor.size != mem_size)
+-		return false;
++		/* ttm_resource_ioremap only supports contiguous memory */
++		if (end != cursor.start)
++			return false;
++
++		end = cursor.start + cursor.size;
++	}
+ 
+-	return cursor.start + cursor.size <= adev->gmc.visible_vram_size;
++	return end <= adev->gmc.visible_vram_size;
+ }
+ 
+ /*
+
+From 6df3912f64cea68409b08d282ffbccf0af7f8d8e Mon Sep 17 00:00:00 2001
+From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
+Date: Mon, 17 Oct 2022 13:15:21 -0700
+Subject: [PATCH] drm/amdgpu: Fix for BO move issue
+
+If there are no blocks to compare then exit
+the loop.
+
+Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+index 794062ab57fca4..9e6c23266a1a0f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+@@ -439,6 +439,9 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
+ 	while (cursor.remaining) {
+ 		amdgpu_res_next(&cursor, cursor.size);
+ 
++		if (!cursor.remaining)
++			break;
++
+ 		/* ttm_resource_ioremap only supports contiguous memory */
+ 		if (end != cursor.start)
+ 			return false;
@@ -64,6 +64,140 @@ index 2c7171e0b0010..85de313ddec29 100644
 	select CPU_FREQ_GOV_PERFORMANCE
 	help
 
+From 2535fbde890f14c78b750139fcf87d1143850626 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Tue, 2 Aug 2022 12:28:11 -0400
+Subject: [PATCH] mm: vmscan: fix extreme overreclaim and swap floods
+
+During proactive reclaim, we sometimes observe severe overreclaim, with
+several thousand times more pages reclaimed than requested.
+
+This trace was obtained from shrink_lruvec() during such an instance:
+
+    prio:0 anon_cost:1141521 file_cost:7767
+    nr_reclaimed:4387406 nr_to_reclaim:1047 (or_factor:4190)
+    nr=[7161123 345 578 1111]
+
+While the reclaimer requested 4M, vmscan reclaimed close to 16G, most of it
+by swapping. These requests take over a minute, during which the write()
+to memory.reclaim is unkillably stuck inside the kernel.
+
+Digging into the source, this is caused by the proportional reclaim
+bailout logic. This code tries to resolve a fundamental conflict: to
+reclaim roughly what was requested, while also aging all LRUs fairly and
+in accordance to their size, swappiness, refault rates etc. The way it
+attempts fairness is that once the reclaim goal has been reached, it stops
+scanning the LRUs with the smaller remaining scan targets, and adjusts the
+remainder of the bigger LRUs according to how much of the smaller LRUs was
+scanned. It then finishes scanning that remainder regardless of the
+reclaim goal.
+
+This works fine if priority levels are low and the LRU lists are
+comparable in size. However, in this instance, the cgroup that is
+targeted by proactive reclaim has almost no files left - they've already
+been squeezed out by proactive reclaim earlier - and the remaining anon
+pages are hot. Anon rotations cause the priority level to drop to 0,
+which results in reclaim targeting all of anon (a lot) and all of file
+(almost nothing). By the time reclaim decides to bail, it has scanned
+most or all of the file target, and therefor must also scan most or all of
+the enormous anon target. This target is thousands of times larger than
+the reclaim goal, thus causing the overreclaim.
+
+The bailout code hasn't changed in years, why is this failing now? The
+most likely explanations are two other recent changes in anon reclaim:
+
+1. Before the series starting with commit 5df741963d52 ("mm: fix LRU
+   balancing effect of new transparent huge pages"), the VM was
+   overall relatively reluctant to swap at all, even if swap was
+   configured. This means the LRU balancing code didn't come into play
+   as often as it does now, and mostly in high pressure situations
+   where pronounced swap activity wouldn't be as surprising.
+
+2. For historic reasons, shrink_lruvec() loops on the scan targets of
+   all LRU lists except the active anon one, meaning it would bail if
+   the only remaining pages to scan were active anon - even if there
+   were a lot of them.
+
+   Before the series starting with commit ccc5dc67340c ("mm/vmscan:
+   make active/inactive ratio as 1:1 for anon lru"), most anon pages
+   would live on the active LRU; the inactive one would contain only a
+   handful of preselected reclaim candidates. After the series, anon
+   gets aged similarly to file, and the inactive list is the default
+   for new anon pages as well, making it often the much bigger list.
+
+   As a result, the VM is now more likely to actually finish large
+   anon targets than before.
+
+Change the code such that only one SWAP_CLUSTER_MAX-sized nudge toward the
+larger LRU lists is made before bailing out on a met reclaim goal.
+
+This fixes the extreme overreclaim problem.
+
+Fairness is more subtle and harder to evaluate. No obvious misbehavior
+was observed on the test workload, in any case. Conceptually, fairness
+should primarily be a cumulative effect from regular, lower priority
+scans. Once the VM is in trouble and needs to escalate scan targets to
+make forward progress, fairness needs to take a backseat. This is also
+acknowledged by the myriad exceptions in get_scan_count(). This patch
+makes fairness decrease gradually, as it keeps fairness work static over
+increasing priority levels with growing scan targets. This should make
+more sense - although we may have to re-visit the exact values.
+
+Link: https://lkml.kernel.org/r/20220802162811.39216-1-hannes@cmpxchg.org
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reviewed-by: Rik van Riel <riel@surriel.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+ mm/vmscan.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 382dbe97329f33..266eb8cfe93a67 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2955,8 +2955,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ 	enum lru_list lru;
+ 	unsigned long nr_reclaimed = 0;
+ 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
++	bool proportional_reclaim;
+ 	struct blk_plug plug;
+-	bool scan_adjusted;
+ 
+ 	get_scan_count(lruvec, sc, nr);
+ 
+@@ -2974,8 +2974,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ 	 * abort proportional reclaim if either the file or anon lru has already
+ 	 * dropped to zero at the first pass.
+ 	 */
+-	scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+-			 sc->priority == DEF_PRIORITY);
++	proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
++				sc->priority == DEF_PRIORITY);
+ 
+ 	blk_start_plug(&plug);
+ 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+@@ -2995,7 +2995,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ 
+ 		cond_resched();
+ 
+-		if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
++		if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
+ 			continue;
+ 
+ 		/*
+@@ -3046,8 +3046,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ 		nr_scanned = targets[lru] - nr[lru];
+ 		nr[lru] = targets[lru] * (100 - percentage) / 100;
+ 		nr[lru] -= min(nr[lru], nr_scanned);
+-
+-		scan_adjusted = true;
+ 	}
+ 	blk_finish_plug(&plug);
+ 	sc->nr_reclaimed += nr_reclaimed;
 
 From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001
 From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
 Date: Mon, 4 Oct 2021 14:07:34 -0400