PDS Kernel Configuration

2022-10-18 16:41:44 -04:00
6 changed files with 439 additions and 652 deletions
--- a/customization.cfg
+++ b/customization.cfg
@@ -144,9 +144,7 @@ _numadisable="false"
 _misc_adds="true"

 # Set to "0" for periodic ticks, "1" to use CattaRappa mode (enabling full tickless) and "2" for tickless idle only.
-# Full tickless can give higher performances in case you use isolation of CPUs for tasks
-# and it works only when using the nohz_full kernel parameter, otherwise behaves like idle.
-# Just tickless idle perform better for most platforms.
+# Full tickless can give higher performance in various cases but, depending on hardware, lower consistency. Tickless idle alone can perform better on some platforms (mostly AMD-based).
 _tickless="2"

 # Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false"
--- a/linux-tkg-config/6.0/config.x86_64
+++ b/linux-tkg-config/6.0/config.x86_64
@@ -1,6 +1,6 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux/x86 6.0.3-arch2 Kernel Configuration
+# Linux/x86 6.0.0-arch1 Kernel Configuration
 #
 CONFIG_CC_VERSION_TEXT="gcc (GCC) 12.2.0"
 CONFIG_CC_IS_GCC=y
@@ -7596,7 +7596,6 @@ CONFIG_HID_SMARTJOYPLUS=m
 CONFIG_SMARTJOYPLUS_FF=y
 CONFIG_HID_TIVO=m
 CONFIG_HID_TOPSEED=m
-CONFIG_HID_TOPRE=m
 CONFIG_HID_THINGM=m
 CONFIG_HID_THRUSTMASTER=m
 CONFIG_THRUSTMASTER_FF=y
@@ -10375,7 +10374,6 @@ CONFIG_LSM="landlock,lockdown,yama,integrity,bpf"
 # Memory initialization
 #
 CONFIG_CC_HAS_AUTO_VAR_INIT_PATTERN=y
-CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_BARE=y
 CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO=y
 # CONFIG_INIT_STACK_NONE is not set
 # CONFIG_INIT_STACK_ALL_PATTERN is not set
@@ -10851,7 +10849,6 @@ CONFIG_DEBUG_KERNEL=y
 # Compile-time checks and compiler options
 #
 CONFIG_DEBUG_INFO=y
-CONFIG_AS_HAS_NON_CONST_LEB128=y
 # CONFIG_DEBUG_INFO_NONE is not set
 # CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set
 # CONFIG_DEBUG_INFO_DWARF4 is not set
--- a/linux-tkg-config/prepare
+++ b/linux-tkg-config/prepare
@@ -1,10 +1,10 @@
 #!/bin/bash

 # List of kernels that are maintained upstream
-_current_kernels=("6.1" "6.0" "5.15" "5.10" "5.4")
+_current_kernels=("6.1" "6.0" "5.19" "5.15" "5.10" "5.4")

 # List of kernels that are no longer maintained upstream
-_eol_kernels=("5.19" "5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")
+_eol_kernels=("5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")

 typeset -Ag _kernel_git_remotes
 _kernel_git_remotes=(
@@ -287,7 +287,7 @@ _set_cpu_scheduler() {
  elif [ "$_kver" = "514" ]; then
    _avail_cpu_scheds=("pds" "bmq" "cacule" "cfs")
  elif [ "$_kver" = "515" ]; then
-    _avail_cpu_scheds=("pds" "bmq" "cacule" "tt" "bore" "cfs")
+    _avail_cpu_scheds=("pds" "bmq" "cacule" "tt" "cfs")
  elif [ "$_kver" = "516" ]; then
    _avail_cpu_scheds=("pds" "bmq" "cacule" "cfs")
  elif [ "$_kver" = "517" ]; then
@@ -298,8 +298,6 @@ _set_cpu_scheduler() {
    _avail_cpu_scheds=("cfs" "pds" "bmq" "cacule" "tt" "bore")
  elif [ "$_kver" = "600" ]; then
    _avail_cpu_scheds=("cfs" "tt" "bore")
-  elif [ "$_kver" = "601" ]; then
-    _avail_cpu_scheds=("cfs" "bore")
  else
    _avail_cpu_scheds=("cfs")
  fi
@@ -707,7 +705,7 @@ _tkg_srcprep() {
  elif [ "$_kver" = "509" ]; then
    rev=3
  elif [ "$_kver" = "510" ]; then
-    rev=5
+    rev=3
  elif [ "$_kver" = "511" ]; then
    rev=3
  elif [ "$_kver" = "512" ]; then
@@ -943,10 +941,10 @@ _tkg_srcprep() {
  # cpu opt
  _cpu_marchs=("native_amd" "native_intel" "generic_cpu" "generic_cpu2" "generic_cpu3" "generic_cpu4")
  _cpu_marchs+=("k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver")
-  _cpu_marchs+=("steamroller" "excavator" "zen" "zen2" "zen3" "zen4" "mpsc" "atom" "core2" "nehalem" "westmere")
+  _cpu_marchs+=("steamroller" "excavator" "zen" "zen2" "zen3" "mpsc" "atom" "core2" "nehalem" "westmere")
  _cpu_marchs+=("bonnell" "silvermont" "sandybridge" "ivybridge" "haswell" "broadwell" "skylake")
  _cpu_marchs+=("skylakex" "cannonlake" "icelake" "goldmont" "goldmontplus" "cascadelake")
-  _cpu_marchs+=("cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake" "meteorlake")
+  _cpu_marchs+=("cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake")

  typeset -A _generic_march_map
  _generic_march_map=(
@@ -1330,7 +1328,7 @@ _tkg_srcprep() {
    plain "Use CattaRappa mode (Tickless/Dynticks) ?"
    _tickless_array_text=(
      "No, use periodic ticks."
-      "Yes, full tickless baby!\n       Full tickless can give higher performances in case you use isolation of CPUs for task, in other cases it behaves as Idle."
+      "Yes, full tickless baby!\n        Can give higher performances in many cases but lower consistency on some hardware."
      "Just tickless idle plz.\n        Just tickless idle can perform better with some platforms (mostly AMD) or CPU schedulers (mostly MuQSS)."
    )
    _default_index="2"
--- a/linux-tkg-patches/5.10/0009-prjc_v5.10-r3.patch
+++ b/linux-tkg-patches/5.10/0009-prjc_v5.10-r3.patch
--- a/linux-tkg-patches/6.0/0012-misc-additions.patch
+++ b/linux-tkg-patches/6.0/0012-misc-additions.patch
@@ -522,130 +522,3 @@ index 1fd3cbca20a2..c7bf189d50de 100644
 --
 2.25.1

-From fb23dad87a0bfb6fdfde3dc1d18104da631d050a Mon Sep 17 00:00:00 2001
-From: Sjoerd Simons <sjoerd@collabora.com>
-Date: Sat, 8 Oct 2022 21:57:51 +0200
-Subject: [PATCH] soundwire: intel: Initialize clock stop timeout
-
-The bus->clk_stop_timeout member is only initialized to a non-zero value
-during the codec driver probe. This can lead to corner cases where this
-value remains pegged at zero when the bus suspends, which results in an
-endless loop in sdw_bus_wait_for_clk_prep_deprep().
-
-Corner cases include configurations with no codecs described in the
-firmware, or delays in probing codec drivers.
-
-Initializing the default timeout to the smallest non-zero value avoid this
-problem and allows for the existing logic to be preserved: the
-bus->clk_stop_timeout is set as the maximum required by all codecs
-connected on the bus.
-
-Signed-off-by: Sjoerd Simons <sjoerd@collabora.com>
---
- drivers/soundwire/intel.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
-index af6c1a93372d90..002bc26b525e87 100644
--- a/drivers/soundwire/intel.c
-+++ b/drivers/soundwire/intel.c
-@@ -1307,6 +1307,7 @@ static int intel_link_probe(struct auxiliary_device *auxdev,
- 	cdns->msg_count = 0;
-
- 	bus->link_id = auxdev->id;
-+	bus->clk_stop_timeout = 1;
-
- 	sdw_cdns_probe(cdns);
-
-From 785699dbc7041b99e0027bff27ffe17eba202e96 Mon Sep 17 00:00:00 2001
-From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
-Date: Tue, 4 Oct 2022 07:33:39 -0700
-Subject: [PATCH] drm/amdgpu: Fix VRAM BO swap issue
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-DRM buddy manager allocates the contiguous memory requests in
-a single block or multiple blocks. So for the ttm move operation
-(incase of low vram memory) we should consider all the blocks to
-compute the total memory size which compared with the struct
-ttm_resource num_pages in order to verify that the blocks are
-contiguous for the eviction process.
-
-v2: Added a Fixes tag
-v3: Rewrite the code to save a bit of calculations and
-    variables (Christian)
-
-Fixes: c9cad937c0c5 ("drm/amdgpu: add drm buddy support to amdgpu")
-Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
-Reviewed-by: Christian König <christian.koenig@amd.com>
-Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 ++++++++++++-----
- 1 file changed, 12 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
-index 134575a3893c53..794062ab57fca4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
-@@ -424,8 +424,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
- static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- 			       struct ttm_resource *mem)
- {
-	uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT;
-+	u64 mem_size = (u64)mem->num_pages << PAGE_SHIFT;
- 	struct amdgpu_res_cursor cursor;
-+	u64 end;
-
- 	if (mem->mem_type == TTM_PL_SYSTEM ||
- 	    mem->mem_type == TTM_PL_TT)
-@@ -434,12 +435,18 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- 		return false;
-
- 	amdgpu_res_first(mem, 0, mem_size, &cursor);
-+	end = cursor.start + cursor.size;
-+	while (cursor.remaining) {
-+		amdgpu_res_next(&cursor, cursor.size);
-
-	/* ttm_resource_ioremap only supports contiguous memory */
-	if (cursor.size != mem_size)
-		return false;
-+		/* ttm_resource_ioremap only supports contiguous memory */
-+		if (end != cursor.start)
-+			return false;
-+
-+		end = cursor.start + cursor.size;
-+	}
-
-	return cursor.start + cursor.size <= adev->gmc.visible_vram_size;
-+	return end <= adev->gmc.visible_vram_size;
- }
-
- /*
-From 6df3912f64cea68409b08d282ffbccf0af7f8d8e Mon Sep 17 00:00:00 2001
-From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
-Date: Mon, 17 Oct 2022 13:15:21 -0700
-Subject: [PATCH] drm/amdgpu: Fix for BO move issue
-
-If there are no blocks to compare then exit
-the loop.
-
-Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
-index 794062ab57fca4..9e6c23266a1a0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
-@@ -439,6 +439,9 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- 	while (cursor.remaining) {
- 		amdgpu_res_next(&cursor, cursor.size);
-
-+		if (!cursor.remaining)
-+			break;
-+
- 		/* ttm_resource_ioremap only supports contiguous memory */
- 		if (end != cursor.start)
- 			return false;
--- a/linux-tkg-patches/6.1/0012-misc-additions.patch
+++ b/linux-tkg-patches/6.1/0012-misc-additions.patch
@@ -426,3 +426,99 @@ index 4bf4ea6cbb5eee..4850dafbaa05fb 100644
 }

 /* ---- Socket functions ---- */
+From 50e6a66675f6c9835d4f1d4f8c947d1699ce8e24 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Fri, 7 Oct 2022 09:51:13 +0200
+Subject: [PATCH 4/5] drm/sched: add DRM_SCHED_FENCE_DONT_PIPELINE flag
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Setting this flag on a scheduler fence prevents pipelining of jobs
+depending on this fence. In other words we always insert a full CPU
+round trip before dependent jobs are pushed to the pipeline.
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+---
+ drivers/gpu/drm/scheduler/sched_entity.c | 3 ++-
+ include/drm/gpu_scheduler.h              | 9 +++++++++
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
+index 6b25b2f4f5a3..6137537aaea4 100644
+--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
+@@ -385,7 +385,8 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
+ 	}
+
+ 	s_fence = to_drm_sched_fence(fence);
+-	if (s_fence && s_fence->sched == sched) {
+	if (s_fence && s_fence->sched == sched &&
+	    !test_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &fence->flags)) {
+
+ 		/*
+ 		 * Fence is from the same scheduler, only need to wait for
+diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
+index addb135eeea6..289a33e80639 100644
+--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
+@@ -32,6 +32,15 @@
+
+ #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
+
+/**
+ * DRM_SCHED_FENCE_DONT_PIPELINE - Prevent dependency pipelining
+ *
+ * Setting this flag on a scheduler fence prevents pipelining of jobs depending
+ * on this fence. In other words we always insert a full CPU round trip before
+ * dependent jobs are pushed to the hw queue.
+ */
+#define DRM_SCHED_FENCE_DONT_PIPELINE	DMA_FENCE_FLAG_USER_BITS
+
+ struct drm_gem_object;
+
+ struct drm_gpu_scheduler;
+--
+2.25.1
+
+From e15e1601fba660124acd7ad41b6f61d46a1c4835 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Fri, 7 Oct 2022 10:59:58 +0200
+Subject: [PATCH 5/5] drm/amdgpu: use DRM_SCHED_FENCE_DONT_PIPELINE for VM
+ updates
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Make sure that we always have a CPU round trip to let the submission
+code correctly decide if a TLB flush is necessary or not.
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+index 1fd3cbca20a2..c7bf189d50de 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+@@ -115,8 +115,15 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
+ 		amdgpu_bo_fence(p->vm->root.bo, f, true);
+ 	}
+
+-	if (fence && !p->immediate)
+	if (fence && !p->immediate) {
+		/*
+		 * Most hw generations now have a separate queue for page table
+		 * updates, but when the queue is shared with userspace we need
+		 * the extra CPU round trip to correctly flush the TLB.
+		 */
+		set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags);
+ 		swap(*fence, f);
+	}
+ 	dma_fence_put(f);
+ 	return 0;
+
+--
+2.25.1
+