Compare commits

...

7 Commits

6 changed files with 660 additions and 451 deletions

View File

@@ -3,7 +3,7 @@
# Linux distribution you are using, options are "Arch", "Void", "Ubuntu", "Debian", "Fedora", "Suse", "Gentoo", "Generic". # Linux distribution you are using, options are "Arch", "Void", "Ubuntu", "Debian", "Fedora", "Suse", "Gentoo", "Generic".
# It is automatically set to "Arch" when using PKGBUILD. # It is automatically set to "Arch" when using PKGBUILD.
# If left empty, the script will prompt # If left empty, the script will prompt
_distro="" _distro="Arch"
# Kernel Version - Options are "5.4", and from "5.7" to "5.19" # Kernel Version - Options are "5.4", and from "5.7" to "5.19"
# you can also set a specific kernel version, e.g. "6.0-rc4" or "5.10.51", # you can also set a specific kernel version, e.g. "6.0-rc4" or "5.10.51",
@@ -32,7 +32,7 @@ CUSTOM_GCC_PATH=""
CUSTOM_LLVM_PATH="" CUSTOM_LLVM_PATH=""
# Set to true to bypass makepkg.conf and use all available threads for compilation. False will respect your makepkg.conf options. # Set to true to bypass makepkg.conf and use all available threads for compilation. False will respect your makepkg.conf options.
_force_all_threads="true" _force_all_threads="false"
# Set to true to prevent ccache from being used and set CONFIG_GCC_PLUGINS=y (which needs to be disabled for ccache to work properly) # Set to true to prevent ccache from being used and set CONFIG_GCC_PLUGINS=y (which needs to be disabled for ccache to work properly)
_noccache="false" _noccache="false"
@@ -46,10 +46,10 @@ _modprobeddb="false"
_modprobeddb_db_path=~/.config/modprobed.db _modprobeddb_db_path=~/.config/modprobed.db
# Set to "1" to call make menuconfig, "2" to call make nconfig, "3" to call make xconfig, before building the kernel. Set to false to disable and skip the prompt. # Set to "1" to call make menuconfig, "2" to call make nconfig, "3" to call make xconfig, before building the kernel. Set to false to disable and skip the prompt.
_menunconfig="" _menunconfig="false"
# Set to true to generate a kernel config fragment from your changes in menuconfig/nconfig. Set to false to disable and skip the prompt. # Set to true to generate a kernel config fragment from your changes in menuconfig/nconfig. Set to false to disable and skip the prompt.
_diffconfig="" _diffconfig="false"
# Set to the file name where the generated config fragment should be written to. Only used if _diffconfig is active. # Set to the file name where the generated config fragment should be written to. Only used if _diffconfig is active.
_diffconfig_name="" _diffconfig_name=""
@@ -90,11 +90,11 @@ _STRIP="true"
# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME # LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME
# CPU scheduler - Options are "upds" (TkG's Undead PDS), "pds", "bmq", "muqss", "cacule" or "cfs" (kernel's default) # CPU scheduler - Options are "upds" (TkG's Undead PDS), "pds", "bmq", "muqss", "cacule" or "cfs" (kernel's default)
_cpusched="" _cpusched="pds"
# Compiler to use - Options are "gcc" or "llvm". # Compiler to use - Options are "gcc" or "llvm".
# For advanced users. # For advanced users.
_compiler="" _compiler="gcc"
# Force the use of the LLVM Integrated Assembler whether using LLVM, LTO or not. # Force the use of the LLVM Integrated Assembler whether using LLVM, LTO or not.
# Set to "1" to enable. # Set to "1" to enable.
@@ -124,7 +124,7 @@ _preempt_rt_force=""
# For BMQ: 0: No yield. # For BMQ: 0: No yield.
# 1: Deboost and requeue task. (Default) # 1: Deboost and requeue task. (Default)
# 2: Set rq skip task. # 2: Set rq skip task.
_sched_yield_type="" _sched_yield_type="0"
# Round Robin interval is the longest duration two tasks with the same nice level will be delayed for. When CPU time is requested by a task, it receives a time slice equal # Round Robin interval is the longest duration two tasks with the same nice level will be delayed for. When CPU time is requested by a task, it receives a time slice equal
# to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low value can help offset the disadvantages of rescheduling a process that has yielded. # to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low value can help offset the disadvantages of rescheduling a process that has yielded.
@@ -132,7 +132,7 @@ _sched_yield_type=""
# PDS default: 4ms" # PDS default: 4ms"
# BMQ default: 2ms" # BMQ default: 2ms"
# Set to "1" for 2ms, "2" for 4ms, "3" for 6ms, "4" for 8ms, or "default" to keep the chosen scheduler defaults. # Set to "1" for 2ms, "2" for 4ms, "3" for 6ms, "4" for 8ms, or "default" to keep the chosen scheduler defaults.
_rr_interval="" _rr_interval="2"
# Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false" # Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false"
_ftracedisable="false" _ftracedisable="false"
@@ -145,10 +145,10 @@ _misc_adds="true"
# Set to "0" for periodic ticks, "1" to use CattaRappa mode (enabling full tickless) and "2" for tickless idle only. # Set to "0" for periodic ticks, "1" to use CattaRappa mode (enabling full tickless) and "2" for tickless idle only.
# Full tickless can give higher performances in various cases but, depending on hardware, lower consistency. Just tickless idle can perform better on some platforms (mostly AMD based). # Full tickless can give higher performances in various cases but, depending on hardware, lower consistency. Just tickless idle can perform better on some platforms (mostly AMD based).
_tickless="" _tickless="2"
# Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false" # Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false"
_acs_override="" _acs_override="false"
# Set to "true" to add Bcache filesystem support. You'll have to install bcachefs-tools-git from AUR for utilities - https://bcachefs.org/ - If in doubt, set to "false" # Set to "true" to add Bcache filesystem support. You'll have to install bcachefs-tools-git from AUR for utilities - https://bcachefs.org/ - If in doubt, set to "false"
# This can be buggy and isn't recommended on a production machine, also enabling this option will not allow you to enable MGLRU. # This can be buggy and isn't recommended on a production machine, also enabling this option will not allow you to enable MGLRU.
@@ -177,13 +177,13 @@ _futex_waitv="false"
_winesync="false" _winesync="false"
# Set to "true" to enable Binder and Ashmem, the kernel modules required to use the android emulator Anbox. ! This doesn't apply to 5.4.y ! # Set to "true" to enable Binder and Ashmem, the kernel modules required to use the android emulator Anbox. ! This doesn't apply to 5.4.y !
_anbox="" _anbox="false"
# A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true" # A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true"
_zenify="true" _zenify="true"
# compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "1" # compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "1"
_compileroptlevel="1" _compileroptlevel="2"
# CPU compiler optimizations - Defaults to prompt at kernel config if left empty # CPU compiler optimizations - Defaults to prompt at kernel config if left empty
# AMD CPUs : "k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver" "steamroller" "excavator" "zen" "zen2" "zen3" (zen3 opt support depends on GCC11) # AMD CPUs : "k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver" "steamroller" "excavator" "zen" "zen2" "zen3" (zen3 opt support depends on GCC11)
@@ -197,7 +197,7 @@ _compileroptlevel="1"
# - "generic_v2" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v2 # - "generic_v2" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v2
# - "generic_v3" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v3 # - "generic_v3" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v3
# - "generic_v4" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v4 # - "generic_v4" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v4
_processor_opt="" _processor_opt="skylake"
# MuQSS only - Make IRQ threading compulsory (FORCE_IRQ_THREADING) - Default is "false" # MuQSS only - Make IRQ threading compulsory (FORCE_IRQ_THREADING) - Default is "false"
_irq_threading="false" _irq_threading="false"
@@ -213,7 +213,7 @@ _cacule_rdb_interval="19"
_tt_high_hz="false" _tt_high_hz="false"
# MuQSS and PDS only - SMT (Hyperthreading) aware nice priority and policy support (SMT_NICE) - Kernel default is "true" - You can disable this on non-SMT/HT CPUs for lower overhead # MuQSS and PDS only - SMT (Hyperthreading) aware nice priority and policy support (SMT_NICE) - Kernel default is "true" - You can disable this on non-SMT/HT CPUs for lower overhead
_smt_nice="" _smt_nice="true"
# Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false" # Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false"
_random_trust_cpu="false" _random_trust_cpu="false"
@@ -223,7 +223,7 @@ _random_trust_cpu="false"
_runqueue_sharing="" _runqueue_sharing=""
# Timer frequency - "100" "250" "300" "500" "750" "1000" ("2000" is available for cacule cpusched only) - More options available in kernel config prompt when left empty depending on selected cpusched with the default option pointed with a ">" (2000 for cacule, 100 for muqss and 1000 for other cpu schedulers) # Timer frequency - "100" "250" "300" "500" "750" "1000" ("2000" is available for cacule cpusched only) - More options available in kernel config prompt when left empty depending on selected cpusched with the default option pointed with a ">" (2000 for cacule, 100 for muqss and 1000 for other cpu schedulers)
_timer_freq="" _timer_freq="500"
# Default CPU governor - "performance", "ondemand", "schedutil" or leave empty for default (schedutil) # Default CPU governor - "performance", "ondemand", "schedutil" or leave empty for default (schedutil)
_default_cpu_gov="ondemand" _default_cpu_gov="ondemand"

View File

@@ -1,6 +1,6 @@
# #
# Automatically generated file; DO NOT EDIT. # Automatically generated file; DO NOT EDIT.
# Linux/x86 6.0.0-arch1 Kernel Configuration # Linux/x86 6.0.3-arch2 Kernel Configuration
# #
CONFIG_CC_VERSION_TEXT="gcc (GCC) 12.2.0" CONFIG_CC_VERSION_TEXT="gcc (GCC) 12.2.0"
CONFIG_CC_IS_GCC=y CONFIG_CC_IS_GCC=y
@@ -7596,6 +7596,7 @@ CONFIG_HID_SMARTJOYPLUS=m
CONFIG_SMARTJOYPLUS_FF=y CONFIG_SMARTJOYPLUS_FF=y
CONFIG_HID_TIVO=m CONFIG_HID_TIVO=m
CONFIG_HID_TOPSEED=m CONFIG_HID_TOPSEED=m
CONFIG_HID_TOPRE=m
CONFIG_HID_THINGM=m CONFIG_HID_THINGM=m
CONFIG_HID_THRUSTMASTER=m CONFIG_HID_THRUSTMASTER=m
CONFIG_THRUSTMASTER_FF=y CONFIG_THRUSTMASTER_FF=y
@@ -10374,6 +10375,7 @@ CONFIG_LSM="landlock,lockdown,yama,integrity,bpf"
# Memory initialization # Memory initialization
# #
CONFIG_CC_HAS_AUTO_VAR_INIT_PATTERN=y CONFIG_CC_HAS_AUTO_VAR_INIT_PATTERN=y
CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_BARE=y
CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO=y CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO=y
# CONFIG_INIT_STACK_NONE is not set # CONFIG_INIT_STACK_NONE is not set
# CONFIG_INIT_STACK_ALL_PATTERN is not set # CONFIG_INIT_STACK_ALL_PATTERN is not set
@@ -10849,6 +10851,7 @@ CONFIG_DEBUG_KERNEL=y
# Compile-time checks and compiler options # Compile-time checks and compiler options
# #
CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO=y
CONFIG_AS_HAS_NON_CONST_LEB128=y
# CONFIG_DEBUG_INFO_NONE is not set # CONFIG_DEBUG_INFO_NONE is not set
# CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set # CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set
# CONFIG_DEBUG_INFO_DWARF4 is not set # CONFIG_DEBUG_INFO_DWARF4 is not set

View File

@@ -1,10 +1,10 @@
#!/bin/bash #!/bin/bash
# List of kernels that are maintained upstream # List of kernels that are maintained upstream
_current_kernels=("6.1" "6.0" "5.19" "5.15" "5.10" "5.4") _current_kernels=("6.1" "6.0" "5.15" "5.10" "5.4")
# List of kernels that are no longer maintained upstream # List of kernels that are no longer maintained upstream
_eol_kernels=("5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7") _eol_kernels=("5.19" "5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")
typeset -Ag _kernel_git_remotes typeset -Ag _kernel_git_remotes
_kernel_git_remotes=( _kernel_git_remotes=(
@@ -705,7 +705,7 @@ _tkg_srcprep() {
elif [ "$_kver" = "509" ]; then elif [ "$_kver" = "509" ]; then
rev=3 rev=3
elif [ "$_kver" = "510" ]; then elif [ "$_kver" = "510" ]; then
rev=3 rev=5
elif [ "$_kver" = "511" ]; then elif [ "$_kver" = "511" ]; then
rev=3 rev=3
elif [ "$_kver" = "512" ]; then elif [ "$_kver" = "512" ]; then
@@ -941,10 +941,10 @@ _tkg_srcprep() {
# cpu opt # cpu opt
_cpu_marchs=("native_amd" "native_intel" "generic_cpu" "generic_cpu2" "generic_cpu3" "generic_cpu4") _cpu_marchs=("native_amd" "native_intel" "generic_cpu" "generic_cpu2" "generic_cpu3" "generic_cpu4")
_cpu_marchs+=("k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver") _cpu_marchs+=("k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver")
_cpu_marchs+=("steamroller" "excavator" "zen" "zen2" "zen3" "mpsc" "atom" "core2" "nehalem" "westmere") _cpu_marchs+=("steamroller" "excavator" "zen" "zen2" "zen3" "zen4" "mpsc" "atom" "core2" "nehalem" "westmere")
_cpu_marchs+=("bonnell" "silvermont" "sandybridge" "ivybridge" "haswell" "broadwell" "skylake") _cpu_marchs+=("bonnell" "silvermont" "sandybridge" "ivybridge" "haswell" "broadwell" "skylake")
_cpu_marchs+=("skylakex" "cannonlake" "icelake" "goldmont" "goldmontplus" "cascadelake") _cpu_marchs+=("skylakex" "cannonlake" "icelake" "goldmont" "goldmontplus" "cascadelake")
_cpu_marchs+=("cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake") _cpu_marchs+=("cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake" "meteorlake")
typeset -A _generic_march_map typeset -A _generic_march_map
_generic_march_map=( _generic_march_map=(

View File

@@ -522,3 +522,130 @@ index 1fd3cbca20a2..c7bf189d50de 100644
-- --
2.25.1 2.25.1
From fb23dad87a0bfb6fdfde3dc1d18104da631d050a Mon Sep 17 00:00:00 2001
From: Sjoerd Simons <sjoerd@collabora.com>
Date: Sat, 8 Oct 2022 21:57:51 +0200
Subject: [PATCH] soundwire: intel: Initialize clock stop timeout
The bus->clk_stop_timeout member is only initialized to a non-zero value
during the codec driver probe. This can lead to corner cases where this
value remains pegged at zero when the bus suspends, which results in an
endless loop in sdw_bus_wait_for_clk_prep_deprep().
Corner cases include configurations with no codecs described in the
firmware, or delays in probing codec drivers.
Initializing the default timeout to the smallest non-zero value avoid this
problem and allows for the existing logic to be preserved: the
bus->clk_stop_timeout is set as the maximum required by all codecs
connected on the bus.
Signed-off-by: Sjoerd Simons <sjoerd@collabora.com>
---
drivers/soundwire/intel.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index af6c1a93372d90..002bc26b525e87 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -1307,6 +1307,7 @@ static int intel_link_probe(struct auxiliary_device *auxdev,
cdns->msg_count = 0;
bus->link_id = auxdev->id;
+ bus->clk_stop_timeout = 1;
sdw_cdns_probe(cdns);
From 785699dbc7041b99e0027bff27ffe17eba202e96 Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Tue, 4 Oct 2022 07:33:39 -0700
Subject: [PATCH] drm/amdgpu: Fix VRAM BO swap issue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
DRM buddy manager allocates the contiguous memory requests in
a single block or multiple blocks. So for the ttm move operation
(incase of low vram memory) we should consider all the blocks to
compute the total memory size which compared with the struct
ttm_resource num_pages in order to verify that the blocks are
contiguous for the eviction process.
v2: Added a Fixes tag
v3: Rewrite the code to save a bit of calculations and
variables (Christian)
Fixes: c9cad937c0c5 ("drm/amdgpu: add drm buddy support to amdgpu")
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 134575a3893c53..794062ab57fca4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -424,8 +424,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
static bool amdgpu_mem_visible(struct amdgpu_device *adev,
struct ttm_resource *mem)
{
- uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT;
+ u64 mem_size = (u64)mem->num_pages << PAGE_SHIFT;
struct amdgpu_res_cursor cursor;
+ u64 end;
if (mem->mem_type == TTM_PL_SYSTEM ||
mem->mem_type == TTM_PL_TT)
@@ -434,12 +435,18 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
return false;
amdgpu_res_first(mem, 0, mem_size, &cursor);
+ end = cursor.start + cursor.size;
+ while (cursor.remaining) {
+ amdgpu_res_next(&cursor, cursor.size);
- /* ttm_resource_ioremap only supports contiguous memory */
- if (cursor.size != mem_size)
- return false;
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (end != cursor.start)
+ return false;
+
+ end = cursor.start + cursor.size;
+ }
- return cursor.start + cursor.size <= adev->gmc.visible_vram_size;
+ return end <= adev->gmc.visible_vram_size;
}
/*
From 6df3912f64cea68409b08d282ffbccf0af7f8d8e Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Mon, 17 Oct 2022 13:15:21 -0700
Subject: [PATCH] drm/amdgpu: Fix for BO move issue
If there are no blocks to compare then exit
the loop.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 794062ab57fca4..9e6c23266a1a0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -439,6 +439,9 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
while (cursor.remaining) {
amdgpu_res_next(&cursor, cursor.size);
+ if (!cursor.remaining)
+ break;
+
/* ttm_resource_ioremap only supports contiguous memory */
if (end != cursor.start)
return false;

View File

@@ -426,99 +426,3 @@ index 4bf4ea6cbb5eee..4850dafbaa05fb 100644
} }
/* ---- Socket functions ---- */ /* ---- Socket functions ---- */
From 50e6a66675f6c9835d4f1d4f8c947d1699ce8e24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 7 Oct 2022 09:51:13 +0200
Subject: [PATCH 4/5] drm/sched: add DRM_SCHED_FENCE_DONT_PIPELINE flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Setting this flag on a scheduler fence prevents pipelining of jobs
depending on this fence. In other words we always insert a full CPU
round trip before dependen jobs are pushed to the pipeline.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/scheduler/sched_entity.c | 3 ++-
include/drm/gpu_scheduler.h | 9 +++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 6b25b2f4f5a3..6137537aaea4 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -385,7 +385,8 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
}
s_fence = to_drm_sched_fence(fence);
- if (s_fence && s_fence->sched == sched) {
+ if (s_fence && s_fence->sched == sched &&
+ !test_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &fence->flags)) {
/*
* Fence is from the same scheduler, only need to wait for
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index addb135eeea6..289a33e80639 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -32,6 +32,15 @@
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
+/**
+ * DRM_SCHED_FENCE_DONT_PIPELINE - Prefent dependency pipelining
+ *
+ * Setting this flag on a scheduler fence prevents pipelining of jobs depending
+ * on this fence. In other words we always insert a full CPU round trip before
+ * dependen jobs are pushed to the hw queue.
+ */
+#define DRM_SCHED_FENCE_DONT_PIPELINE DMA_FENCE_FLAG_USER_BITS
+
struct drm_gem_object;
struct drm_gpu_scheduler;
--
2.25.1
From e15e1601fba660124acd7ad41b6f61d46a1c4835 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 7 Oct 2022 10:59:58 +0200
Subject: [PATCH 5/5] drm/amdgpu: use DRM_SCHED_FENCE_DONT_PIPELINE for VM
updates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Make sure that we always have a CPU round trip to let the submission
code correctly decide if a TLB flush is necessary or not.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 1fd3cbca20a2..c7bf189d50de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -115,8 +115,15 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
amdgpu_bo_fence(p->vm->root.bo, f, true);
}
- if (fence && !p->immediate)
+ if (fence && !p->immediate) {
+ /*
+ * Most hw generations now have a separate queue for page table
+ * updates, but when the queue is shared with userspace we need
+ * the extra CPU round trip to correctly flush the TLB.
+ */
+ set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags);
swap(*fence, f);
+ }
dma_fence_put(f);
return 0;
--
2.25.1