Compare commits
20 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
06e2ade846 | ||
|
5298957d9b | ||
|
8c776ecf64 | ||
|
a6877bd422 | ||
|
12ff2a498f | ||
|
b57e1824c2 | ||
|
c141a8c304 | ||
|
5c3621fc6a | ||
|
be5dfffb8d | ||
|
1fc60cc388 | ||
|
73e9a2eb3c | ||
|
f4324da8a1 | ||
|
1854907001 | ||
|
348f3c9bb6 | ||
|
9948b85dac | ||
|
eb4c793ca8 | ||
|
367925c322 | ||
|
d0d6f02944 | ||
|
38e73fb059 | ||
|
4547454e3d |
4
PKGBUILD
4
PKGBUILD
@@ -57,7 +57,7 @@ else
|
|||||||
fi
|
fi
|
||||||
pkgname=("${pkgbase}" "${pkgbase}-headers")
|
pkgname=("${pkgbase}" "${pkgbase}-headers")
|
||||||
pkgver="${_basekernel}"."${_sub}"
|
pkgver="${_basekernel}"."${_sub}"
|
||||||
pkgrel=272
|
pkgrel=273
|
||||||
pkgdesc='Linux-tkg'
|
pkgdesc='Linux-tkg'
|
||||||
arch=('x86_64') # no i686 in here
|
arch=('x86_64') # no i686 in here
|
||||||
url="https://www.kernel.org/"
|
url="https://www.kernel.org/"
|
||||||
@@ -262,7 +262,7 @@ hackheaders() {
|
|||||||
msg2 "Stripping build tools..."
|
msg2 "Stripping build tools..."
|
||||||
local file
|
local file
|
||||||
while read -rd '' file; do
|
while read -rd '' file; do
|
||||||
case "$(file -bi "$file")" in
|
case "$(file -Sib "$file")" in
|
||||||
application/x-sharedlib\;*) # Libraries (.so)
|
application/x-sharedlib\;*) # Libraries (.so)
|
||||||
strip -v $STRIP_SHARED "$file" ;;
|
strip -v $STRIP_SHARED "$file" ;;
|
||||||
application/x-archive\;*) # Libraries (.a)
|
application/x-archive\;*) # Libraries (.a)
|
||||||
|
@@ -3,7 +3,7 @@
|
|||||||
# Linux distribution you are using, options are "Arch", "Void", "Ubuntu", "Debian", "Fedora", "Suse", "Gentoo", "Generic".
|
# Linux distribution you are using, options are "Arch", "Void", "Ubuntu", "Debian", "Fedora", "Suse", "Gentoo", "Generic".
|
||||||
# It is automatically set to "Arch" when using PKGBUILD.
|
# It is automatically set to "Arch" when using PKGBUILD.
|
||||||
# If left empty, the script will prompt
|
# If left empty, the script will prompt
|
||||||
_distro=""
|
_distro="Arch"
|
||||||
|
|
||||||
# Kernel Version - Options are "5.4", and from "5.7" to "5.19"
|
# Kernel Version - Options are "5.4", and from "5.7" to "5.19"
|
||||||
# you can also set a specific kernel version, e.g. "6.0-rc4" or "5.10.51",
|
# you can also set a specific kernel version, e.g. "6.0-rc4" or "5.10.51",
|
||||||
@@ -32,7 +32,7 @@ CUSTOM_GCC_PATH=""
|
|||||||
CUSTOM_LLVM_PATH=""
|
CUSTOM_LLVM_PATH=""
|
||||||
|
|
||||||
# Set to true to bypass makepkg.conf and use all available threads for compilation. False will respect your makepkg.conf options.
|
# Set to true to bypass makepkg.conf and use all available threads for compilation. False will respect your makepkg.conf options.
|
||||||
_force_all_threads="true"
|
_force_all_threads="false"
|
||||||
|
|
||||||
# Set to true to prevent ccache from being used and set CONFIG_GCC_PLUGINS=y (which needs to be disabled for ccache to work properly)
|
# Set to true to prevent ccache from being used and set CONFIG_GCC_PLUGINS=y (which needs to be disabled for ccache to work properly)
|
||||||
_noccache="false"
|
_noccache="false"
|
||||||
@@ -46,10 +46,10 @@ _modprobeddb="false"
|
|||||||
_modprobeddb_db_path=~/.config/modprobed.db
|
_modprobeddb_db_path=~/.config/modprobed.db
|
||||||
|
|
||||||
# Set to "1" to call make menuconfig, "2" to call make nconfig, "3" to call make xconfig, before building the kernel. Set to false to disable and skip the prompt.
|
# Set to "1" to call make menuconfig, "2" to call make nconfig, "3" to call make xconfig, before building the kernel. Set to false to disable and skip the prompt.
|
||||||
_menunconfig=""
|
_menunconfig="false"
|
||||||
|
|
||||||
# Set to true to generate a kernel config fragment from your changes in menuconfig/nconfig. Set to false to disable and skip the prompt.
|
# Set to true to generate a kernel config fragment from your changes in menuconfig/nconfig. Set to false to disable and skip the prompt.
|
||||||
_diffconfig=""
|
_diffconfig="false"
|
||||||
|
|
||||||
# Set to the file name where the generated config fragment should be written to. Only used if _diffconfig is active.
|
# Set to the file name where the generated config fragment should be written to. Only used if _diffconfig is active.
|
||||||
_diffconfig_name=""
|
_diffconfig_name=""
|
||||||
@@ -57,6 +57,10 @@ _diffconfig_name=""
|
|||||||
# [install.sh specific] Use tmpfs as a work directory, recommended when RAM >= 32GB to reduce HDD/SSD usage. For more information, see https://wiki.archlinux.org/title/Tmpfs
|
# [install.sh specific] Use tmpfs as a work directory, recommended when RAM >= 32GB to reduce HDD/SSD usage. For more information, see https://wiki.archlinux.org/title/Tmpfs
|
||||||
_use_tmpfs="false"
|
_use_tmpfs="false"
|
||||||
|
|
||||||
|
# Always make a fresh clone of the source in tmpfs to speed up compilation times
|
||||||
|
# ! This will take ~20GB of RAM by itself, so don't use on <32GB RAM systems !
|
||||||
|
_source_in_tmpfs="false"
|
||||||
|
|
||||||
# [install.sh specific] tmpfs folder path, only used when _use_tmpfs="true".
|
# [install.sh specific] tmpfs folder path, only used when _use_tmpfs="true".
|
||||||
# Creates a linux-tkg work folder within that pathmake sure to have nothing important in "$_tmpfs_path/linux-tkg"
|
# Creates a linux-tkg work folder within that pathmake sure to have nothing important in "$_tmpfs_path/linux-tkg"
|
||||||
_tmpfs_path="/tmp"
|
_tmpfs_path="/tmp"
|
||||||
@@ -90,11 +94,11 @@ _STRIP="true"
|
|||||||
# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME
|
# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME
|
||||||
|
|
||||||
# CPU scheduler - Options are "upds" (TkG's Undead PDS), "pds", "bmq", "muqss", "cacule" or "cfs" (kernel's default)
|
# CPU scheduler - Options are "upds" (TkG's Undead PDS), "pds", "bmq", "muqss", "cacule" or "cfs" (kernel's default)
|
||||||
_cpusched=""
|
_cpusched="pds"
|
||||||
|
|
||||||
# Compiler to use - Options are "gcc" or "llvm".
|
# Compiler to use - Options are "gcc" or "llvm".
|
||||||
# For advanced users.
|
# For advanced users.
|
||||||
_compiler=""
|
_compiler="gcc"
|
||||||
|
|
||||||
# Force the use of the LLVM Integrated Assembler whether using LLVM, LTO or not.
|
# Force the use of the LLVM Integrated Assembler whether using LLVM, LTO or not.
|
||||||
# Set to "1" to enable.
|
# Set to "1" to enable.
|
||||||
@@ -124,7 +128,7 @@ _preempt_rt_force=""
|
|||||||
# For BMQ: 0: No yield.
|
# For BMQ: 0: No yield.
|
||||||
# 1: Deboost and requeue task. (Default)
|
# 1: Deboost and requeue task. (Default)
|
||||||
# 2: Set rq skip task.
|
# 2: Set rq skip task.
|
||||||
_sched_yield_type=""
|
_sched_yield_type="0"
|
||||||
|
|
||||||
# Round Robin interval is the longest duration two tasks with the same nice level will be delayed for. When CPU time is requested by a task, it receives a time slice equal
|
# Round Robin interval is the longest duration two tasks with the same nice level will be delayed for. When CPU time is requested by a task, it receives a time slice equal
|
||||||
# to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low value can help offset the disadvantages of rescheduling a process that has yielded.
|
# to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low value can help offset the disadvantages of rescheduling a process that has yielded.
|
||||||
@@ -132,7 +136,7 @@ _sched_yield_type=""
|
|||||||
# PDS default: 4ms"
|
# PDS default: 4ms"
|
||||||
# BMQ default: 2ms"
|
# BMQ default: 2ms"
|
||||||
# Set to "1" for 2ms, "2" for 4ms, "3" for 6ms, "4" for 8ms, or "default" to keep the chosen scheduler defaults.
|
# Set to "1" for 2ms, "2" for 4ms, "3" for 6ms, "4" for 8ms, or "default" to keep the chosen scheduler defaults.
|
||||||
_rr_interval=""
|
_rr_interval="2"
|
||||||
|
|
||||||
# Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false"
|
# Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false"
|
||||||
_ftracedisable="false"
|
_ftracedisable="false"
|
||||||
@@ -147,10 +151,10 @@ _misc_adds="true"
|
|||||||
# Full tickless can give higher performances in case you use isolation of CPUs for tasks
|
# Full tickless can give higher performances in case you use isolation of CPUs for tasks
|
||||||
# and it works only when using the nohz_full kernel parameter, otherwise behaves like idle.
|
# and it works only when using the nohz_full kernel parameter, otherwise behaves like idle.
|
||||||
# Just tickless idle perform better for most platforms.
|
# Just tickless idle perform better for most platforms.
|
||||||
_tickless=""
|
_tickless="2"
|
||||||
|
|
||||||
# Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false"
|
# Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false"
|
||||||
_acs_override=""
|
_acs_override="false"
|
||||||
|
|
||||||
# Set to "true" to add Bcache filesystem support. You'll have to install bcachefs-tools-git from AUR for utilities - https://bcachefs.org/ - If in doubt, set to "false"
|
# Set to "true" to add Bcache filesystem support. You'll have to install bcachefs-tools-git from AUR for utilities - https://bcachefs.org/ - If in doubt, set to "false"
|
||||||
# This can be buggy and isn't recommended on a production machine, also enabling this option will not allow you to enable MGLRU.
|
# This can be buggy and isn't recommended on a production machine, also enabling this option will not allow you to enable MGLRU.
|
||||||
@@ -179,13 +183,13 @@ _futex_waitv="false"
|
|||||||
_winesync="false"
|
_winesync="false"
|
||||||
|
|
||||||
# Set to "true" to enable Binder and Ashmem, the kernel modules required to use the android emulator Anbox. ! This doesn't apply to 5.4.y !
|
# Set to "true" to enable Binder and Ashmem, the kernel modules required to use the android emulator Anbox. ! This doesn't apply to 5.4.y !
|
||||||
_anbox=""
|
_anbox="false"
|
||||||
|
|
||||||
# A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true"
|
# A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true"
|
||||||
_zenify="true"
|
_zenify="true"
|
||||||
|
|
||||||
# compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "1"
|
# compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "1"
|
||||||
_compileroptlevel="1"
|
_compileroptlevel="2"
|
||||||
|
|
||||||
# CPU compiler optimizations - Defaults to prompt at kernel config if left empty
|
# CPU compiler optimizations - Defaults to prompt at kernel config if left empty
|
||||||
# AMD CPUs : "k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver" "steamroller" "excavator" "zen" "zen2" "zen3" "zen4" (zen3 opt support depends on GCC11) (zen4 opt support depends on GCC13)
|
# AMD CPUs : "k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver" "steamroller" "excavator" "zen" "zen2" "zen3" "zen4" (zen3 opt support depends on GCC11) (zen4 opt support depends on GCC13)
|
||||||
@@ -199,7 +203,7 @@ _compileroptlevel="1"
|
|||||||
# - "generic_v2" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v2
|
# - "generic_v2" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v2
|
||||||
# - "generic_v3" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v3
|
# - "generic_v3" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v3
|
||||||
# - "generic_v4" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v4
|
# - "generic_v4" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v4
|
||||||
_processor_opt=""
|
_processor_opt="skylake"
|
||||||
|
|
||||||
# MuQSS only - Make IRQ threading compulsory (FORCE_IRQ_THREADING) - Default is "false"
|
# MuQSS only - Make IRQ threading compulsory (FORCE_IRQ_THREADING) - Default is "false"
|
||||||
_irq_threading="false"
|
_irq_threading="false"
|
||||||
@@ -215,7 +219,7 @@ _cacule_rdb_interval="19"
|
|||||||
_tt_high_hz="false"
|
_tt_high_hz="false"
|
||||||
|
|
||||||
# MuQSS and PDS only - SMT (Hyperthreading) aware nice priority and policy support (SMT_NICE) - Kernel default is "true" - You can disable this on non-SMT/HT CPUs for lower overhead
|
# MuQSS and PDS only - SMT (Hyperthreading) aware nice priority and policy support (SMT_NICE) - Kernel default is "true" - You can disable this on non-SMT/HT CPUs for lower overhead
|
||||||
_smt_nice=""
|
_smt_nice="true"
|
||||||
|
|
||||||
# Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false"
|
# Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false"
|
||||||
_random_trust_cpu="true"
|
_random_trust_cpu="true"
|
||||||
@@ -225,7 +229,7 @@ _random_trust_cpu="true"
|
|||||||
_runqueue_sharing=""
|
_runqueue_sharing=""
|
||||||
|
|
||||||
# Timer frequency - "100" "250" "300" "500" "750" "1000" ("2000" is available for cacule cpusched only) - More options available in kernel config prompt when left empty depending on selected cpusched with the default option pointed with a ">" (2000 for cacule, 100 for muqss and 1000 for other cpu schedulers)
|
# Timer frequency - "100" "250" "300" "500" "750" "1000" ("2000" is available for cacule cpusched only) - More options available in kernel config prompt when left empty depending on selected cpusched with the default option pointed with a ">" (2000 for cacule, 100 for muqss and 1000 for other cpu schedulers)
|
||||||
_timer_freq=""
|
_timer_freq="500"
|
||||||
|
|
||||||
# Default CPU governor - "performance", "ondemand", "schedutil" or leave empty for default (schedutil)
|
# Default CPU governor - "performance", "ondemand", "schedutil" or leave empty for default (schedutil)
|
||||||
_default_cpu_gov="ondemand"
|
_default_cpu_gov="ondemand"
|
||||||
@@ -241,7 +245,7 @@ _aggressive_ondemand="true"
|
|||||||
_tcp_cong_alg=""
|
_tcp_cong_alg=""
|
||||||
|
|
||||||
# You can pass a default set of kernel command line options here - example: "intel_pstate=passive nowatchdog amdgpu.ppfeaturemask=0xfffd7fff mitigations=off"
|
# You can pass a default set of kernel command line options here - example: "intel_pstate=passive nowatchdog amdgpu.ppfeaturemask=0xfffd7fff mitigations=off"
|
||||||
_custom_commandline="intel_pstate=passive"
|
_custom_commandline="intel_pstate=passive split_lock_detect=off"
|
||||||
|
|
||||||
# Selection of Clearlinux patches
|
# Selection of Clearlinux patches
|
||||||
_clear_patches="true"
|
_clear_patches="true"
|
||||||
@@ -260,10 +264,10 @@ _kernel_localversion=""
|
|||||||
# Set to "true" to add back missing symbol for AES-NI/AVX support on ZFS - This is a legacy option that can be ignored on 5.10+ kernels - https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions.patch
|
# Set to "true" to add back missing symbol for AES-NI/AVX support on ZFS - This is a legacy option that can be ignored on 5.10+ kernels - https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions.patch
|
||||||
_zfsfix="true"
|
_zfsfix="true"
|
||||||
|
|
||||||
# Set to your maximum number of CPUs (physical + logical cores) - Lower means less overhead - You can set it to "$(nproc)" to use the current host's CPU(s) core count, or leave empty to get a prompt
|
# Set to your maximum number of CPUs (physical + logical cores) - Lower means less overhead - You can set it to "$(nproc)" to use the current host's CPU(s) core count, or leave empty to use default
|
||||||
# If you set this to a lower value than you have cores, some cores will be disabled
|
# If you set this to a lower value than you have cores, some cores will be disabled
|
||||||
# Default Arch kernel value is 320
|
# Default Arch kernel value is 320
|
||||||
_NR_CPUS_value="128"
|
_NR_CPUS_value=""
|
||||||
|
|
||||||
|
|
||||||
#### USER PATCHES ####
|
#### USER PATCHES ####
|
||||||
|
14
linux-tkg-config/6.2/90-cleanup.hook
Normal file
14
linux-tkg-config/6.2/90-cleanup.hook
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
[Trigger]
|
||||||
|
Type = File
|
||||||
|
Operation = Install
|
||||||
|
Operation = Upgrade
|
||||||
|
Operation = Remove
|
||||||
|
Target = usr/lib/modules/*/
|
||||||
|
Target = !usr/lib/modules/*/?*
|
||||||
|
|
||||||
|
[Action]
|
||||||
|
Description = Cleaning up...
|
||||||
|
When = PostTransaction
|
||||||
|
Exec = /usr/share/libalpm/scripts/cleanup
|
||||||
|
NeedsTargets
|
||||||
|
|
10
linux-tkg-config/6.2/cleanup
Executable file
10
linux-tkg-config/6.2/cleanup
Executable file
@@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
for _f in /usr/lib/modules/*tkg*; do
|
||||||
|
if [[ ! -e ${_f}/vmlinuz ]]; then
|
||||||
|
rm -rf "$_f"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# vim:set ft=sh sw=2 et:
|
||||||
|
|
11333
linux-tkg-config/6.2/config.x86_64
Normal file
11333
linux-tkg-config/6.2/config.x86_64
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# List of kernels that are maintained upstream
|
# List of kernels that are maintained upstream
|
||||||
_current_kernels=("6.1" "6.0" "5.15" "5.10" "5.4")
|
_current_kernels=("6.2" "6.1" "6.0" "5.15" "5.10" "5.4")
|
||||||
|
|
||||||
# List of kernels that are no longer maintained upstream
|
# List of kernels that are no longer maintained upstream
|
||||||
_eol_kernels=("5.19" "5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")
|
_eol_kernels=("5.19" "5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")
|
||||||
@@ -39,32 +39,32 @@ done
|
|||||||
# PREEMPT_RT's supported kernel subversion
|
# PREEMPT_RT's supported kernel subversion
|
||||||
typeset -Ag _rt_subver_map
|
typeset -Ag _rt_subver_map
|
||||||
_rt_subver_map=(
|
_rt_subver_map=(
|
||||||
["5.4"]="209"
|
["5.4"]="221"
|
||||||
["5.9"]="1"
|
["5.9"]="1"
|
||||||
["5.10"]="153"
|
["5.10"]="153"
|
||||||
["5.11"]="4"
|
["5.11"]="4"
|
||||||
["5.14"]="2"
|
["5.14"]="2"
|
||||||
["5.15"]="76"
|
["5.15"]="79"
|
||||||
["5.16"]="2"
|
["5.16"]="2"
|
||||||
["5.17"]="1"
|
["5.17"]="1"
|
||||||
["6.0"]="5"
|
["6.0"]="5"
|
||||||
["6.1"]="rc3"
|
["6.1"]="rc7"
|
||||||
)
|
)
|
||||||
|
|
||||||
# PREEMPT_RT's patch revision for the kernel
|
# PREEMPT_RT's patch revision for the kernel
|
||||||
# We separated this to allow for forcing the application of the patch when _preempt_rt_force=1 on version mismatch
|
# We separated this to allow for forcing the application of the patch when _preempt_rt_force=1 on version mismatch
|
||||||
typeset -Ag _rt_rev_map
|
typeset -Ag _rt_rev_map
|
||||||
_rt_rev_map=(
|
_rt_rev_map=(
|
||||||
["5.4"]="77"
|
["5.4"]="79"
|
||||||
["5.9"]="20"
|
["5.9"]="20"
|
||||||
["5.10"]="76"
|
["5.10"]="76"
|
||||||
["5.11"]="11"
|
["5.11"]="11"
|
||||||
["5.14"]="21"
|
["5.14"]="21"
|
||||||
["5.15"]="53"
|
["5.15"]="54"
|
||||||
["5.16"]="19"
|
["5.16"]="19"
|
||||||
["5.17"]="17"
|
["5.17"]="17"
|
||||||
["6.0"]="14"
|
["6.0"]="14"
|
||||||
["6.1"]="2"
|
["6.1"]="5"
|
||||||
)
|
)
|
||||||
|
|
||||||
_undefine() {
|
_undefine() {
|
||||||
@@ -282,6 +282,9 @@ _set_cpu_scheduler() {
|
|||||||
elif [ "$_kver" = "600" ]; then
|
elif [ "$_kver" = "600" ]; then
|
||||||
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
|
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
|
||||||
elif [ "$_kver" = "601" ]; then
|
elif [ "$_kver" = "601" ]; then
|
||||||
|
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
|
||||||
|
_projectc_unoff=1
|
||||||
|
elif [ "$_kver" = "602" ]; then
|
||||||
_avail_cpu_scheds=("cfs" "tt" "bore")
|
_avail_cpu_scheds=("cfs" "tt" "bore")
|
||||||
else
|
else
|
||||||
_avail_cpu_scheds=("cfs")
|
_avail_cpu_scheds=("cfs")
|
||||||
@@ -389,9 +392,15 @@ _linux_git_branch_checkout() {
|
|||||||
|
|
||||||
cd "$_where"
|
cd "$_where"
|
||||||
|
|
||||||
if ! [ -d linux-src-git ]; then
|
if ! [ -d linux-src-git ] || ( [ "$_source_in_tmpfs" = "true" ] && ! [ -d /tmp/linux-src-git ] ); then
|
||||||
msg2 "First initialization of the linux source code git folder"
|
msg2 "First initialization of the linux source code git folder"
|
||||||
mkdir linux-src-git
|
if [ "$_source_in_tmpfs" = "true" ]; then
|
||||||
|
rm -rf "${_where}/linux-src-git"
|
||||||
|
mkdir "/tmp/linux-src-git"
|
||||||
|
ln -s "/tmp/linux-src-git" "${_where}"
|
||||||
|
else
|
||||||
|
mkdir linux-src-git
|
||||||
|
fi
|
||||||
cd linux-src-git
|
cd linux-src-git
|
||||||
git init
|
git init
|
||||||
|
|
||||||
@@ -399,6 +408,11 @@ _linux_git_branch_checkout() {
|
|||||||
git remote add "$remote" "${_kernel_git_remotes[$remote]}"
|
git remote add "$remote" "${_kernel_git_remotes[$remote]}"
|
||||||
done
|
done
|
||||||
else
|
else
|
||||||
|
if [ "$_source_in_tmpfs" = "true" ]; then
|
||||||
|
rm -rf "${_where}/linux-src-git"
|
||||||
|
ln -s "/tmp/linux-src-git" "${_where}"
|
||||||
|
fi
|
||||||
|
|
||||||
cd linux-src-git
|
cd linux-src-git
|
||||||
|
|
||||||
# Remove "origin" remote if present
|
# Remove "origin" remote if present
|
||||||
@@ -883,13 +897,19 @@ _tkg_srcprep() {
|
|||||||
if [ "$_kver" = "504" ] || [ "$_kver" = "509" ]; then
|
if [ "$_kver" = "504" ] || [ "$_kver" = "509" ]; then
|
||||||
scripts/config --set-val "RCU_BOOST_DELAY" "0"
|
scripts/config --set-val "RCU_BOOST_DELAY" "0"
|
||||||
fi
|
fi
|
||||||
_disable "NTP_PPS" "CPU_FREQ_DEFAULT_GOV_PERFORMANCE_NODEF" "ZSWAP_COMPRESSOR_DEFAULT_LZO" "PROFILE_ALL_BRANCHES"
|
_disable "NTP_PPS" "ZSWAP_COMPRESSOR_DEFAULT_LZO" "PROFILE_ALL_BRANCHES"
|
||||||
_enable "CRYPTO_LZ4" "CRYPTO_LZ4HC" "LZ4_COMPRESS" "LZ4HC_COMPRESS" "ZSWAP_COMPRESSOR_DEFAULT_LZ4" "CMDLINE_BOOL" "BLK_DEV_LOOP" "X86_AMD_PSTATE" "AMD_PINCTRL" "CONTEXT_TRACKING_FORCE"
|
_enable "CRYPTO_LZ4" "CRYPTO_LZ4HC" "LZ4_COMPRESS" "LZ4HC_COMPRESS" "ZSWAP_COMPRESSOR_DEFAULT_LZ4" "X86_AMD_PSTATE" "AMD_PINCTRL"
|
||||||
_disable "DEBUG_FORCE_FUNCTION_ALIGN_64B"
|
_disable "DEBUG_FORCE_FUNCTION_ALIGN_64B" "X86_P6_NOP"
|
||||||
scripts/config --set-str "ZSWAP_COMPRESSOR_DEFAULT" "lz4"
|
scripts/config --set-str "ZSWAP_COMPRESSOR_DEFAULT" "lz4"
|
||||||
scripts/config --set-str "CMDLINE" "${_custom_commandline}"
|
_enable "CPU_FREQ_DEFAULT_GOV_SCHEDUTIL"
|
||||||
_disable "CMDLINE_OVERRIDE" "X86_P6_NOP" "CPU_FREQ_DEFAULT_GOV_ONDEMAND" "CPU_FREQ_DEFAULT_GOV_CONSERVATIVE"
|
_disable "CPU_FREQ_DEFAULT_GOV_ONDEMAND" "CPU_FREQ_DEFAULT_GOV_CONSERVATIVE" "CPU_FREQ_DEFAULT_GOV_PERFORMANCE" "CPU_FREQ_DEFAULT_GOV_PERFORMANCE_NODEF"
|
||||||
#echo "# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set" >> ./.config
|
_module "BLK_DEV_LOOP"
|
||||||
|
|
||||||
|
if [ -n "$_custom_commandline" ]; then
|
||||||
|
_enable "CMDLINE_BOOL"
|
||||||
|
_disable "CMDLINE_OVERRIDE"
|
||||||
|
scripts/config --set-str "CMDLINE" "${_custom_commandline}"
|
||||||
|
fi
|
||||||
|
|
||||||
# openrgb
|
# openrgb
|
||||||
_module "I2C_NCT6775"
|
_module "I2C_NCT6775"
|
||||||
@@ -1342,14 +1362,14 @@ _tkg_srcprep() {
|
|||||||
_tickless="${_selected_index}"
|
_tickless="${_selected_index}"
|
||||||
fi
|
fi
|
||||||
if [ "$_tickless" = "0" ]; then
|
if [ "$_tickless" = "0" ]; then
|
||||||
_disable "NO_HZ_FULL_NODEF" "NO_HZ_IDLE" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "VIRT_CPU_ACCOUNTING_GEN"
|
_disable "NO_HZ_FULL_NODEF" "NO_HZ_IDLE" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "VIRT_CPU_ACCOUNTING" "VIRT_CPU_ACCOUNTING_GEN"
|
||||||
_enable "HZ_PERIODIC" "TICK_CPU_ACCOUNTING"
|
_enable "HZ_PERIODIC" "TICK_CPU_ACCOUNTING"
|
||||||
elif [ "$_tickless" = "1" ]; then
|
elif [ "$_tickless" = "1" ]; then
|
||||||
_disable "HZ_PERIODIC" "NO_HZ_IDLE" "TICK_CPU_ACCOUNTING"
|
_disable "HZ_PERIODIC" "NO_HZ_IDLE" "TICK_CPU_ACCOUNTING" "CONTEXT_TRACKING_FORCE"
|
||||||
_enable "NO_HZ_FULL_NODEF" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "CONTEXT_TRACKING" "VIRT_CPU_ACCOUNTING_GEN"
|
_enable "NO_HZ_FULL_NODEF" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "CONTEXT_TRACKING" "VIRT_CPU_ACCOUNTING" "VIRT_CPU_ACCOUNTING_GEN"
|
||||||
else
|
else
|
||||||
_disable "NO_HZ_FULL_NODEF" "HZ_PERIODIC" "NO_HZ_FULL" "TICK_CPU_ACCOUNTING"
|
_disable "NO_HZ_FULL_NODEF" "HZ_PERIODIC" "NO_HZ_FULL" "TICK_CPU_ACCOUNTING" "CONTEXT_TRACKING_FORCE"
|
||||||
_enable "NO_HZ_IDLE" "NO_HZ" "NO_HZ_COMMON" "VIRT_CPU_ACCOUNTING_GEN"
|
_enable "NO_HZ_IDLE" "NO_HZ" "NO_HZ_COMMON" "CONTEXT_TRACKING" "VIRT_CPU_ACCOUNTING" "VIRT_CPU_ACCOUNTING_GEN"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# acs override
|
# acs override
|
||||||
@@ -1593,22 +1613,9 @@ CONFIG_DEBUG_INFO_BTF_MODULES=y\r
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# NR_CPUS
|
# NR_CPUS
|
||||||
if [ -z "$_NR_CPUS_value" ]; then
|
|
||||||
plain ""
|
|
||||||
plain "Set NR_CPUS value to the current host's threads count?"
|
|
||||||
plain "For best results, it should be equal to the maximum number of threads the target machine has."
|
|
||||||
plain "If you want to use the resulting kernel on a machine with more threads, you can hit enter or answer N to use a default of 128."
|
|
||||||
read -rp "`echo $' > N/y : '`" CONDITION_nrcpus;
|
|
||||||
fi
|
|
||||||
if [[ "$CONDITION_nrcpus" =~ [yY] ]]; then
|
|
||||||
_NR_CPUS_value="$(nproc)"
|
|
||||||
fi
|
|
||||||
if [ -n "$_NR_CPUS_value" ]; then
|
if [ -n "$_NR_CPUS_value" ]; then
|
||||||
scripts/config --set-val "NR_CPUS" "$_NR_CPUS_value"
|
scripts/config --set-val "NR_CPUS" "$_NR_CPUS_value"
|
||||||
_enable "FORCE_NR_CPUS"
|
_enable "FORCE_NR_CPUS"
|
||||||
else
|
|
||||||
scripts/config --set-val "NR_CPUS" "128"
|
|
||||||
_disable "FORCE_NR_CPUS"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
@@ -14,7 +14,7 @@ of Proton to still use fsync in new kernel releases.
|
|||||||
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
||||||
---
|
---
|
||||||
include/uapi/linux/futex.h | 12 ++++++
|
include/uapi/linux/futex.h | 12 ++++++
|
||||||
kernel/futex.c | 75 +++++++++++++++++++++++++++++++++++++-
|
kernel/futex/core.c | 75 +++++++++++++++++++++++++++++++++++++-
|
||||||
2 files changed, 86 insertions(+), 1 deletion(-)
|
2 files changed, 86 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
|
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
|
||||||
@@ -47,10 +47,10 @@ index 2a06b99f9803..417c5d89b745 100644
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Support for robust futexes: the kernel cleans up held futexes at
|
* Support for robust futexes: the kernel cleans up held futexes at
|
||||||
diff --git a/kernel/futex.c b/kernel/futex.c
|
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
|
||||||
index 4a9e7ce3714a..c3f2e65afab8 100644
|
index 4a9e7ce3714a..c3f2e65afab8 100644
|
||||||
--- a/kernel/futex.c
|
--- a/kernel/futex/core.c
|
||||||
+++ b/kernel/futex.c
|
+++ b/kernel/futex/core.c
|
||||||
@@ -4012,6 +4012,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
|
@@ -4012,6 +4012,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
|
||||||
case FUTEX_LOCK_PI2:
|
case FUTEX_LOCK_PI2:
|
||||||
case FUTEX_WAIT_BITSET:
|
case FUTEX_WAIT_BITSET:
|
||||||
|
@@ -43,7 +43,7 @@ Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.c
|
|||||||
include/linux/syscalls.h | 6 +
|
include/linux/syscalls.h | 6 +
|
||||||
include/uapi/asm-generic/unistd.h | 5 +-
|
include/uapi/asm-generic/unistd.h | 5 +-
|
||||||
include/uapi/linux/futex.h | 26 +++
|
include/uapi/linux/futex.h | 26 +++
|
||||||
kernel/futex.c | 334 ++++++++++++++++++++++++++++++
|
kernel/futex/core.c | 334 ++++++++++++++++++++++++++++++
|
||||||
kernel/sys_ni.c | 1 +
|
kernel/sys_ni.c | 1 +
|
||||||
5 files changed, 371 insertions(+), 1 deletion(-)
|
5 files changed, 371 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
@@ -125,10 +125,10 @@ index a89eb0accd5e..1666f5e4b837 100644
|
|||||||
/*
|
/*
|
||||||
* Support for robust futexes: the kernel cleans up held futexes at
|
* Support for robust futexes: the kernel cleans up held futexes at
|
||||||
* thread exit time.
|
* thread exit time.
|
||||||
diff --git a/kernel/futex.c b/kernel/futex.c
|
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
|
||||||
index 408cad5e8968..d7dc0bd9379c 100644
|
index 408cad5e8968..d7dc0bd9379c 100644
|
||||||
--- a/kernel/futex.c
|
--- a/kernel/futex/core.c
|
||||||
+++ b/kernel/futex.c
|
+++ b/kernel/futex/core.c
|
||||||
@@ -285,6 +285,18 @@ static const struct futex_q futex_q_init = {
|
@@ -285,6 +285,18 @@ static const struct futex_q futex_q_init = {
|
||||||
.requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
|
.requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
|
||||||
};
|
};
|
||||||
|
@@ -709,7 +709,7 @@ index 000000000000..8b0ddbdd24e4
|
|||||||
+#include <asm/switch_to.h>
|
+#include <asm/switch_to.h>
|
||||||
+
|
+
|
||||||
+#include "../workqueue_internal.h"
|
+#include "../workqueue_internal.h"
|
||||||
+#include "../../fs/io-wq.h"
|
+#include "../../io_uring/io-wq.h"
|
||||||
+#include "../smpboot.h"
|
+#include "../smpboot.h"
|
||||||
+
|
+
|
||||||
+#include "pelt.h"
|
+#include "pelt.h"
|
||||||
|
@@ -1,124 +1,48 @@
|
|||||||
From 5ec2dd3a095442ec1a21d86042a4994f2ba24e63 Mon Sep 17 00:00:00 2001
|
From d50977b164e708bf523a35ef53315355528c3ca6 Mon Sep 17 00:00:00 2001
|
||||||
Message-Id: <5ec2dd3a095442ec1a21d86042a4994f2ba24e63.1512651251.git.jan.steffens@gmail.com>
|
|
||||||
From: Serge Hallyn <serge.hallyn@canonical.com>
|
|
||||||
Date: Fri, 31 May 2013 19:12:12 +0100
|
|
||||||
Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default
|
|
||||||
|
|
||||||
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
|
|
||||||
[bwh: Remove unneeded binary sysctl bits]
|
|
||||||
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
|
|
||||||
---
|
|
||||||
kernel/fork.c | 15 +++++++++++++++
|
|
||||||
kernel/sysctl.c | 12 ++++++++++++
|
|
||||||
kernel/user_namespace.c | 3 +++
|
|
||||||
3 files changed, 30 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/kernel/fork.c b/kernel/fork.c
|
|
||||||
index 07cc743698d3668e..4011d68a8ff9305c 100644
|
|
||||||
--- a/kernel/fork.c
|
|
||||||
+++ b/kernel/fork.c
|
|
||||||
@@ -102,6 +102,11 @@
|
|
||||||
|
|
||||||
#define CREATE_TRACE_POINTS
|
|
||||||
#include <trace/events/task.h>
|
|
||||||
+#ifdef CONFIG_USER_NS
|
|
||||||
+extern int unprivileged_userns_clone;
|
|
||||||
+#else
|
|
||||||
+#define unprivileged_userns_clone 0
|
|
||||||
+#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Minimum number of threads to boot the kernel
|
|
||||||
@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process(
|
|
||||||
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
|
|
||||||
return ERR_PTR(-EINVAL);
|
|
||||||
|
|
||||||
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
|
|
||||||
+ if (!capable(CAP_SYS_ADMIN))
|
|
||||||
+ return ERR_PTR(-EPERM);
|
|
||||||
+
|
|
||||||
/*
|
|
||||||
* Thread groups must share signals as well, and detached threads
|
|
||||||
* can only be started up within the thread group.
|
|
||||||
@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
|
|
||||||
if (unshare_flags & CLONE_NEWNS)
|
|
||||||
unshare_flags |= CLONE_FS;
|
|
||||||
|
|
||||||
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
|
|
||||||
+ err = -EPERM;
|
|
||||||
+ if (!capable(CAP_SYS_ADMIN))
|
|
||||||
+ goto bad_unshare_out;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
err = check_unshare_flags(unshare_flags);
|
|
||||||
if (err)
|
|
||||||
goto bad_unshare_out;
|
|
||||||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
|
||||||
index b86520ed3fb60fbf..f7dab3760839f1a1 100644
|
|
||||||
--- a/kernel/sysctl.c
|
|
||||||
+++ b/kernel/sysctl.c
|
|
||||||
@@ -105,6 +105,9 @@ extern int core_uses_pid;
|
|
||||||
|
|
||||||
#if defined(CONFIG_SYSCTL)
|
|
||||||
|
|
||||||
+#ifdef CONFIG_USER_NS
|
|
||||||
+extern int unprivileged_userns_clone;
|
|
||||||
+#endif
|
|
||||||
/* Constants used for minimum and maximum */
|
|
||||||
|
|
||||||
#ifdef CONFIG_PERF_EVENTS
|
|
||||||
@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = {
|
|
||||||
.proc_handler = proc_dointvec,
|
|
||||||
},
|
|
||||||
#endif
|
|
||||||
+#ifdef CONFIG_USER_NS
|
|
||||||
+ {
|
|
||||||
+ .procname = "unprivileged_userns_clone",
|
|
||||||
+ .data = &unprivileged_userns_clone,
|
|
||||||
+ .maxlen = sizeof(int),
|
|
||||||
+ .mode = 0644,
|
|
||||||
+ .proc_handler = proc_dointvec,
|
|
||||||
+ },
|
|
||||||
+#endif
|
|
||||||
#ifdef CONFIG_PROC_SYSCTL
|
|
||||||
{
|
|
||||||
.procname = "tainted",
|
|
||||||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
|
|
||||||
index c490f1e4313b998a..dd03bd39d7bf194d 100644
|
|
||||||
--- a/kernel/user_namespace.c
|
|
||||||
+++ b/kernel/user_namespace.c
|
|
||||||
@@ -24,6 +24,9 @@
|
|
||||||
#include <linux/projid.h>
|
|
||||||
#include <linux/fs_struct.h>
|
|
||||||
|
|
||||||
+/* sysctl */
|
|
||||||
+int unprivileged_userns_clone;
|
|
||||||
+
|
|
||||||
static struct kmem_cache *user_ns_cachep __read_mostly;
|
|
||||||
static DEFINE_MUTEX(userns_state_mutex);
|
|
||||||
|
|
||||||
--
|
|
||||||
2.15.1
|
|
||||||
|
|
||||||
From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001
|
|
||||||
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
||||||
Date: Thu, 7 Dec 2017 13:50:48 +0100
|
Date: Mon, 16 Sep 2019 04:53:20 +0200
|
||||||
Subject: ZEN: Add CONFIG for unprivileged_userns_clone
|
Subject: [PATCH] ZEN: Add sysctl and CONFIG to disallow unprivileged
|
||||||
|
CLONE_NEWUSER
|
||||||
|
|
||||||
This way our default behavior continues to match the vanilla kernel.
|
Our default behavior continues to match the vanilla kernel.
|
||||||
---
|
---
|
||||||
init/Kconfig | 16 ++++++++++++++++
|
include/linux/user_namespace.h | 4 ++++
|
||||||
kernel/user_namespace.c | 4 ++++
|
init/Kconfig | 16 ++++++++++++++++
|
||||||
2 files changed, 20 insertions(+)
|
kernel/fork.c | 14 ++++++++++++++
|
||||||
|
kernel/sysctl.c | 12 ++++++++++++
|
||||||
|
kernel/user_namespace.c | 7 +++++++
|
||||||
|
5 files changed, 53 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
|
||||||
|
index 45f09bec02c485..87b20e2ee27445 100644
|
||||||
|
--- a/include/linux/user_namespace.h
|
||||||
|
+++ b/include/linux/user_namespace.h
|
||||||
|
@@ -148,6 +148,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
|
||||||
|
|
||||||
|
#ifdef CONFIG_USER_NS
|
||||||
|
|
||||||
|
+extern int unprivileged_userns_clone;
|
||||||
|
+
|
||||||
|
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
||||||
|
{
|
||||||
|
if (ns)
|
||||||
|
@@ -181,6 +183,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
|
||||||
|
struct ns_common *ns_get_owner(struct ns_common *ns);
|
||||||
|
#else
|
||||||
|
|
||||||
|
+#define unprivileged_userns_clone 0
|
||||||
|
+
|
||||||
|
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
||||||
|
{
|
||||||
|
return &init_user_ns;
|
||||||
diff --git a/init/Kconfig b/init/Kconfig
|
diff --git a/init/Kconfig b/init/Kconfig
|
||||||
index 4592bf7997c0..f3df02990aff 100644
|
index 94125d3b6893c7..9f7139b536f638 100644
|
||||||
--- a/init/Kconfig
|
--- a/init/Kconfig
|
||||||
+++ b/init/Kconfig
|
+++ b/init/Kconfig
|
||||||
@@ -1004,6 +1004,22 @@ config USER_NS
|
@@ -1247,6 +1247,22 @@ config USER_NS
|
||||||
|
|
||||||
If unsure, say N.
|
If unsure, say N.
|
||||||
|
|
||||||
+config USER_NS_UNPRIVILEGED
|
+config USER_NS_UNPRIVILEGED
|
||||||
+ bool "Allow unprivileged users to create namespaces"
|
+ bool "Allow unprivileged users to create namespaces"
|
||||||
+ default y
|
+ default y
|
||||||
@@ -138,19 +62,90 @@ index 4592bf7997c0..f3df02990aff 100644
|
|||||||
config PID_NS
|
config PID_NS
|
||||||
bool "PID Namespaces"
|
bool "PID Namespaces"
|
||||||
default y
|
default y
|
||||||
|
diff --git a/kernel/fork.c b/kernel/fork.c
|
||||||
|
index 08969f5aa38d59..ff601cb7a1fae0 100644
|
||||||
|
--- a/kernel/fork.c
|
||||||
|
+++ b/kernel/fork.c
|
||||||
|
@@ -98,6 +98,10 @@
|
||||||
|
#include <linux/io_uring.h>
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
|
||||||
|
+#ifdef CONFIG_USER_NS
|
||||||
|
+#include <linux/user_namespace.h>
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#include <asm/pgalloc.h>
|
||||||
|
#include <linux/uaccess.h>
|
||||||
|
#include <asm/mmu_context.h>
|
||||||
|
@@ -2008,6 +2012,10 @@ static __latent_entropy struct task_struct *copy_process(
|
||||||
|
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
|
||||||
|
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
|
||||||
|
+ if (!capable(CAP_SYS_ADMIN))
|
||||||
|
+ return ERR_PTR(-EPERM);
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Thread groups must share signals as well, and detached threads
|
||||||
|
* can only be started up within the thread group.
|
||||||
|
@@ -3166,6 +3174,12 @@ int ksys_unshare(unsigned long unshare_flags)
|
||||||
|
if (unshare_flags & CLONE_NEWNS)
|
||||||
|
unshare_flags |= CLONE_FS;
|
||||||
|
|
||||||
|
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
|
||||||
|
+ err = -EPERM;
|
||||||
|
+ if (!capable(CAP_SYS_ADMIN))
|
||||||
|
+ goto bad_unshare_out;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
err = check_unshare_flags(unshare_flags);
|
||||||
|
if (err)
|
||||||
|
goto bad_unshare_out;
|
||||||
|
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
||||||
|
index c6d9dec11b749d..9a4514ad481b21 100644
|
||||||
|
--- a/kernel/sysctl.c
|
||||||
|
+++ b/kernel/sysctl.c
|
||||||
|
@@ -81,6 +81,9 @@
|
||||||
|
#ifdef CONFIG_RT_MUTEXES
|
||||||
|
#include <linux/rtmutex.h>
|
||||||
|
#endif
|
||||||
|
+#ifdef CONFIG_USER_NS
|
||||||
|
+#include <linux/user_namespace.h>
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/* shared constants to be used in various sysctls */
|
||||||
|
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
|
||||||
|
@@ -1659,6 +1662,15 @@ static struct ctl_table kern_table[] = {
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec,
|
||||||
|
},
|
||||||
|
+#ifdef CONFIG_USER_NS
|
||||||
|
+ {
|
||||||
|
+ .procname = "unprivileged_userns_clone",
|
||||||
|
+ .data = &unprivileged_userns_clone,
|
||||||
|
+ .maxlen = sizeof(int),
|
||||||
|
+ .mode = 0644,
|
||||||
|
+ .proc_handler = proc_dointvec,
|
||||||
|
+ },
|
||||||
|
+#endif
|
||||||
|
#ifdef CONFIG_PROC_SYSCTL
|
||||||
|
{
|
||||||
|
.procname = "tainted",
|
||||||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
|
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
|
||||||
index 6b9dbc257e34..107b17f0d528 100644
|
index 54211dbd516c57..16ca0c1516298d 100644
|
||||||
--- a/kernel/user_namespace.c
|
--- a/kernel/user_namespace.c
|
||||||
+++ b/kernel/user_namespace.c
|
+++ b/kernel/user_namespace.c
|
||||||
@@ -27,7 +27,11 @@
|
@@ -22,6 +22,13 @@
|
||||||
|
#include <linux/bsearch.h>
|
||||||
#include <linux/sort.h>
|
#include <linux/sort.h>
|
||||||
|
|
||||||
/* sysctl */
|
+/* sysctl */
|
||||||
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
|
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
|
||||||
+int unprivileged_userns_clone = 1;
|
+int unprivileged_userns_clone = 1;
|
||||||
+#else
|
+#else
|
||||||
int unprivileged_userns_clone;
|
+int unprivileged_userns_clone;
|
||||||
+#endif
|
+#endif
|
||||||
|
+
|
||||||
static struct kmem_cache *user_ns_cachep __read_mostly;
|
static struct kmem_cache *user_ns_cachep __read_mostly;
|
||||||
static DEFINE_MUTEX(userns_state_mutex);
|
static DEFINE_MUTEX(userns_state_mutex);
|
||||||
|
|
||||||
|
@@ -403,34 +403,6 @@ index 84badf00647e..6a922bca9f39 100644
|
|||||||
2.28.0
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
From 816ee502759e954304693813bd03d94986b28dba Mon Sep 17 00:00:00 2001
|
|
||||||
From: Tk-Glitch <ti3nou@gmail.com>
|
|
||||||
Date: Mon, 18 Feb 2019 17:40:57 +0100
|
|
||||||
Subject: [PATCH 11/17] mm: Set watermark_scale_factor to 200 (from 10)
|
|
||||||
|
|
||||||
Multiple users have reported it's helping reducing/eliminating stuttering
|
|
||||||
with DXVK.
|
|
||||||
---
|
|
||||||
mm/page_alloc.c | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
|
||||||
index 898ff44f2c7b..e72074034793 100644
|
|
||||||
--- a/mm/page_alloc.c
|
|
||||||
+++ b/mm/page_alloc.c
|
|
||||||
@@ -330,7 +330,7 @@ int watermark_boost_factor __read_mostly;
|
|
||||||
int min_free_kbytes = 1024;
|
|
||||||
int user_min_free_kbytes = -1;
|
|
||||||
int watermark_boost_factor __read_mostly = 15000;
|
|
||||||
-int watermark_scale_factor = 10;
|
|
||||||
+int watermark_scale_factor = 200;
|
|
||||||
|
|
||||||
static unsigned long nr_kernel_pages __initdata;
|
|
||||||
static unsigned long nr_all_pages __initdata;
|
|
||||||
--
|
|
||||||
2.28.0
|
|
||||||
|
|
||||||
|
|
||||||
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
|
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
|
||||||
From: Tk-Glitch <ti3nou@gmail.com>
|
From: Tk-Glitch <ti3nou@gmail.com>
|
||||||
Date: Fri, 19 Apr 2019 12:33:38 +0200
|
Date: Fri, 19 Apr 2019 12:33:38 +0200
|
||||||
@@ -490,7 +462,6 @@ index b0cefe94920d..890165099b07 100644
|
|||||||
--
|
--
|
||||||
2.28.0
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
|
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
|
||||||
From: Alexandre Frade <admfrade@gmail.com>
|
From: Alexandre Frade <admfrade@gmail.com>
|
||||||
Date: Mon, 25 Nov 2019 15:13:06 -0300
|
Date: Mon, 25 Nov 2019 15:13:06 -0300
|
||||||
@@ -619,3 +590,232 @@ index 36a469150ff9..aee891c9b78a 100644
|
|||||||
--
|
--
|
||||||
2.28.0
|
2.28.0
|
||||||
|
|
||||||
|
From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Kenny Levinsen <kl@kl.wtf>
|
||||||
|
Date: Sun, 27 Dec 2020 14:43:13 +0000
|
||||||
|
Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client
|
||||||
|
|
||||||
|
Significant time was spent on synchronize_rcu in evdev_detach_client
|
||||||
|
when applications closed evdev devices. Switching VT away from a
|
||||||
|
graphical environment commonly leads to mass input device closures,
|
||||||
|
which could lead to noticeable delays on systems with many input devices.
|
||||||
|
|
||||||
|
Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
|
||||||
|
client struct till after the RCU grace period instead of blocking the
|
||||||
|
calling application.
|
||||||
|
|
||||||
|
While this does not solve all slow evdev fd closures, it takes care of a
|
||||||
|
good portion of them, including this simple test:
|
||||||
|
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int idx, fd;
|
||||||
|
const char *path = "/dev/input/event0";
|
||||||
|
for (idx = 0; idx < 1000; idx++) {
|
||||||
|
if ((fd = open(path, O_RDWR)) == -1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Time to completion of above test when run locally:
|
||||||
|
|
||||||
|
Before: 0m27.111s
|
||||||
|
After: 0m0.018s
|
||||||
|
|
||||||
|
Signed-off-by: Kenny Levinsen <kl@kl.wtf>
|
||||||
|
---
|
||||||
|
drivers/input/evdev.c | 19 +++++++++++--------
|
||||||
|
1 file changed, 11 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
|
||||||
|
index 95f90699d2b17b..2b10fe29d2c8d9 100644
|
||||||
|
--- a/drivers/input/evdev.c
|
||||||
|
+++ b/drivers/input/evdev.c
|
||||||
|
@@ -46,6 +46,7 @@ struct evdev_client {
|
||||||
|
struct fasync_struct *fasync;
|
||||||
|
struct evdev *evdev;
|
||||||
|
struct list_head node;
|
||||||
|
+ struct rcu_head rcu;
|
||||||
|
enum input_clock_type clk_type;
|
||||||
|
bool revoked;
|
||||||
|
unsigned long *evmasks[EV_CNT];
|
||||||
|
@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
|
||||||
|
spin_unlock(&evdev->client_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void evdev_reclaim_client(struct rcu_head *rp)
|
||||||
|
+{
|
||||||
|
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
|
||||||
|
+ unsigned int i;
|
||||||
|
+ for (i = 0; i < EV_CNT; ++i)
|
||||||
|
+ bitmap_free(client->evmasks[i]);
|
||||||
|
+ kvfree(client);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void evdev_detach_client(struct evdev *evdev,
|
||||||
|
struct evdev_client *client)
|
||||||
|
{
|
||||||
|
spin_lock(&evdev->client_lock);
|
||||||
|
list_del_rcu(&client->node);
|
||||||
|
spin_unlock(&evdev->client_lock);
|
||||||
|
- synchronize_rcu();
|
||||||
|
+ call_rcu(&client->rcu, evdev_reclaim_client);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int evdev_open_device(struct evdev *evdev)
|
||||||
|
@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
|
||||||
|
{
|
||||||
|
struct evdev_client *client = file->private_data;
|
||||||
|
struct evdev *evdev = client->evdev;
|
||||||
|
- unsigned int i;
|
||||||
|
|
||||||
|
mutex_lock(&evdev->mutex);
|
||||||
|
|
||||||
|
@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
|
||||||
|
|
||||||
|
evdev_detach_client(evdev, client);
|
||||||
|
|
||||||
|
- for (i = 0; i < EV_CNT; ++i)
|
||||||
|
- bitmap_free(client->evmasks[i]);
|
||||||
|
-
|
||||||
|
- kvfree(client);
|
||||||
|
-
|
||||||
|
evdev_close_device(evdev);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
|
||||||
|
|
||||||
|
err_free_client:
|
||||||
|
evdev_detach_client(evdev, client);
|
||||||
|
- kvfree(client);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||||
|
Date: Wed, 20 Oct 2021 20:50:11 -0700
|
||||||
|
Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
|
||||||
|
scheduling delays
|
||||||
|
|
||||||
|
The page allocator processes free pages in groups of pageblocks, where
|
||||||
|
the size of a pageblock is typically quite large (1024 pages without
|
||||||
|
hugetlbpage support). Pageblocks are processed atomically with the zone
|
||||||
|
lock held, which can cause severe scheduling delays on both the CPU
|
||||||
|
going through the pageblock and any other CPUs waiting to acquire the
|
||||||
|
zone lock. A frequent offender is move_freepages_block(), which is used
|
||||||
|
by rmqueue() for page allocation.
|
||||||
|
|
||||||
|
As it turns out, there's no requirement for pageblocks to be so large,
|
||||||
|
so the pageblock order can simply be reduced to ease the scheduling
|
||||||
|
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
|
||||||
|
reasonable setting to ensure non-costly page allocation requests can
|
||||||
|
still be serviced without always needing to free up more than one
|
||||||
|
pageblock's worth of pages at a time.
|
||||||
|
|
||||||
|
This has a noticeable effect on overall system latency when memory
|
||||||
|
pressure is elevated. The various mm functions which operate on
|
||||||
|
pageblocks no longer appear in the preemptoff tracer, where previously
|
||||||
|
they would spend up to 100 ms on a mobile arm64 CPU processing a
|
||||||
|
pageblock with preemption disabled and the zone lock held.
|
||||||
|
|
||||||
|
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||||
|
---
|
||||||
|
include/linux/pageblock-flags.h | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
|
||||||
|
index 5f1ae07d724b88..97cda629c9e909 100644
|
||||||
|
--- a/include/linux/pageblock-flags.h
|
||||||
|
+++ b/include/linux/pageblock-flags.h
|
||||||
|
@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;
|
||||||
|
#else /* CONFIG_HUGETLB_PAGE */
|
||||||
|
|
||||||
|
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
|
||||||
|
-#define pageblock_order (MAX_ORDER-1)
|
||||||
|
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER
|
||||||
|
|
||||||
|
#endif /* CONFIG_HUGETLB_PAGE */
|
||||||
|
|
||||||
|
|
||||||
|
From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||||
|
Date: Wed, 20 Oct 2021 20:50:32 -0700
|
||||||
|
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()
|
||||||
|
|
||||||
|
There is noticeable scheduling latency and heavy zone lock contention
|
||||||
|
stemming from rmqueue_bulk's single hold of the zone lock while doing
|
||||||
|
its work, as seen with the preemptoff tracer. There's no actual need for
|
||||||
|
rmqueue_bulk() to hold the zone lock the entire time; it only does so
|
||||||
|
for supposed efficiency. As such, we can relax the zone lock and even
|
||||||
|
reschedule when IRQs are enabled in order to keep the scheduling delays
|
||||||
|
and zone lock contention at bay. Forward progress is still guaranteed,
|
||||||
|
as the zone lock can only be relaxed after page removal.
|
||||||
|
|
||||||
|
With this change, rmqueue_bulk() no longer appears as a serious offender
|
||||||
|
in the preemptoff tracer, and system latency is noticeably improved.
|
||||||
|
|
||||||
|
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||||
|
---
|
||||||
|
mm/page_alloc.c | 23 ++++++++++++++++++-----
|
||||||
|
1 file changed, 18 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||||
|
index a0b0397e29ee4c..87a983a356530c 100644
|
||||||
|
--- a/mm/page_alloc.c
|
||||||
|
+++ b/mm/page_alloc.c
|
||||||
|
@@ -3119,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
- * Obtain a specified number of elements from the buddy allocator, all under
|
||||||
|
- * a single hold of the lock, for efficiency. Add them to the supplied list.
|
||||||
|
- * Returns the number of new pages which were placed at *list.
|
||||||
|
+ * Obtain a specified number of elements from the buddy allocator, and relax the
|
||||||
|
+ * zone lock when needed. Add them to the supplied list. Returns the number of
|
||||||
|
+ * new pages which were placed at *list.
|
||||||
|
*/
|
||||||
|
static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||||
|
unsigned long count, struct list_head *list,
|
||||||
|
int migratetype, unsigned int alloc_flags)
|
||||||
|
{
|
||||||
|
- int i, allocated = 0;
|
||||||
|
+ const bool can_resched = !preempt_count() && !irqs_disabled();
|
||||||
|
+ int i, allocated = 0, last_mod = 0;
|
||||||
|
|
||||||
|
/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
|
||||||
|
spin_lock(&zone->lock);
|
||||||
|
@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||||
|
if (unlikely(page == NULL))
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ /* Reschedule and ease the contention on the lock if needed */
|
||||||
|
+ if (i + 1 < count && ((can_resched && need_resched()) ||
|
||||||
|
+ spin_needbreak(&zone->lock))) {
|
||||||
|
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
|
||||||
|
+ -((i + 1 - last_mod) << order));
|
||||||
|
+ last_mod = i + 1;
|
||||||
|
+ spin_unlock(&zone->lock);
|
||||||
|
+ if (can_resched)
|
||||||
|
+ cond_resched();
|
||||||
|
+ spin_lock(&zone->lock);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (unlikely(check_pcp_refill(page, order)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||||
|
* on i. Do not confuse with 'allocated' which is the number of
|
||||||
|
* pages added to the pcp list.
|
||||||
|
*/
|
||||||
|
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
|
||||||
|
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
|
||||||
|
spin_unlock(&zone->lock);
|
||||||
|
return allocated;
|
||||||
|
}
|
||||||
|
96926
linux-tkg-patches/6.1/0008-6.1-bcachefs.patch
Normal file
96926
linux-tkg-patches/6.1/0008-6.1-bcachefs.patch
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -64,422 +64,253 @@ index 2c7171e0b0010..85de313ddec29 100644
|
|||||||
select CPU_FREQ_GOV_PERFORMANCE
|
select CPU_FREQ_GOV_PERFORMANCE
|
||||||
help
|
help
|
||||||
|
|
||||||
From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001
|
From 0c079d3f88df5f8286cd5c91b54bdac7c819be85 Mon Sep 17 00:00:00 2001
|
||||||
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
|
From: Matthew Auld <matthew.auld@intel.com>
|
||||||
Date: Mon, 4 Oct 2021 14:07:34 -0400
|
Date: Tue, 6 Dec 2022 16:11:41 +0000
|
||||||
Subject: [PATCH] Bluetooth: fix deadlock for RFCOMM sk state change
|
Subject: [PATCH] drm/i915: improve the catch-all evict to handle lock
|
||||||
|
contention
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
Syzbot reports the following task hang [1]:
|
The catch-all evict can fail due to object lock contention, since it
|
||||||
|
only goes as far as trylocking the object, due to us already holding the
|
||||||
|
vm->mutex. Doing a full object lock here can deadlock, since the
|
||||||
|
vm->mutex is always our inner lock. Add another execbuf pass which drops
|
||||||
|
the vm->mutex and then tries to grab the object with the full lock,
|
||||||
|
before then retrying the eviction. This should be good enough for now to
|
||||||
|
fix the immediate regression with userspace seeing -ENOSPC from execbuf
|
||||||
|
due to contended object locks during GTT eviction.
|
||||||
|
|
||||||
INFO: task syz-executor255:8499 blocked for more than 143 seconds.
|
Testcase: igt@gem_ppgtt@shrink-vs-evict-*
|
||||||
Not tainted 5.14.0-rc7-syzkaller #0
|
Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.")
|
||||||
|
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627
|
||||||
|
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570
|
||||||
|
References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558
|
||||||
|
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
|
||||||
|
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
|
||||||
|
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
|
||||||
|
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
|
||||||
|
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
|
||||||
|
Cc: Mani Milani <mani@chromium.org>
|
||||||
|
Cc: <stable@vger.kernel.org> # v5.18+
|
||||||
|
|
||||||
Call Trace:
|
Revision 1 of https://patchwork.freedesktop.org/series/111686/
|
||||||
context_switch kernel/sched/core.c:4681 [inline]
|
|
||||||
__schedule+0x93a/0x26f0 kernel/sched/core.c:5938
|
|
||||||
schedule+0xd3/0x270 kernel/sched/core.c:6017
|
|
||||||
__lock_sock+0x13d/0x260 net/core/sock.c:2644
|
|
||||||
lock_sock_nested+0xf6/0x120 net/core/sock.c:3185
|
|
||||||
lock_sock include/net/sock.h:1612 [inline]
|
|
||||||
rfcomm_sk_state_change+0xb4/0x390 net/bluetooth/rfcomm/sock.c:73
|
|
||||||
__rfcomm_dlc_close+0x1b6/0x8a0 net/bluetooth/rfcomm/core.c:489
|
|
||||||
rfcomm_dlc_close+0x1ea/0x240 net/bluetooth/rfcomm/core.c:520
|
|
||||||
__rfcomm_sock_close+0xac/0x260 net/bluetooth/rfcomm/sock.c:220
|
|
||||||
rfcomm_sock_shutdown+0xe9/0x210 net/bluetooth/rfcomm/sock.c:931
|
|
||||||
rfcomm_sock_release+0x5f/0x140 net/bluetooth/rfcomm/sock.c:951
|
|
||||||
__sock_release+0xcd/0x280 net/socket.c:649
|
|
||||||
sock_close+0x18/0x20 net/socket.c:1314
|
|
||||||
__fput+0x288/0x920 fs/file_table.c:280
|
|
||||||
task_work_run+0xdd/0x1a0 kernel/task_work.c:164
|
|
||||||
exit_task_work include/linux/task_work.h:32 [inline]
|
|
||||||
do_exit+0xbd4/0x2a60 kernel/exit.c:825
|
|
||||||
do_group_exit+0x125/0x310 kernel/exit.c:922
|
|
||||||
get_signal+0x47f/0x2160 kernel/signal.c:2808
|
|
||||||
arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:865
|
|
||||||
handle_signal_work kernel/entry/common.c:148 [inline]
|
|
||||||
exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
|
|
||||||
exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:209
|
|
||||||
__syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
|
|
||||||
syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:302
|
|
||||||
do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
|
|
||||||
entry_SYSCALL_64_after_hwframe+0x44/0xae
|
|
||||||
|
|
||||||
Showing all locks held in the system:
|
|
||||||
1 lock held by khungtaskd/1653:
|
|
||||||
#0: ffffffff8b97c280 (rcu_read_lock){....}-{1:2}, at:
|
|
||||||
debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6446
|
|
||||||
1 lock held by krfcommd/4781:
|
|
||||||
#0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
|
|
||||||
rfcomm_process_sessions net/bluetooth/rfcomm/core.c:1979 [inline]
|
|
||||||
#0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
|
|
||||||
rfcomm_run+0x2ed/0x4a20 net/bluetooth/rfcomm/core.c:2086
|
|
||||||
2 locks held by in:imklog/8206:
|
|
||||||
#0: ffff8880182ce5f0 (&f->f_pos_lock){+.+.}-{3:3}, at:
|
|
||||||
__fdget_pos+0xe9/0x100 fs/file.c:974
|
|
||||||
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
|
|
||||||
raw_spin_rq_lock_nested kernel/sched/core.c:460 [inline]
|
|
||||||
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock
|
|
||||||
kernel/sched/sched.h:1307 [inline]
|
|
||||||
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: rq_lock
|
|
||||||
kernel/sched/sched.h:1610 [inline]
|
|
||||||
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
|
|
||||||
__schedule+0x233/0x26f0 kernel/sched/core.c:5852
|
|
||||||
4 locks held by syz-executor255/8499:
|
|
||||||
#0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
|
|
||||||
inode_lock include/linux/fs.h:774 [inline]
|
|
||||||
#0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
|
|
||||||
__sock_release+0x86/0x280 net/socket.c:648
|
|
||||||
#1:
|
|
||||||
ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
|
|
||||||
at: lock_sock include/net/sock.h:1612 [inline]
|
|
||||||
#1:
|
|
||||||
ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
|
|
||||||
at: rfcomm_sock_shutdown+0x54/0x210 net/bluetooth/rfcomm/sock.c:928
|
|
||||||
#2: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
|
|
||||||
rfcomm_dlc_close+0x34/0x240 net/bluetooth/rfcomm/core.c:507
|
|
||||||
#3: ffff888141bd6d28 (&d->lock){+.+.}-{3:3}, at:
|
|
||||||
__rfcomm_dlc_close+0x162/0x8a0 net/bluetooth/rfcomm/core.c:487
|
|
||||||
==================================================================
|
|
||||||
|
|
||||||
The task hangs because of a deadlock that occurs when lock_sock() is
|
|
||||||
called in rfcomm_sk_state_change(). One such call stack is:
|
|
||||||
|
|
||||||
rfcomm_sock_shutdown():
|
|
||||||
lock_sock();
|
|
||||||
__rfcomm_sock_close():
|
|
||||||
rfcomm_dlc_close():
|
|
||||||
__rfcomm_dlc_close():
|
|
||||||
rfcomm_dlc_lock();
|
|
||||||
rfcomm_sk_state_change():
|
|
||||||
lock_sock();
|
|
||||||
|
|
||||||
lock_sock() has to be called when the sk state is changed because the
|
|
||||||
lock is not always held when rfcomm_sk_state_change() is
|
|
||||||
called. However, besides the recursive deadlock, there is also an
|
|
||||||
issue of a lock hierarchy inversion between rfcomm_dlc_lock() and
|
|
||||||
lock_sock() if the socket is locked in rfcomm_sk_state_change().
|
|
||||||
|
|
||||||
To avoid these issues, we can instead schedule the sk state change in
|
|
||||||
the global workqueue. This is already the implicit assumption about
|
|
||||||
how sk state changes happen. For example, in rfcomm_sock_shutdown(),
|
|
||||||
the call to __rfcomm_sock_close() is followed by
|
|
||||||
bt_sock_wait_state().
|
|
||||||
|
|
||||||
Additionally, the call to rfcomm_sock_kill() inside
|
|
||||||
rfcomm_sk_state_change() should be removed. The socket shouldn't be
|
|
||||||
killed here because only rfcomm_sock_release() calls sock_orphan(),
|
|
||||||
which it already follows up with a call to rfcomm_sock_kill().
|
|
||||||
|
|
||||||
Fixes: b7ce436a5d79 ("Bluetooth: switch to lock_sock in RFCOMM")
|
|
||||||
Link: https://syzkaller.appspot.com/bug?extid=7d51f807c81b190a127d [1]
|
|
||||||
Reported-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
|
|
||||||
Tested-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
|
|
||||||
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
|
|
||||||
Cc: Hillf Danton <hdanton@sina.com>
|
|
||||||
---
|
---
|
||||||
include/net/bluetooth/rfcomm.h | 3 +++
|
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 25 +++++++++++--
|
||||||
net/bluetooth/rfcomm/core.c | 2 ++
|
drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +-
|
||||||
net/bluetooth/rfcomm/sock.c | 34 ++++++++++++++++++++++------------
|
drivers/gpu/drm/i915/i915_gem_evict.c | 37 ++++++++++++++-----
|
||||||
3 files changed, 27 insertions(+), 12 deletions(-)
|
drivers/gpu/drm/i915/i915_gem_evict.h | 4 +-
|
||||||
|
drivers/gpu/drm/i915/i915_vma.c | 2 +-
|
||||||
|
.../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-
|
||||||
|
6 files changed, 56 insertions(+), 18 deletions(-)
|
||||||
|
|
||||||
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
|
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||||
index 99d26879b02a53..a92799fc5e74d0 100644
|
index 845023c14eb36f..094e92ed28db4f 100644
|
||||||
--- a/include/net/bluetooth/rfcomm.h
|
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||||
+++ b/include/net/bluetooth/rfcomm.h
|
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||||
@@ -171,6 +171,7 @@ struct rfcomm_dlc {
|
@@ -741,25 +741,44 @@ static int eb_reserve(struct i915_execbuffer *eb)
|
||||||
struct rfcomm_session *session;
|
*
|
||||||
struct sk_buff_head tx_queue;
|
* Defragmenting is skipped if all objects are pinned at a fixed location.
|
||||||
struct timer_list timer;
|
*/
|
||||||
+ struct work_struct state_change_work;
|
- for (pass = 0; pass <= 2; pass++) {
|
||||||
|
+ for (pass = 0; pass <= 3; pass++) {
|
||||||
|
int pin_flags = PIN_USER | PIN_VALIDATE;
|
||||||
|
|
||||||
struct mutex lock;
|
if (pass == 0)
|
||||||
unsigned long state;
|
pin_flags |= PIN_NONBLOCK;
|
||||||
@@ -186,6 +187,7 @@ struct rfcomm_dlc {
|
|
||||||
u8 sec_level;
|
|
||||||
u8 role_switch;
|
|
||||||
u32 defer_setup;
|
|
||||||
+ int err;
|
|
||||||
|
|
||||||
uint mtu;
|
if (pass >= 1)
|
||||||
uint cfc;
|
- unpinned = eb_unbind(eb, pass == 2);
|
||||||
@@ -310,6 +312,7 @@ struct rfcomm_pinfo {
|
+ unpinned = eb_unbind(eb, pass >= 2);
|
||||||
u8 role_switch;
|
|
||||||
};
|
|
||||||
|
|
||||||
+void __rfcomm_sk_state_change(struct work_struct *work);
|
if (pass == 2) {
|
||||||
int rfcomm_init_sockets(void);
|
err = mutex_lock_interruptible(&eb->context->vm->mutex);
|
||||||
void rfcomm_cleanup_sockets(void);
|
if (!err) {
|
||||||
|
- err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
|
||||||
|
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
|
||||||
|
mutex_unlock(&eb->context->vm->mutex);
|
||||||
|
}
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
|
+ if (pass == 3) {
|
||||||
index 7324764384b677..c6494e85cd68b2 100644
|
+retry:
|
||||||
--- a/net/bluetooth/rfcomm/core.c
|
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
|
||||||
+++ b/net/bluetooth/rfcomm/core.c
|
+ if (!err) {
|
||||||
@@ -289,6 +289,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
|
+ struct drm_i915_gem_object *busy_bo = NULL;
|
||||||
d->flags = 0;
|
|
||||||
d->mscex = 0;
|
|
||||||
d->sec_level = BT_SECURITY_LOW;
|
|
||||||
+ d->err = 0;
|
|
||||||
d->mtu = RFCOMM_DEFAULT_MTU;
|
|
||||||
d->v24_sig = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
|
|
||||||
|
|
||||||
@@ -306,6 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
|
|
||||||
timer_setup(&d->timer, rfcomm_dlc_timeout, 0);
|
|
||||||
|
|
||||||
skb_queue_head_init(&d->tx_queue);
|
|
||||||
+ INIT_WORK(&d->state_change_work, __rfcomm_sk_state_change);
|
|
||||||
mutex_init(&d->lock);
|
|
||||||
refcount_set(&d->refcnt, 1);
|
|
||||||
|
|
||||||
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
|
|
||||||
index 4bf4ea6cbb5eee..4850dafbaa05fb 100644
|
|
||||||
--- a/net/bluetooth/rfcomm/sock.c
|
|
||||||
+++ b/net/bluetooth/rfcomm/sock.c
|
|
||||||
@@ -61,19 +61,22 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb)
|
|
||||||
rfcomm_dlc_throttle(d);
|
|
||||||
}
|
|
||||||
|
|
||||||
-static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
|
|
||||||
+void __rfcomm_sk_state_change(struct work_struct *work)
|
|
||||||
{
|
|
||||||
+ struct rfcomm_dlc *d = container_of(work, struct rfcomm_dlc,
|
|
||||||
+ state_change_work);
|
|
||||||
struct sock *sk = d->owner, *parent;
|
|
||||||
|
|
||||||
if (!sk)
|
|
||||||
return;
|
|
||||||
|
|
||||||
- BT_DBG("dlc %p state %ld err %d", d, d->state, err);
|
|
||||||
-
|
|
||||||
lock_sock(sk);
|
|
||||||
+ rfcomm_dlc_lock(d);
|
|
||||||
|
|
||||||
- if (err)
|
|
||||||
- sk->sk_err = err;
|
|
||||||
+ BT_DBG("dlc %p state %ld err %d", d, d->state, d->err);
|
|
||||||
+
|
+
|
||||||
+ if (d->err)
|
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
|
||||||
+ sk->sk_err = d->err;
|
+ mutex_unlock(&eb->context->vm->mutex);
|
||||||
|
+ if (err && busy_bo) {
|
||||||
|
+ err = i915_gem_object_lock(busy_bo, &eb->ww);
|
||||||
|
+ i915_gem_object_put(busy_bo);
|
||||||
|
+ if (!err)
|
||||||
|
+ goto retry;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (err)
|
||||||
|
+ return err;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
list_for_each_entry(ev, &eb->unbound, bind_link) {
|
||||||
|
err = eb_reserve_vma(eb, ev, pin_flags);
|
||||||
|
if (err)
|
||||||
|
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||||
|
index 73d9eda1d6b7a6..c83d98e1dc5da0 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||||
|
@@ -369,7 +369,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
|
||||||
|
if (vma == ERR_PTR(-ENOSPC)) {
|
||||||
|
ret = mutex_lock_interruptible(&ggtt->vm.mutex);
|
||||||
|
if (!ret) {
|
||||||
|
- ret = i915_gem_evict_vm(&ggtt->vm, &ww);
|
||||||
|
+ ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
|
||||||
|
mutex_unlock(&ggtt->vm.mutex);
|
||||||
|
}
|
||||||
|
if (ret)
|
||||||
|
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||||
|
index f025ee4fa52618..a4b4d9b7d26c7a 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||||
|
@@ -416,6 +416,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||||
|
* @vm: Address space to cleanse
|
||||||
|
* @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
|
||||||
|
* will be able to evict vma's locked by the ww as well.
|
||||||
|
+ * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
|
||||||
|
+ * in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
|
||||||
|
+ * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
|
||||||
|
+ * the vm->mutex, before trying again to acquire the contended lock. The caller
|
||||||
|
+ * also owns a reference to the object.
|
||||||
|
*
|
||||||
|
* This function evicts all vmas from a vm.
|
||||||
|
*
|
||||||
|
@@ -425,7 +430,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||||
|
* To clarify: This is for freeing up virtual address space, not for freeing
|
||||||
|
* memory in e.g. the shrinker.
|
||||||
|
*/
|
||||||
|
-int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||||
|
+int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
|
||||||
|
+ struct drm_i915_gem_object **busy_bo)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
sk->sk_state = d->state;
|
@@ -457,15 +463,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||||
|
* the resv is shared among multiple objects, we still
|
||||||
|
* need the object ref.
|
||||||
|
*/
|
||||||
|
- if (dying_vma(vma) ||
|
||||||
|
+ if (!i915_gem_object_get_rcu(vma->obj) ||
|
||||||
|
(ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) {
|
||||||
|
__i915_vma_pin(vma);
|
||||||
|
list_add(&vma->evict_link, &locked_eviction_list);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
@@ -91,15 +94,22 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
|
- if (!i915_gem_object_trylock(vma->obj, ww))
|
||||||
sk->sk_state_change(sk);
|
+ if (!i915_gem_object_trylock(vma->obj, ww)) {
|
||||||
|
+ if (busy_bo) {
|
||||||
|
+ *busy_bo = vma->obj; /* holds ref */
|
||||||
|
+ ret = -EBUSY;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ i915_gem_object_put(vma->obj);
|
||||||
|
continue;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
__i915_vma_pin(vma);
|
||||||
|
list_add(&vma->evict_link, &eviction_list);
|
||||||
|
@@ -473,25 +486,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||||
|
if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
|
||||||
|
break;
|
||||||
|
|
||||||
|
- ret = 0;
|
||||||
|
/* Unbind locked objects first, before unlocking the eviction_list */
|
||||||
|
list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
|
||||||
|
__i915_vma_unpin(vma);
|
||||||
|
|
||||||
|
- if (ret == 0)
|
||||||
|
+ if (ret == 0) {
|
||||||
|
ret = __i915_vma_unbind(vma);
|
||||||
|
- if (ret != -EINTR) /* "Get me out of here!" */
|
||||||
|
- ret = 0;
|
||||||
|
+ if (ret != -EINTR) /* "Get me out of here!" */
|
||||||
|
+ ret = 0;
|
||||||
|
+ }
|
||||||
|
+ if (!dying_vma(vma))
|
||||||
|
+ i915_gem_object_put(vma->obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
|
||||||
|
__i915_vma_unpin(vma);
|
||||||
|
- if (ret == 0)
|
||||||
|
+ if (ret == 0) {
|
||||||
|
ret = __i915_vma_unbind(vma);
|
||||||
|
- if (ret != -EINTR) /* "Get me out of here!" */
|
||||||
|
- ret = 0;
|
||||||
|
+ if (ret != -EINTR) /* "Get me out of here!" */
|
||||||
|
+ ret = 0;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
i915_gem_object_unlock(vma->obj);
|
||||||
|
+ i915_gem_object_put(vma->obj);
|
||||||
|
}
|
||||||
|
} while (ret == 0);
|
||||||
|
|
||||||
|
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||||
|
index e593c530f9bd7a..bf0ee0e4fe6088 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||||
|
+++ b/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||||
|
@@ -11,6 +11,7 @@
|
||||||
|
struct drm_mm_node;
|
||||||
|
struct i915_address_space;
|
||||||
|
struct i915_gem_ww_ctx;
|
||||||
|
+struct drm_i915_gem_object;
|
||||||
|
|
||||||
|
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
|
||||||
|
struct i915_gem_ww_ctx *ww,
|
||||||
|
@@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||||
|
struct drm_mm_node *node,
|
||||||
|
unsigned int flags);
|
||||||
|
int i915_gem_evict_vm(struct i915_address_space *vm,
|
||||||
|
- struct i915_gem_ww_ctx *ww);
|
||||||
|
+ struct i915_gem_ww_ctx *ww,
|
||||||
|
+ struct drm_i915_gem_object **busy_bo);
|
||||||
|
|
||||||
|
#endif /* __I915_GEM_EVICT_H__ */
|
||||||
|
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
|
||||||
|
index f17c09ead7d778..4d06875de14a14 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/i915_vma.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/i915_vma.c
|
||||||
|
@@ -1569,7 +1569,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
||||||
|
* locked objects when called from execbuf when pinning
|
||||||
|
* is removed. This would probably regress badly.
|
||||||
|
*/
|
||||||
|
- i915_gem_evict_vm(vm, NULL);
|
||||||
|
+ i915_gem_evict_vm(vm, NULL, NULL);
|
||||||
|
mutex_unlock(&vm->mutex);
|
||||||
|
}
|
||||||
|
} while (1);
|
||||||
|
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||||
|
index 8c6517d29b8e0c..37068542aafe7f 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||||
|
@@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
|
||||||
|
|
||||||
|
/* Everything is pinned, nothing should happen */
|
||||||
|
mutex_lock(&ggtt->vm.mutex);
|
||||||
|
- err = i915_gem_evict_vm(&ggtt->vm, NULL);
|
||||||
|
+ err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL);
|
||||||
|
mutex_unlock(&ggtt->vm.mutex);
|
||||||
|
if (err) {
|
||||||
|
pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
|
||||||
|
@@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
|
||||||
|
|
||||||
|
for_i915_gem_ww(&ww, err, false) {
|
||||||
|
mutex_lock(&ggtt->vm.mutex);
|
||||||
|
- err = i915_gem_evict_vm(&ggtt->vm, &ww);
|
||||||
|
+ err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
|
||||||
|
mutex_unlock(&ggtt->vm.mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
+ rfcomm_dlc_unlock(d);
|
|
||||||
release_sock(sk);
|
|
||||||
+ sock_put(sk);
|
|
||||||
+}
|
|
||||||
|
|
||||||
- if (parent && sock_flag(sk, SOCK_ZAPPED)) {
|
|
||||||
- /* We have to drop DLC lock here, otherwise
|
|
||||||
- * rfcomm_sock_destruct() will dead lock. */
|
|
||||||
- rfcomm_dlc_unlock(d);
|
|
||||||
- rfcomm_sock_kill(sk);
|
|
||||||
- rfcomm_dlc_lock(d);
|
|
||||||
- }
|
|
||||||
+static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
|
|
||||||
+{
|
|
||||||
+ struct sock *sk = d->owner;
|
|
||||||
+
|
|
||||||
+ if (!sk)
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ d->err = err;
|
|
||||||
+ sock_hold(sk);
|
|
||||||
+ if (!schedule_work(&d->state_change_work))
|
|
||||||
+ sock_put(sk);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---- Socket functions ---- */
|
|
||||||
|
|
||||||
From 727209376f4998bc84db1d5d8af15afea846a92b Mon Sep 17 00:00:00 2001
|
|
||||||
From: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
|
|
||||||
Date: Mon, 24 Oct 2022 17:02:54 -0300
|
|
||||||
Subject: x86/split_lock: Add sysctl to control the misery mode
|
|
||||||
|
|
||||||
Commit b041b525dab9 ("x86/split_lock: Make life miserable for split lockers")
|
|
||||||
changed the way the split lock detector works when in "warn" mode;
|
|
||||||
basically, it not only shows the warn message, but also intentionally
|
|
||||||
introduces a slowdown through sleeping plus serialization mechanism
|
|
||||||
on such task. Based on discussions in [0], seems the warning alone
|
|
||||||
wasn't enough motivation for userspace developers to fix their
|
|
||||||
applications.
|
|
||||||
|
|
||||||
This slowdown is enough to totally break some proprietary (aka.
|
|
||||||
unfixable) userspace[1].
|
|
||||||
|
|
||||||
Happens that originally the proposal in [0] was to add a new mode
|
|
||||||
which would warns + slowdown the "split locking" task, keeping the
|
|
||||||
old warn mode untouched. In the end, that idea was discarded and
|
|
||||||
the regular/default "warn" mode now slows down the applications. This
|
|
||||||
is quite aggressive with regards proprietary/legacy programs that
|
|
||||||
basically are unable to properly run in kernel with this change.
|
|
||||||
While it is understandable that a malicious application could DoS
|
|
||||||
by split locking, it seems unacceptable to regress old/proprietary
|
|
||||||
userspace programs through a default configuration that previously
|
|
||||||
worked. An example of such breakage was reported in [1].
|
|
||||||
|
|
||||||
Add a sysctl to allow controlling the "misery mode" behavior, as per
|
|
||||||
Thomas suggestion on [2]. This way, users running legacy and/or
|
|
||||||
proprietary software are allowed to still execute them with a decent
|
|
||||||
performance while still observing the warning messages on kernel log.
|
|
||||||
|
|
||||||
[0] https://lore.kernel.org/lkml/20220217012721.9694-1-tony.luck@intel.com/
|
|
||||||
[1] https://github.com/doitsujin/dxvk/issues/2938
|
|
||||||
[2] https://lore.kernel.org/lkml/87pmf4bter.ffs@tglx/
|
|
||||||
|
|
||||||
[ dhansen: minor changelog tweaks, including clarifying the actual
|
|
||||||
problem ]
|
|
||||||
|
|
||||||
Fixes: b041b525dab9 ("x86/split_lock: Make life miserable for split lockers")
|
|
||||||
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
|
|
||||||
Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
|
|
||||||
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
|
|
||||||
Reviewed-by: Tony Luck <tony.luck@intel.com>
|
|
||||||
Tested-by: Andre Almeida <andrealmeid@igalia.com>
|
|
||||||
Link: https://lore.kernel.org/all/20221024200254.635256-1-gpiccoli%40igalia.com
|
|
||||||
---
|
|
||||||
Documentation/admin-guide/sysctl/kernel.rst | 23 +++++++++++
|
|
||||||
arch/x86/kernel/cpu/intel.c | 63 ++++++++++++++++++++++++-----
|
|
||||||
2 files changed, 76 insertions(+), 10 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
|
|
||||||
index 98d1b198b2b4c..c2c64c1b706ff 100644
|
|
||||||
--- a/Documentation/admin-guide/sysctl/kernel.rst
|
|
||||||
+++ b/Documentation/admin-guide/sysctl/kernel.rst
|
|
||||||
@@ -1314,6 +1314,29 @@ watchdog work to be queued by the watchdog timer function, otherwise the NMI
|
|
||||||
watchdog — if enabled — can detect a hard lockup condition.
|
|
||||||
|
|
||||||
|
|
||||||
+split_lock_mitigate (x86 only)
|
|
||||||
+==============================
|
|
||||||
+
|
|
||||||
+On x86, each "split lock" imposes a system-wide performance penalty. On larger
|
|
||||||
+systems, large numbers of split locks from unprivileged users can result in
|
|
||||||
+denials of service to well-behaved and potentially more important users.
|
|
||||||
+
|
|
||||||
+The kernel mitigates these bad users by detecting split locks and imposing
|
|
||||||
+penalties: forcing them to wait and only allowing one core to execute split
|
|
||||||
+locks at a time.
|
|
||||||
+
|
|
||||||
+These mitigations can make those bad applications unbearably slow. Setting
|
|
||||||
+split_lock_mitigate=0 may restore some application performance, but will also
|
|
||||||
+increase system exposure to denial of service attacks from split lock users.
|
|
||||||
+
|
|
||||||
+= ===================================================================
|
|
||||||
+0 Disable the mitigation mode - just warns the split lock on kernel log
|
|
||||||
+ and exposes the system to denials of service from the split lockers.
|
|
||||||
+1 Enable the mitigation mode (this is the default) - penalizes the split
|
|
||||||
+ lockers with intentional performance degradation.
|
|
||||||
+= ===================================================================
|
|
||||||
+
|
|
||||||
+
|
|
||||||
stack_erasing
|
|
||||||
=============
|
|
||||||
|
|
||||||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
|
|
||||||
index 2d7ea5480ec33..4278996504833 100644
|
|
||||||
--- a/arch/x86/kernel/cpu/intel.c
|
|
||||||
+++ b/arch/x86/kernel/cpu/intel.c
|
|
||||||
@@ -1034,8 +1034,32 @@ static const struct {
|
|
||||||
|
|
||||||
static struct ratelimit_state bld_ratelimit;
|
|
||||||
|
|
||||||
+static unsigned int sysctl_sld_mitigate = 1;
|
|
||||||
static DEFINE_SEMAPHORE(buslock_sem);
|
|
||||||
|
|
||||||
+#ifdef CONFIG_PROC_SYSCTL
|
|
||||||
+static struct ctl_table sld_sysctls[] = {
|
|
||||||
+ {
|
|
||||||
+ .procname = "split_lock_mitigate",
|
|
||||||
+ .data = &sysctl_sld_mitigate,
|
|
||||||
+ .maxlen = sizeof(unsigned int),
|
|
||||||
+ .mode = 0644,
|
|
||||||
+ .proc_handler = proc_douintvec_minmax,
|
|
||||||
+ .extra1 = SYSCTL_ZERO,
|
|
||||||
+ .extra2 = SYSCTL_ONE,
|
|
||||||
+ },
|
|
||||||
+ {}
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static int __init sld_mitigate_sysctl_init(void)
|
|
||||||
+{
|
|
||||||
+ register_sysctl_init("kernel", sld_sysctls);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+late_initcall(sld_mitigate_sysctl_init);
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
static inline bool match_option(const char *arg, int arglen, const char *opt)
|
|
||||||
{
|
|
||||||
int len = strlen(opt), ratelimit;
|
|
||||||
@@ -1146,12 +1170,20 @@ static void split_lock_init(void)
|
|
||||||
split_lock_verify_msr(sld_state != sld_off);
|
|
||||||
}
|
|
||||||
|
|
||||||
-static void __split_lock_reenable(struct work_struct *work)
|
|
||||||
+static void __split_lock_reenable_unlock(struct work_struct *work)
|
|
||||||
{
|
|
||||||
sld_update_msr(true);
|
|
||||||
up(&buslock_sem);
|
|
||||||
}
|
|
||||||
|
|
||||||
+static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
|
|
||||||
+
|
|
||||||
+static void __split_lock_reenable(struct work_struct *work)
|
|
||||||
+{
|
|
||||||
+ sld_update_msr(true);
|
|
||||||
+}
|
|
||||||
+static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
|
|
||||||
+
|
|
||||||
/*
|
|
||||||
* If a CPU goes offline with pending delayed work to re-enable split lock
|
|
||||||
* detection then the delayed work will be executed on some other CPU. That
|
|
||||||
@@ -1169,10 +1201,9 @@ static int splitlock_cpu_offline(unsigned int cpu)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
-static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
|
|
||||||
-
|
|
||||||
static void split_lock_warn(unsigned long ip)
|
|
||||||
{
|
|
||||||
+ struct delayed_work *work;
|
|
||||||
int cpu;
|
|
||||||
|
|
||||||
if (!current->reported_split_lock)
|
|
||||||
@@ -1180,14 +1211,26 @@ static void split_lock_warn(unsigned long ip)
|
|
||||||
current->comm, current->pid, ip);
|
|
||||||
current->reported_split_lock = 1;
|
|
||||||
|
|
||||||
- /* misery factor #1, sleep 10ms before trying to execute split lock */
|
|
||||||
- if (msleep_interruptible(10) > 0)
|
|
||||||
- return;
|
|
||||||
- /* Misery factor #2, only allow one buslocked disabled core at a time */
|
|
||||||
- if (down_interruptible(&buslock_sem) == -EINTR)
|
|
||||||
- return;
|
|
||||||
+ if (sysctl_sld_mitigate) {
|
|
||||||
+ /*
|
|
||||||
+ * misery factor #1:
|
|
||||||
+ * sleep 10ms before trying to execute split lock.
|
|
||||||
+ */
|
|
||||||
+ if (msleep_interruptible(10) > 0)
|
|
||||||
+ return;
|
|
||||||
+ /*
|
|
||||||
+ * Misery factor #2:
|
|
||||||
+ * only allow one buslocked disabled core at a time.
|
|
||||||
+ */
|
|
||||||
+ if (down_interruptible(&buslock_sem) == -EINTR)
|
|
||||||
+ return;
|
|
||||||
+ work = &sl_reenable_unlock;
|
|
||||||
+ } else {
|
|
||||||
+ work = &sl_reenable;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
cpu = get_cpu();
|
|
||||||
- schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
|
|
||||||
+ schedule_delayed_work_on(cpu, work, 2);
|
|
||||||
|
|
||||||
/* Disable split lock detection on this CPU to make progress */
|
|
||||||
sld_update_msr(false);
|
|
||||||
|
@@ -0,0 +1,151 @@
|
|||||||
|
From d50977b164e708bf523a35ef53315355528c3ca6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
||||||
|
Date: Mon, 16 Sep 2019 04:53:20 +0200
|
||||||
|
Subject: [PATCH] ZEN: Add sysctl and CONFIG to disallow unprivileged
|
||||||
|
CLONE_NEWUSER
|
||||||
|
|
||||||
|
Our default behavior continues to match the vanilla kernel.
|
||||||
|
---
|
||||||
|
include/linux/user_namespace.h | 4 ++++
|
||||||
|
init/Kconfig | 16 ++++++++++++++++
|
||||||
|
kernel/fork.c | 14 ++++++++++++++
|
||||||
|
kernel/sysctl.c | 12 ++++++++++++
|
||||||
|
kernel/user_namespace.c | 7 +++++++
|
||||||
|
5 files changed, 53 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
|
||||||
|
index 45f09bec02c485..87b20e2ee27445 100644
|
||||||
|
--- a/include/linux/user_namespace.h
|
||||||
|
+++ b/include/linux/user_namespace.h
|
||||||
|
@@ -148,6 +148,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
|
||||||
|
|
||||||
|
#ifdef CONFIG_USER_NS
|
||||||
|
|
||||||
|
+extern int unprivileged_userns_clone;
|
||||||
|
+
|
||||||
|
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
||||||
|
{
|
||||||
|
if (ns)
|
||||||
|
@@ -181,6 +183,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
|
||||||
|
struct ns_common *ns_get_owner(struct ns_common *ns);
|
||||||
|
#else
|
||||||
|
|
||||||
|
+#define unprivileged_userns_clone 0
|
||||||
|
+
|
||||||
|
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
||||||
|
{
|
||||||
|
return &init_user_ns;
|
||||||
|
diff --git a/init/Kconfig b/init/Kconfig
|
||||||
|
index 94125d3b6893c7..9f7139b536f638 100644
|
||||||
|
--- a/init/Kconfig
|
||||||
|
+++ b/init/Kconfig
|
||||||
|
@@ -1247,6 +1247,22 @@ config USER_NS
|
||||||
|
|
||||||
|
If unsure, say N.
|
||||||
|
|
||||||
|
+config USER_NS_UNPRIVILEGED
|
||||||
|
+ bool "Allow unprivileged users to create namespaces"
|
||||||
|
+ default y
|
||||||
|
+ depends on USER_NS
|
||||||
|
+ help
|
||||||
|
+ When disabled, unprivileged users will not be able to create
|
||||||
|
+ new namespaces. Allowing users to create their own namespaces
|
||||||
|
+ has been part of several recent local privilege escalation
|
||||||
|
+ exploits, so if you need user namespaces but are
|
||||||
|
+ paranoid^Wsecurity-conscious you want to disable this.
|
||||||
|
+
|
||||||
|
+ This setting can be overridden at runtime via the
|
||||||
|
+ kernel.unprivileged_userns_clone sysctl.
|
||||||
|
+
|
||||||
|
+ If unsure, say Y.
|
||||||
|
+
|
||||||
|
config PID_NS
|
||||||
|
bool "PID Namespaces"
|
||||||
|
default y
|
||||||
|
diff --git a/kernel/fork.c b/kernel/fork.c
|
||||||
|
index 08969f5aa38d59..ff601cb7a1fae0 100644
|
||||||
|
--- a/kernel/fork.c
|
||||||
|
+++ b/kernel/fork.c
|
||||||
|
@@ -98,6 +98,10 @@
|
||||||
|
#include <linux/io_uring.h>
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
|
||||||
|
+#ifdef CONFIG_USER_NS
|
||||||
|
+#include <linux/user_namespace.h>
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#include <asm/pgalloc.h>
|
||||||
|
#include <linux/uaccess.h>
|
||||||
|
#include <asm/mmu_context.h>
|
||||||
|
@@ -2008,6 +2012,10 @@ static __latent_entropy struct task_struct *copy_process(
|
||||||
|
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
|
||||||
|
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
|
||||||
|
+ if (!capable(CAP_SYS_ADMIN))
|
||||||
|
+ return ERR_PTR(-EPERM);
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Thread groups must share signals as well, and detached threads
|
||||||
|
* can only be started up within the thread group.
|
||||||
|
@@ -3166,6 +3174,12 @@ int ksys_unshare(unsigned long unshare_flags)
|
||||||
|
if (unshare_flags & CLONE_NEWNS)
|
||||||
|
unshare_flags |= CLONE_FS;
|
||||||
|
|
||||||
|
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
|
||||||
|
+ err = -EPERM;
|
||||||
|
+ if (!capable(CAP_SYS_ADMIN))
|
||||||
|
+ goto bad_unshare_out;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
err = check_unshare_flags(unshare_flags);
|
||||||
|
if (err)
|
||||||
|
goto bad_unshare_out;
|
||||||
|
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
||||||
|
index c6d9dec11b749d..9a4514ad481b21 100644
|
||||||
|
--- a/kernel/sysctl.c
|
||||||
|
+++ b/kernel/sysctl.c
|
||||||
|
@@ -81,6 +81,9 @@
|
||||||
|
#ifdef CONFIG_RT_MUTEXES
|
||||||
|
#include <linux/rtmutex.h>
|
||||||
|
#endif
|
||||||
|
+#ifdef CONFIG_USER_NS
|
||||||
|
+#include <linux/user_namespace.h>
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/* shared constants to be used in various sysctls */
|
||||||
|
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
|
||||||
|
@@ -1659,6 +1662,15 @@ static struct ctl_table kern_table[] = {
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec,
|
||||||
|
},
|
||||||
|
+#ifdef CONFIG_USER_NS
|
||||||
|
+ {
|
||||||
|
+ .procname = "unprivileged_userns_clone",
|
||||||
|
+ .data = &unprivileged_userns_clone,
|
||||||
|
+ .maxlen = sizeof(int),
|
||||||
|
+ .mode = 0644,
|
||||||
|
+ .proc_handler = proc_dointvec,
|
||||||
|
+ },
|
||||||
|
+#endif
|
||||||
|
#ifdef CONFIG_PROC_SYSCTL
|
||||||
|
{
|
||||||
|
.procname = "tainted",
|
||||||
|
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
|
||||||
|
index 54211dbd516c57..16ca0c1516298d 100644
|
||||||
|
--- a/kernel/user_namespace.c
|
||||||
|
+++ b/kernel/user_namespace.c
|
||||||
|
@@ -22,6 +22,13 @@
|
||||||
|
#include <linux/bsearch.h>
|
||||||
|
#include <linux/sort.h>
|
||||||
|
|
||||||
|
+/* sysctl */
|
||||||
|
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
|
||||||
|
+int unprivileged_userns_clone = 1;
|
||||||
|
+#else
|
||||||
|
+int unprivileged_userns_clone;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
static struct kmem_cache *user_ns_cachep __read_mostly;
|
||||||
|
static DEFINE_MUTEX(userns_state_mutex);
|
||||||
|
|
@@ -0,0 +1,244 @@
|
|||||||
|
From 5ae86c8436b83762bc6cf46bea1da6ace2d3f50e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Paul Gofman <pgofman@codeweavers.com>
|
||||||
|
Date: Wed, 6 May 2020 14:37:44 +0300
|
||||||
|
Subject: [PATCH 1/2] mm: Support soft dirty flag reset for VA range.
|
||||||
|
|
||||||
|
---
|
||||||
|
fs/proc/task_mmu.c | 129 ++++++++++++++++++++++++++++++++++++---------
|
||||||
|
1 file changed, 103 insertions(+), 26 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
|
||||||
|
index 3cec6fbef725..7c7865028f10 100644
|
||||||
|
--- a/fs/proc/task_mmu.c
|
||||||
|
+++ b/fs/proc/task_mmu.c
|
||||||
|
@@ -1032,6 +1032,8 @@ enum clear_refs_types {
|
||||||
|
|
||||||
|
struct clear_refs_private {
|
||||||
|
enum clear_refs_types type;
|
||||||
|
+ unsigned long start, end;
|
||||||
|
+ bool clear_range;
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_MEM_SOFT_DIRTY
|
||||||
|
@@ -1125,6 +1127,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
|
||||||
|
spinlock_t *ptl;
|
||||||
|
struct page *page;
|
||||||
|
|
||||||
|
+ BUG_ON(addr < cp->start || end > cp->end);
|
||||||
|
+
|
||||||
|
ptl = pmd_trans_huge_lock(pmd, vma);
|
||||||
|
if (ptl) {
|
||||||
|
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
|
||||||
|
@@ -1181,9 +1185,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
|
||||||
|
struct clear_refs_private *cp = walk->private;
|
||||||
|
struct vm_area_struct *vma = walk->vma;
|
||||||
|
|
||||||
|
- if (vma->vm_flags & VM_PFNMAP)
|
||||||
|
+ if (!cp->clear_range && (vma->vm_flags & VM_PFNMAP))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
+ BUG_ON(start < cp->start || end > cp->end);
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Writing 1 to /proc/pid/clear_refs affects all pages.
|
||||||
|
* Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
|
||||||
|
@@ -1206,10 +1212,12 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
|
||||||
|
size_t count, loff_t *ppos)
|
||||||
|
{
|
||||||
|
struct task_struct *task;
|
||||||
|
- char buffer[PROC_NUMBUF];
|
||||||
|
+ char buffer[18];
|
||||||
|
struct mm_struct *mm;
|
||||||
|
struct vm_area_struct *vma;
|
||||||
|
enum clear_refs_types type;
|
||||||
|
+ unsigned long start, end;
|
||||||
|
+ bool clear_range;
|
||||||
|
int itype;
|
||||||
|
int rv;
|
||||||
|
|
||||||
|
@@ -1218,12 +1226,34 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
|
||||||
|
count = sizeof(buffer) - 1;
|
||||||
|
if (copy_from_user(buffer, buf, count))
|
||||||
|
return -EFAULT;
|
||||||
|
- rv = kstrtoint(strstrip(buffer), 10, &itype);
|
||||||
|
- if (rv < 0)
|
||||||
|
- return rv;
|
||||||
|
- type = (enum clear_refs_types)itype;
|
||||||
|
- if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
|
||||||
|
- return -EINVAL;
|
||||||
|
+
|
||||||
|
+ if (buffer[0] == '6')
|
||||||
|
+ {
|
||||||
|
+ static int once;
|
||||||
|
+
|
||||||
|
+ if (!once++)
|
||||||
|
+ printk(KERN_DEBUG "task_mmu: Using POC clear refs range implementation.\n");
|
||||||
|
+
|
||||||
|
+ if (count != 17)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ type = CLEAR_REFS_SOFT_DIRTY;
|
||||||
|
+ start = *(unsigned long *)(buffer + 1);
|
||||||
|
+ end = *(unsigned long *)(buffer + 1 + 8);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ rv = kstrtoint(strstrip(buffer), 10, &itype);
|
||||||
|
+ if (rv < 0)
|
||||||
|
+ return rv;
|
||||||
|
+ type = (enum clear_refs_types)itype;
|
||||||
|
+
|
||||||
|
+ if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ start = 0;
|
||||||
|
+ end = -1UL;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
task = get_proc_task(file_inode(file));
|
||||||
|
if (!task)
|
||||||
|
@@ -1235,40 +1265,86 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
|
||||||
|
.type = type,
|
||||||
|
};
|
||||||
|
|
||||||
|
- if (mmap_write_lock_killable(mm)) {
|
||||||
|
- count = -EINTR;
|
||||||
|
- goto out_mm;
|
||||||
|
+ if (start || end != -1UL)
|
||||||
|
+ {
|
||||||
|
+ start = min(start, -1) & PAGE_MASK;
|
||||||
|
+ end = min(end, -1) & PAGE_MASK;
|
||||||
|
+
|
||||||
|
+ if (start >= end)
|
||||||
|
+ {
|
||||||
|
+ count = -EINVAL;
|
||||||
|
+ goto out_mm;
|
||||||
|
+ }
|
||||||
|
+ clear_range = true;
|
||||||
|
}
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ clear_range = false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ cp.start = start;
|
||||||
|
+ cp.end = end;
|
||||||
|
+ cp.clear_range = clear_range;
|
||||||
|
+
|
||||||
|
if (type == CLEAR_REFS_MM_HIWATER_RSS) {
|
||||||
|
+ if (mmap_write_lock_killable(mm)) {
|
||||||
|
+ count = -EINTR;
|
||||||
|
+ goto out_mm;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Writing 5 to /proc/pid/clear_refs resets the peak
|
||||||
|
* resident set size to this mm's current rss value.
|
||||||
|
*/
|
||||||
|
reset_mm_hiwater_rss(mm);
|
||||||
|
- goto out_unlock;
|
||||||
|
+ mmap_write_unlock(mm);
|
||||||
|
+ goto out_mm;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (type == CLEAR_REFS_SOFT_DIRTY) {
|
||||||
|
- mas_for_each(&mas, vma, ULONG_MAX) {
|
||||||
|
- if (!(vma->vm_flags & VM_SOFTDIRTY))
|
||||||
|
- continue;
|
||||||
|
- vma->vm_flags &= ~VM_SOFTDIRTY;
|
||||||
|
- vma_set_page_prot(vma);
|
||||||
|
+ if (mmap_read_lock_killable(mm)) {
|
||||||
|
+ count = -EINTR;
|
||||||
|
+ goto out_mm;
|
||||||
|
}
|
||||||
|
-
|
||||||
|
+ if (!clear_range)
|
||||||
|
+ mas_for_each(&mas, vma, ULONG_MAX) {
|
||||||
|
+ if (!(vma->vm_flags & VM_SOFTDIRTY))
|
||||||
|
+ continue;
|
||||||
|
+ mmap_read_unlock(mm);
|
||||||
|
+ if (mmap_write_lock_killable(mm)) {
|
||||||
|
+ count = -EINTR;
|
||||||
|
+ goto out_mm;
|
||||||
|
+ }
|
||||||
|
+ mas_for_each(&mas, vma, ULONG_MAX) {
|
||||||
|
+ vma->vm_flags &= ~VM_SOFTDIRTY;
|
||||||
|
+ vma_set_page_prot(vma);
|
||||||
|
+ }
|
||||||
|
+ mmap_write_downgrade(mm);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
inc_tlb_flush_pending(mm);
|
||||||
|
mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
|
||||||
|
- 0, NULL, mm, 0, -1UL);
|
||||||
|
+ 0, NULL, mm, start, end);
|
||||||
|
mmu_notifier_invalidate_range_start(&range);
|
||||||
|
}
|
||||||
|
- walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if (mmap_write_lock_killable(mm)) {
|
||||||
|
+ count = -EINTR;
|
||||||
|
+ goto out_mm;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ walk_page_range(mm, start, end == -1UL ? -1 : end, &clear_refs_walk_ops, &cp);
|
||||||
|
if (type == CLEAR_REFS_SOFT_DIRTY) {
|
||||||
|
mmu_notifier_invalidate_range_end(&range);
|
||||||
|
flush_tlb_mm(mm);
|
||||||
|
dec_tlb_flush_pending(mm);
|
||||||
|
+ mmap_read_unlock(mm);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ mmap_write_unlock(mm);
|
||||||
|
}
|
||||||
|
-out_unlock:
|
||||||
|
- mmap_write_unlock(mm);
|
||||||
|
out_mm:
|
||||||
|
mmput(mm);
|
||||||
|
}
|
||||||
|
@@ -1301,6 +1377,7 @@ struct pagemapread {
|
||||||
|
#define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
|
||||||
|
#define PM_SOFT_DIRTY BIT_ULL(55)
|
||||||
|
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
|
||||||
|
+#define PM_SOFT_DIRTY_PAGE BIT_ULL(57)
|
||||||
|
#define PM_UFFD_WP BIT_ULL(57)
|
||||||
|
#define PM_FILE BIT_ULL(61)
|
||||||
|
#define PM_SWAP BIT_ULL(62)
|
||||||
|
@@ -1373,13 +1450,13 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
|
||||||
|
flags |= PM_PRESENT;
|
||||||
|
page = vm_normal_page(vma, addr, pte);
|
||||||
|
if (pte_soft_dirty(pte))
|
||||||
|
- flags |= PM_SOFT_DIRTY;
|
||||||
|
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
|
||||||
|
if (pte_uffd_wp(pte))
|
||||||
|
flags |= PM_UFFD_WP;
|
||||||
|
} else if (is_swap_pte(pte)) {
|
||||||
|
swp_entry_t entry;
|
||||||
|
if (pte_swp_soft_dirty(pte))
|
||||||
|
- flags |= PM_SOFT_DIRTY;
|
||||||
|
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
|
||||||
|
if (pte_swp_uffd_wp(pte))
|
||||||
|
flags |= PM_UFFD_WP;
|
||||||
|
entry = pte_to_swp_entry(pte);
|
||||||
|
@@ -1500,7 +1500,7 @@
|
||||||
|
|
||||||
|
flags |= PM_PRESENT;
|
||||||
|
if (pmd_soft_dirty(pmd))
|
||||||
|
- flags |= PM_SOFT_DIRTY;
|
||||||
|
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
|
||||||
|
if (pmd_uffd_wp(pmd))
|
||||||
|
flags |= PM_UFFD_WP;
|
||||||
|
if (pm->show_pfn)
|
||||||
|
@@ -1442,7 +1519,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
||||||
|
}
|
||||||
|
flags |= PM_SWAP;
|
||||||
|
if (pmd_swp_soft_dirty(pmd))
|
||||||
|
- flags |= PM_SOFT_DIRTY;
|
||||||
|
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
|
||||||
|
if (pmd_swp_uffd_wp(pmd))
|
||||||
|
flags |= PM_UFFD_WP;
|
||||||
|
VM_BUG_ON(!is_pmd_migration_entry(pmd));
|
||||||
|
--
|
||||||
|
2.30.2
|
360
linux-tkg-patches/6.2/0002-clear-patches.patch
Normal file
360
linux-tkg-patches/6.2/0002-clear-patches.patch
Normal file
@@ -0,0 +1,360 @@
|
|||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||||
|
Date: Mon, 14 Mar 2016 11:10:58 -0600
|
||||||
|
Subject: [PATCH] pci pme wakeups
|
||||||
|
|
||||||
|
Reduce wakeups for PME checks, which are a workaround for miswired
|
||||||
|
boards (sadly, too many of them) in laptops.
|
||||||
|
---
|
||||||
|
drivers/pci/pci.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
|
||||||
|
index c9338f9..6974fbf 100644
|
||||||
|
--- a/drivers/pci/pci.c
|
||||||
|
+++ b/drivers/pci/pci.c
|
||||||
|
@@ -62,7 +62,7 @@ struct pci_pme_device {
|
||||||
|
struct pci_dev *dev;
|
||||||
|
};
|
||||||
|
|
||||||
|
-#define PME_TIMEOUT 1000 /* How long between PME checks */
|
||||||
|
+#define PME_TIMEOUT 4000 /* How long between PME checks */
|
||||||
|
|
||||||
|
static void pci_dev_d3_sleep(struct pci_dev *dev)
|
||||||
|
{
|
||||||
|
--
|
||||||
|
https://clearlinux.org
|
||||||
|
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||||
|
Date: Sat, 19 Mar 2016 21:32:19 -0400
|
||||||
|
Subject: [PATCH] intel_idle: tweak cpuidle cstates
|
||||||
|
|
||||||
|
Increase target_residency in cpuidle cstate
|
||||||
|
|
||||||
|
Tune intel_idle to be a bit less aggressive;
|
||||||
|
Clear linux is cleaner in hygiene (wakeups) than the average linux,
|
||||||
|
so we can afford changing these in a way that increases
|
||||||
|
performance while keeping power efficiency
|
||||||
|
---
|
||||||
|
drivers/idle/intel_idle.c | 44 +++++++++++++++++++--------------------
|
||||||
|
1 file changed, 22 insertions(+), 22 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
|
||||||
|
index f449584..c994d24 100644
|
||||||
|
--- a/drivers/idle/intel_idle.c
|
||||||
|
+++ b/drivers/idle/intel_idle.c
|
||||||
|
@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x01",
|
||||||
|
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
|
||||||
|
.exit_latency = 10,
|
||||||
|
- .target_residency = 20,
|
||||||
|
+ .target_residency = 120,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x10",
|
||||||
|
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 33,
|
||||||
|
- .target_residency = 100,
|
||||||
|
+ .target_residency = 900,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x20",
|
||||||
|
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 133,
|
||||||
|
- .target_residency = 400,
|
||||||
|
+ .target_residency = 1000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x32",
|
||||||
|
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 166,
|
||||||
|
- .target_residency = 500,
|
||||||
|
+ .target_residency = 1500,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x40",
|
||||||
|
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 300,
|
||||||
|
- .target_residency = 900,
|
||||||
|
+ .target_residency = 2000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x50",
|
||||||
|
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 600,
|
||||||
|
- .target_residency = 1800,
|
||||||
|
+ .target_residency = 5000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x60",
|
||||||
|
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 2600,
|
||||||
|
- .target_residency = 7700,
|
||||||
|
+ .target_residency = 9000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x01",
|
||||||
|
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
|
||||||
|
.exit_latency = 10,
|
||||||
|
- .target_residency = 20,
|
||||||
|
+ .target_residency = 120,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x10",
|
||||||
|
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 40,
|
||||||
|
- .target_residency = 100,
|
||||||
|
+ .target_residency = 1000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x20",
|
||||||
|
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 133,
|
||||||
|
- .target_residency = 400,
|
||||||
|
+ .target_residency = 1000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x32",
|
||||||
|
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 166,
|
||||||
|
- .target_residency = 500,
|
||||||
|
+ .target_residency = 2000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x40",
|
||||||
|
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 300,
|
||||||
|
- .target_residency = 900,
|
||||||
|
+ .target_residency = 4000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x50",
|
||||||
|
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 600,
|
||||||
|
- .target_residency = 1800,
|
||||||
|
+ .target_residency = 7000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x60",
|
||||||
|
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 2600,
|
||||||
|
- .target_residency = 7700,
|
||||||
|
+ .target_residency = 9000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x01",
|
||||||
|
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
|
||||||
|
.exit_latency = 10,
|
||||||
|
- .target_residency = 20,
|
||||||
|
+ .target_residency = 120,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x10",
|
||||||
|
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 70,
|
||||||
|
- .target_residency = 100,
|
||||||
|
+ .target_residency = 1000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x20",
|
||||||
|
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 85,
|
||||||
|
- .target_residency = 200,
|
||||||
|
+ .target_residency = 600,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x33",
|
||||||
|
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 124,
|
||||||
|
- .target_residency = 800,
|
||||||
|
+ .target_residency = 3000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x40",
|
||||||
|
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 200,
|
||||||
|
- .target_residency = 800,
|
||||||
|
+ .target_residency = 3200,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -708,7 +708,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x50",
|
||||||
|
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 480,
|
||||||
|
- .target_residency = 5000,
|
||||||
|
+ .target_residency = 9000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x60",
|
||||||
|
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||||
|
.exit_latency = 890,
|
||||||
|
- .target_residency = 5000,
|
||||||
|
+ .target_residency = 9000,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
|
||||||
|
.desc = "MWAIT 0x01",
|
||||||
|
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
|
||||||
|
.exit_latency = 10,
|
||||||
|
- .target_residency = 20,
|
||||||
|
+ .target_residency = 300,
|
||||||
|
.enter = &intel_idle,
|
||||||
|
.enter_s2idle = intel_idle_s2idle, },
|
||||||
|
{
|
||||||
|
--
|
||||||
|
https://clearlinux.org
|
||||||
|
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||||
|
Date: Fri, 6 Jan 2017 15:34:09 +0000
|
||||||
|
Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little
|
||||||
|
bigger than default
|
||||||
|
|
||||||
|
---
|
||||||
|
net/ipv4/tcp.c | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
|
||||||
|
index 30c1142..4345075 100644
|
||||||
|
--- a/net/ipv4/tcp.c
|
||||||
|
+++ b/net/ipv4/tcp.c
|
||||||
|
@@ -4201,8 +4201,8 @@ void __init tcp_init(void)
|
||||||
|
tcp_init_mem();
|
||||||
|
/* Set per-socket limits to no more than 1/128 the pressure threshold */
|
||||||
|
limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
|
||||||
|
- max_wshare = min(4UL*1024*1024, limit);
|
||||||
|
- max_rshare = min(6UL*1024*1024, limit);
|
||||||
|
+ max_wshare = min(16UL*1024*1024, limit);
|
||||||
|
+ max_rshare = min(16UL*1024*1024, limit);
|
||||||
|
|
||||||
|
init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
|
||||||
|
init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
|
||||||
|
--
|
||||||
|
https://clearlinux.org
|
||||||
|
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||||
|
Date: Sun, 18 Feb 2018 23:35:41 +0000
|
||||||
|
Subject: [PATCH] locking: rwsem: spin faster
|
||||||
|
|
||||||
|
tweak rwsem owner spinning a bit
|
||||||
|
---
|
||||||
|
kernel/locking/rwsem.c | 4 +++-
|
||||||
|
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
|
||||||
|
index f11b9bd..1bbfcc1 100644
|
||||||
|
--- a/kernel/locking/rwsem.c
|
||||||
|
+++ b/kernel/locking/rwsem.c
|
||||||
|
@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
|
||||||
|
struct task_struct *new, *owner;
|
||||||
|
unsigned long flags, new_flags;
|
||||||
|
enum owner_state state;
|
||||||
|
+ int i = 0;
|
||||||
|
|
||||||
|
owner = rwsem_owner_flags(sem, &flags);
|
||||||
|
state = rwsem_owner_state(owner, flags, nonspinnable);
|
||||||
|
@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
- cpu_relax();
|
||||||
|
+ if (i++ > 1000)
|
||||||
|
+ cpu_relax();
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
--
|
||||||
|
https://clearlinux.org
|
||||||
|
|
||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||||
|
Date: Thu, 2 Jun 2016 23:36:32 -0500
|
||||||
|
Subject: [PATCH] initialize ata before graphics
|
||||||
|
|
||||||
|
ATA init is the long pole in the boot process, and it's asynchronous.
|
||||||
|
move the graphics init after it so that ata and graphics initialize
|
||||||
|
in parallel
|
||||||
|
---
|
||||||
|
drivers/Makefile | 15 ++++++++-------
|
||||||
|
1 file changed, 8 insertions(+), 7 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/drivers/Makefile b/drivers/Makefile
|
||||||
|
index c0cd1b9..af1e2fb 100644
|
||||||
|
--- a/drivers/Makefile
|
||||||
|
+++ b/drivers/Makefile
|
||||||
|
@@ -59,15 +59,8 @@ obj-y += char/
|
||||||
|
# iommu/ comes before gpu as gpu are using iommu controllers
|
||||||
|
obj-y += iommu/
|
||||||
|
|
||||||
|
-# gpu/ comes after char for AGP vs DRM startup and after iommu
|
||||||
|
-obj-y += gpu/
|
||||||
|
-
|
||||||
|
obj-$(CONFIG_CONNECTOR) += connector/
|
||||||
|
|
||||||
|
-# i810fb and intelfb depend on char/agp/
|
||||||
|
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
|
||||||
|
-obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
|
||||||
|
-
|
||||||
|
obj-$(CONFIG_PARPORT) += parport/
|
||||||
|
obj-$(CONFIG_NVM) += lightnvm/
|
||||||
|
obj-y += base/ block/ misc/ mfd/ nfc/
|
||||||
|
@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/
|
||||||
|
obj-y += scsi/
|
||||||
|
obj-y += nvme/
|
||||||
|
obj-$(CONFIG_ATA) += ata/
|
||||||
|
+
|
||||||
|
+# gpu/ comes after char for AGP vs DRM startup and after iommu
|
||||||
|
+obj-y += gpu/
|
||||||
|
+
|
||||||
|
+# i810fb and intelfb depend on char/agp/
|
||||||
|
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
|
||||||
|
+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
|
||||||
|
+
|
||||||
|
obj-$(CONFIG_TARGET_CORE) += target/
|
||||||
|
obj-$(CONFIG_MTD) += mtd/
|
||||||
|
obj-$(CONFIG_SPI) += spi/
|
||||||
|
--
|
||||||
|
https://clearlinux.org
|
||||||
|
|
@@ -0,0 +1,363 @@
|
|||||||
|
From 9c85113cf4019e7b277a44e72bda8b78347aa72f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Paul Gofman <pgofman@codeweavers.com>
|
||||||
|
Date: Thu, 7 May 2020 14:05:31 +0300
|
||||||
|
Subject: [PATCH 2/2] mm: Support soft dirty flag read with reset.
|
||||||
|
|
||||||
|
---
|
||||||
|
fs/proc/base.c | 3 +
|
||||||
|
fs/proc/internal.h | 1 +
|
||||||
|
fs/proc/task_mmu.c | 144 +++++++++++++++++++++++++++++++++++++++------
|
||||||
|
3 files changed, 130 insertions(+), 18 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/fs/proc/base.c b/fs/proc/base.c
|
||||||
|
index b3422cda2a91..8199ae2411ca 100644
|
||||||
|
--- a/fs/proc/base.c
|
||||||
|
+++ b/fs/proc/base.c
|
||||||
|
@@ -3202,6 +3202,9 @@ static const struct pid_entry tgid_base_stuff[] = {
|
||||||
|
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
|
||||||
|
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
|
||||||
|
REG("pagemap", S_IRUSR, proc_pagemap_operations),
|
||||||
|
+#ifdef CONFIG_MEM_SOFT_DIRTY
|
||||||
|
+ REG("pagemap_reset", S_IRUSR, proc_pagemap_reset_operations),
|
||||||
|
+#endif
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_SECURITY
|
||||||
|
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
|
||||||
|
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
|
||||||
|
index f60b379dcdc7..36a901cf0e7f 100644
|
||||||
|
--- a/fs/proc/internal.h
|
||||||
|
+++ b/fs/proc/internal.h
|
||||||
|
@@ -303,6 +303,7 @@ extern const struct file_operations proc_pid_smaps_operations;
|
||||||
|
extern const struct file_operations proc_pid_smaps_rollup_operations;
|
||||||
|
extern const struct file_operations proc_clear_refs_operations;
|
||||||
|
extern const struct file_operations proc_pagemap_operations;
|
||||||
|
+extern const struct file_operations proc_pagemap_reset_operations;
|
||||||
|
|
||||||
|
extern unsigned long task_vsize(struct mm_struct *);
|
||||||
|
extern unsigned long task_statm(struct mm_struct *,
|
||||||
|
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
|
||||||
|
index 7c7865028f10..a21694967915 100644
|
||||||
|
--- a/fs/proc/task_mmu.c
|
||||||
|
+++ b/fs/proc/task_mmu.c
|
||||||
|
@@ -1056,8 +1056,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
|
||||||
|
return page_maybe_dma_pinned(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
|
||||||
|
- unsigned long addr, pte_t *pte)
|
||||||
|
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
|
||||||
|
+ unsigned long addr, pte_t *pte)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* The soft-dirty tracker uses #PF-s to catch writes
|
||||||
|
@@ -1066,37 +1066,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
|
||||||
|
* of how soft-dirty works.
|
||||||
|
*/
|
||||||
|
pte_t ptent = *pte;
|
||||||
|
+ bool ret = false;
|
||||||
|
|
||||||
|
if (pte_present(ptent)) {
|
||||||
|
pte_t old_pte;
|
||||||
|
|
||||||
|
if (pte_is_pinned(vma, addr, ptent))
|
||||||
|
- return;
|
||||||
|
+ return ret;
|
||||||
|
old_pte = ptep_modify_prot_start(vma, addr, pte);
|
||||||
|
+ ret = pte_soft_dirty(old_pte);
|
||||||
|
ptent = pte_wrprotect(old_pte);
|
||||||
|
ptent = pte_clear_soft_dirty(ptent);
|
||||||
|
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
|
||||||
|
} else if (is_swap_pte(ptent)) {
|
||||||
|
+ ret = pte_swp_soft_dirty(ptent);
|
||||||
|
ptent = pte_swp_clear_soft_dirty(ptent);
|
||||||
|
set_pte_at(vma->vm_mm, addr, pte, ptent);
|
||||||
|
}
|
||||||
|
+ return ret;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
|
||||||
|
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
|
||||||
|
unsigned long addr, pte_t *pte)
|
||||||
|
{
|
||||||
|
+ return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
|
||||||
|
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
|
||||||
|
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
|
||||||
|
unsigned long addr, pmd_t *pmdp)
|
||||||
|
{
|
||||||
|
pmd_t old, pmd = *pmdp;
|
||||||
|
+ bool ret = false;
|
||||||
|
|
||||||
|
if (pmd_present(pmd)) {
|
||||||
|
/* See comment in change_huge_pmd() */
|
||||||
|
old = pmdp_invalidate(vma, addr, pmdp);
|
||||||
|
+
|
||||||
|
+ ret = pmd_soft_dirty(old);
|
||||||
|
+
|
||||||
|
if (pmd_dirty(old))
|
||||||
|
pmd = pmd_mkdirty(pmd);
|
||||||
|
if (pmd_young(old))
|
||||||
|
@@ -1107,14 +1116,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
|
||||||
|
|
||||||
|
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
|
||||||
|
} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
|
||||||
|
+ ret = pmd_swp_soft_dirty(pmd);
|
||||||
|
pmd = pmd_swp_clear_soft_dirty(pmd);
|
||||||
|
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
|
||||||
|
}
|
||||||
|
+ return ret;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
|
||||||
|
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
|
||||||
|
unsigned long addr, pmd_t *pmdp)
|
||||||
|
{
|
||||||
|
+ return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
@@ -1367,6 +1379,7 @@ struct pagemapread {
|
||||||
|
int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
|
||||||
|
pagemap_entry_t *buffer;
|
||||||
|
bool show_pfn;
|
||||||
|
+ bool reset;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
|
||||||
|
@@ -1398,6 +1411,14 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm)
|
||||||
|
+{
|
||||||
|
+ ((unsigned long *)pm->buffer)[pm->pos++] = addr;
|
||||||
|
+ if (pm->pos >= pm->len)
|
||||||
|
+ return PM_END_OF_BUFFER;
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int pagemap_pte_hole(unsigned long start, unsigned long end,
|
||||||
|
__always_unused int depth, struct mm_walk *walk)
|
||||||
|
{
|
||||||
|
@@ -1405,6 +1426,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
|
||||||
|
unsigned long addr = start;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
+ if (pm->reset)
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
while (addr < end) {
|
||||||
|
struct vm_area_struct *vma = find_vma(walk->mm, addr);
|
||||||
|
pagemap_entry_t pme = make_pme(0, 0);
|
||||||
|
@@ -1439,8 +1463,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
|
||||||
|
}
|
||||||
|
|
||||||
|
static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
|
||||||
|
- struct vm_area_struct *vma, unsigned long addr, pte_t pte)
|
||||||
|
+ struct vm_area_struct *vma, unsigned long addr, pte_t *pte_addr)
|
||||||
|
{
|
||||||
|
+ pte_t pte = *pte_addr;
|
||||||
|
u64 frame = 0, flags = 0;
|
||||||
|
struct page *page = NULL;
|
||||||
|
|
||||||
|
@@ -1493,6 +1518,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
||||||
|
pmd_t pmd = *pmdp;
|
||||||
|
struct page *page = NULL;
|
||||||
|
|
||||||
|
+ if (pm->reset)
|
||||||
|
+ {
|
||||||
|
+ if (clear_soft_dirty_pmd(vma, addr, pmdp))
|
||||||
|
+ {
|
||||||
|
+ for (; addr != end; addr += PAGE_SIZE)
|
||||||
|
+ {
|
||||||
|
+ err = add_addr_to_pagemap(addr, pm);
|
||||||
|
+ if (err)
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ goto trans_huge_done;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (vma->vm_flags & VM_SOFTDIRTY)
|
||||||
|
flags |= PM_SOFT_DIRTY;
|
||||||
|
|
||||||
|
@@ -1541,6 +1580,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
||||||
|
frame += (1 << MAX_SWAPFILES_SHIFT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+trans_huge_done:
|
||||||
|
spin_unlock(ptl);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
@@ -1555,10 +1595,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
||||||
|
*/
|
||||||
|
orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
|
||||||
|
for (; addr < end; pte++, addr += PAGE_SIZE) {
|
||||||
|
- pagemap_entry_t pme;
|
||||||
|
+ if (pm->reset)
|
||||||
|
+ {
|
||||||
|
+ if (clear_soft_dirty(vma, addr, pte))
|
||||||
|
+ err = add_addr_to_pagemap(addr, pm);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ pagemap_entry_t pme;
|
||||||
|
|
||||||
|
- pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
|
||||||
|
- err = add_to_pagemap(addr, &pme, pm);
|
||||||
|
+ pme = pte_to_pagemap_entry(pm, vma, addr, pte);
|
||||||
|
+ err = add_to_pagemap(addr, &pme, pm);
|
||||||
|
+ }
|
||||||
|
if (err)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
@@ -1650,8 +1698,8 @@ static const struct mm_walk_ops pagemap_ops = {
|
||||||
|
* determine which areas of memory are actually mapped and llseek to
|
||||||
|
* skip over unmapped regions.
|
||||||
|
*/
|
||||||
|
-static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||||
|
- size_t count, loff_t *ppos)
|
||||||
|
+static ssize_t do_pagemap_read(struct file *file, char __user *buf,
|
||||||
|
+ size_t count, loff_t *ppos, bool reset)
|
||||||
|
{
|
||||||
|
struct mm_struct *mm = file->private_data;
|
||||||
|
struct pagemapread pm;
|
||||||
|
@@ -1660,6 +1708,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||||
|
unsigned long start_vaddr;
|
||||||
|
unsigned long end_vaddr;
|
||||||
|
int ret = 0, copied = 0;
|
||||||
|
+ struct mmu_notifier_range range;
|
||||||
|
+ size_t buffer_len;
|
||||||
|
|
||||||
|
if (!mm || !mmget_not_zero(mm))
|
||||||
|
goto out;
|
||||||
|
@@ -1675,19 +1725,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||||
|
|
||||||
|
/* do not disclose physical addresses: attack vector */
|
||||||
|
pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
|
||||||
|
+ pm.reset = reset;
|
||||||
|
|
||||||
|
- pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
|
||||||
|
- pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
|
||||||
|
+ buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES);
|
||||||
|
+
|
||||||
|
+ pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL);
|
||||||
|
ret = -ENOMEM;
|
||||||
|
if (!pm.buffer)
|
||||||
|
goto out_mm;
|
||||||
|
|
||||||
|
src = *ppos;
|
||||||
|
svpfn = src / PM_ENTRY_BYTES;
|
||||||
|
- end_vaddr = mm->task_size;
|
||||||
|
+
|
||||||
|
+ start_vaddr = svpfn << PAGE_SHIFT;
|
||||||
|
+
|
||||||
|
+ if (reset)
|
||||||
|
+ {
|
||||||
|
+ if (count < sizeof(end_vaddr))
|
||||||
|
+ {
|
||||||
|
+ ret = -EINVAL;
|
||||||
|
+ goto out_mm;
|
||||||
|
+ }
|
||||||
|
+ if (copy_from_user(&end_vaddr, buf, sizeof(end_vaddr)))
|
||||||
|
+ return -EFAULT;
|
||||||
|
+ end_vaddr = min(end_vaddr, mm->task_size);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ end_vaddr = mm->task_size;
|
||||||
|
+ start_vaddr = end_vaddr;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
/* watch out for wraparound */
|
||||||
|
- start_vaddr = end_vaddr;
|
||||||
|
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
|
||||||
|
start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
|
||||||
|
|
||||||
|
@@ -1707,18 +1776,35 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||||
|
unsigned long end;
|
||||||
|
|
||||||
|
pm.pos = 0;
|
||||||
|
- end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
|
||||||
|
+ pm.len = min(buffer_len, count / PM_ENTRY_BYTES);
|
||||||
|
+
|
||||||
|
+ end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT));
|
||||||
|
/* overflow ? */
|
||||||
|
if (end < start_vaddr || end > end_vaddr)
|
||||||
|
end = end_vaddr;
|
||||||
|
+
|
||||||
|
ret = mmap_read_lock_killable(mm);
|
||||||
|
if (ret)
|
||||||
|
goto out_free;
|
||||||
|
+
|
||||||
|
+ if (reset)
|
||||||
|
+ {
|
||||||
|
+ inc_tlb_flush_pending(mm);
|
||||||
|
+ mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
|
||||||
|
+ 0, NULL, mm, start_vaddr, end);
|
||||||
|
+ mmu_notifier_invalidate_range_start(&range);
|
||||||
|
+ }
|
||||||
|
ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
|
||||||
|
+ if (reset)
|
||||||
|
+ {
|
||||||
|
+ mmu_notifier_invalidate_range_end(&range);
|
||||||
|
+ flush_tlb_mm(mm);
|
||||||
|
+ dec_tlb_flush_pending(mm);
|
||||||
|
+ }
|
||||||
|
mmap_read_unlock(mm);
|
||||||
|
- start_vaddr = end;
|
||||||
|
|
||||||
|
len = min(count, PM_ENTRY_BYTES * pm.pos);
|
||||||
|
+ BUG_ON(ret && ret != PM_END_OF_BUFFER);
|
||||||
|
if (copy_to_user(buf, pm.buffer, len)) {
|
||||||
|
ret = -EFAULT;
|
||||||
|
goto out_free;
|
||||||
|
@@ -1726,6 +1812,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||||
|
copied += len;
|
||||||
|
buf += len;
|
||||||
|
count -= len;
|
||||||
|
+
|
||||||
|
+ start_vaddr = reset && pm.pos == pm.len ? ((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end;
|
||||||
|
}
|
||||||
|
*ppos += copied;
|
||||||
|
if (!ret || ret == PM_END_OF_BUFFER)
|
||||||
|
@@ -1739,6 +1827,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||||
|
+ size_t count, loff_t *ppos)
|
||||||
|
+{
|
||||||
|
+ return do_pagemap_read(file, buf, count, ppos, false);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static ssize_t pagemap_reset_read(struct file *file, char __user *buf,
|
||||||
|
+ size_t count, loff_t *ppos)
|
||||||
|
+{
|
||||||
|
+ return do_pagemap_read(file, buf, count, ppos, true);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int pagemap_open(struct inode *inode, struct file *file)
|
||||||
|
{
|
||||||
|
struct mm_struct *mm;
|
||||||
|
@@ -1765,6 +1865,14 @@ const struct file_operations proc_pagemap_operations = {
|
||||||
|
.open = pagemap_open,
|
||||||
|
.release = pagemap_release,
|
||||||
|
};
|
||||||
|
+
|
||||||
|
+const struct file_operations proc_pagemap_reset_operations = {
|
||||||
|
+ .llseek = mem_lseek, /* borrow this */
|
||||||
|
+ .read = pagemap_reset_read,
|
||||||
|
+ .open = pagemap_open,
|
||||||
|
+ .release = pagemap_release,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
#endif /* CONFIG_PROC_PAGE_MONITOR */
|
||||||
|
|
||||||
|
#ifdef CONFIG_NUMA
|
||||||
|
--
|
||||||
|
2.30.2
|
||||||
|
|
822
linux-tkg-patches/6.2/0003-glitched-base.patch
Normal file
822
linux-tkg-patches/6.2/0003-glitched-base.patch
Normal file
@@ -0,0 +1,822 @@
|
|||||||
|
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tk-Glitch <ti3nou@gmail.com>
|
||||||
|
Date: Wed, 4 Jul 2018 04:30:08 +0200
|
||||||
|
Subject: [PATCH 01/17] glitched
|
||||||
|
|
||||||
|
---
|
||||||
|
init/Makefile | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/init/Makefile b/init/Makefile
|
||||||
|
index baf3ab8d9d49..854e32e6aec7 100755
|
||||||
|
--- a/init/Makefile
|
||||||
|
+++ b/init/Makefile
|
||||||
|
@@ -19,7 +19,7 @@ else
|
||||||
|
|
||||||
|
# Maximum length of UTS_VERSION is 64 chars
|
||||||
|
filechk_uts_version = \
|
||||||
|
- utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "$(build-timestamp)" | cut -b -64); \
|
||||||
|
+ utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "TKG" "$(build-timestamp)" | cut -b -64); \
|
||||||
|
echo '$(pound)'define UTS_VERSION \""$${utsver}"\"
|
||||||
|
|
||||||
|
#
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Alexandre Frade <admfrade@gmail.com>
|
||||||
|
Date: Mon, 29 Jan 2018 16:59:22 +0000
|
||||||
|
Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which
|
||||||
|
VFS caches are reclaimed
|
||||||
|
|
||||||
|
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
|
||||||
|
---
|
||||||
|
fs/dcache.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/fs/dcache.c b/fs/dcache.c
|
||||||
|
index 361ea7ab30ea..0c5cf69b241a 100644
|
||||||
|
--- a/fs/dcache.c
|
||||||
|
+++ b/fs/dcache.c
|
||||||
|
@@ -71,7 +71,7 @@
|
||||||
|
* If no ancestor relationship:
|
||||||
|
* arbitrary, since it's serialized on rename_lock
|
||||||
|
*/
|
||||||
|
-int sysctl_vfs_cache_pressure __read_mostly = 100;
|
||||||
|
+int sysctl_vfs_cache_pressure __read_mostly = 50;
|
||||||
|
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
|
||||||
|
|
||||||
|
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
|
||||||
|
index f788cd61df21..2bfbb4213707 100644
|
||||||
|
--- a/kernel/sched/rt.c
|
||||||
|
+++ b/kernel/sched/rt.c
|
||||||
|
@@ -15,9 +15,9 @@ __read_mostly int scheduler_running;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* part of the period that we allow rt tasks to run in us.
|
||||||
|
- * default: 0.95s
|
||||||
|
+ * XanMod default: 0.98s
|
||||||
|
*/
|
||||||
|
-int sysctl_sched_rt_runtime = 950000;
|
||||||
|
+int sysctl_sched_rt_runtime = 980000;
|
||||||
|
|
||||||
|
#ifdef CONFIG_SYSCTL
|
||||||
|
static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Alexandre Frade <admfrade@gmail.com>
|
||||||
|
Date: Mon, 29 Jan 2018 17:41:29 +0000
|
||||||
|
Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo
|
||||||
|
|
||||||
|
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
|
||||||
|
---
|
||||||
|
scripts/setlocalversion | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
|
||||||
|
index 20f2efd57b11..0552d8b9f582 100755
|
||||||
|
--- a/scripts/setlocalversion
|
||||||
|
+++ b/scripts/setlocalversion
|
||||||
|
@@ -54,7 +54,7 @@ scm_version()
|
||||||
|
# If only the short version is requested, don't bother
|
||||||
|
# running further git commands
|
||||||
|
if $short; then
|
||||||
|
- echo "+"
|
||||||
|
+ # echo "+"
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
# If we are past a tagged commit (like
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
||||||
|
Date: Fri, 26 Oct 2018 11:22:33 +0100
|
||||||
|
Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3
|
||||||
|
inlining
|
||||||
|
|
||||||
|
---
|
||||||
|
drivers/infiniband/core/addr.c | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
|
||||||
|
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
|
||||||
|
index 3a98439bba83..6efc4f907f58 100644
|
||||||
|
--- a/drivers/infiniband/core/addr.c
|
||||||
|
+++ b/drivers/infiniband/core/addr.c
|
||||||
|
@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
|
||||||
|
union {
|
||||||
|
struct sockaddr_in _sockaddr_in;
|
||||||
|
struct sockaddr_in6 _sockaddr_in6;
|
||||||
|
+ struct sockaddr_ib _sockaddr_ib;
|
||||||
|
} sgid_addr, dgid_addr;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Etienne Juvigny <Ti3noU@gmail.com>
|
||||||
|
Date: Mon, 3 Sep 2018 17:36:25 +0200
|
||||||
|
Subject: [PATCH 07/17] Zenify & stuff
|
||||||
|
|
||||||
|
---
|
||||||
|
init/Kconfig | 32 ++++++++++++++++++++++++++++++++
|
||||||
|
kernel/sched/fair.c | 25 +++++++++++++++++++++++++
|
||||||
|
mm/page-writeback.c | 8 ++++++++
|
||||||
|
3 files changed, 65 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/init/Kconfig b/init/Kconfig
|
||||||
|
index 3ae8678e1145..da708eed0f1e 100644
|
||||||
|
--- a/init/Kconfig
|
||||||
|
+++ b/init/Kconfig
|
||||||
|
@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK
|
||||||
|
|
||||||
|
menu "General setup"
|
||||||
|
|
||||||
|
+config ZENIFY
|
||||||
|
+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience"
|
||||||
|
+ default y
|
||||||
|
+ help
|
||||||
|
+ Tunes the kernel for responsiveness at the cost of throughput and power usage.
|
||||||
|
+
|
||||||
|
+ --- Virtual Memory Subsystem ---------------------------
|
||||||
|
+
|
||||||
|
+ Mem dirty before bg writeback..: 10 % -> 20 %
|
||||||
|
+ Mem dirty before sync writeback: 20 % -> 50 %
|
||||||
|
+
|
||||||
|
+ --- Block Layer ----------------------------------------
|
||||||
|
+
|
||||||
|
+ Queue depth...............: 128 -> 512
|
||||||
|
+ Default MQ scheduler......: mq-deadline -> bfq
|
||||||
|
+
|
||||||
|
+ --- CFS CPU Scheduler ----------------------------------
|
||||||
|
+
|
||||||
|
+ Scheduling latency.............: 6 -> 3 ms
|
||||||
|
+ Minimal granularity............: 0.75 -> 0.3 ms
|
||||||
|
+ Wakeup granularity.............: 1 -> 0.5 ms
|
||||||
|
+ CPU migration cost.............: 0.5 -> 0.25 ms
|
||||||
|
+ Bandwidth slice size...........: 5 -> 3 ms
|
||||||
|
+ Ondemand fine upscaling limit..: 95 % -> 85 %
|
||||||
|
+
|
||||||
|
+ --- MuQSS CPU Scheduler --------------------------------
|
||||||
|
+
|
||||||
|
+ Scheduling interval............: 6 -> 3 ms
|
||||||
|
+ ISO task max realtime use......: 70 % -> 25 %
|
||||||
|
+ Ondemand coarse upscaling limit: 80 % -> 45 %
|
||||||
|
+ Ondemand fine upscaling limit..: 95 % -> 45 %
|
||||||
|
+
|
||||||
|
config BROKEN
|
||||||
|
bool
|
||||||
|
|
||||||
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
||||||
|
index 6b3b59cc51d6..2a0072192c3d 100644
|
||||||
|
--- a/kernel/sched/fair.c
|
||||||
|
+++ b/kernel/sched/fair.c
|
||||||
|
@@ -37,8 +37,13 @@
|
||||||
|
*
|
||||||
|
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
|
||||||
|
*/
|
||||||
|
+#ifdef CONFIG_ZENIFY
|
||||||
|
+unsigned int sysctl_sched_latency = 3000000ULL;
|
||||||
|
+static unsigned int normalized_sysctl_sched_latency = 3000000ULL;
|
||||||
|
+#else
|
||||||
|
unsigned int sysctl_sched_latency = 6000000ULL;
|
||||||
|
static unsigned int normalized_sysctl_sched_latency = 6000000ULL;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The initial- and re-scaling of tunables is configurable
|
||||||
|
@@ -58,21 +63,34 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
|
||||||
|
*
|
||||||
|
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
||||||
|
*/
|
||||||
|
+#ifdef CONFIG_ZENIFY
|
||||||
|
+unsigned int sysctl_sched_min_granularity = 300000ULL;
|
||||||
|
+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL;
|
||||||
|
+#else
|
||||||
|
unsigned int sysctl_sched_min_granularity = 750000ULL;
|
||||||
|
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Minimal preemption granularity for CPU-bound SCHED_IDLE tasks.
|
||||||
|
* Applies only when SCHED_IDLE tasks compete with normal tasks.
|
||||||
|
*
|
||||||
|
* (default: 0.75 msec)
|
||||||
|
*/
|
||||||
|
+#ifdef CONFIG_ZENIFY
|
||||||
|
+unsigned int sysctl_sched_idle_min_granularity = 300000ULL;
|
||||||
|
+#else
|
||||||
|
unsigned int sysctl_sched_idle_min_granularity = 750000ULL;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
|
||||||
|
*/
|
||||||
|
+#ifdef CONFIG_ZENIFY
|
||||||
|
+static unsigned int sched_nr_latency = 10;
|
||||||
|
+#else
|
||||||
|
static unsigned int sched_nr_latency = 8;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* After fork, child runs first. If set to 0 (default) then
|
||||||
|
@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu)
|
||||||
|
*
|
||||||
|
* (default: 5 msec, units: microseconds)
|
||||||
|
*/
|
||||||
|
+#ifdef CONFIG_ZENIFY
|
||||||
|
+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
|
||||||
|
+#else
|
||||||
|
static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
|
||||||
|
#endif
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_SYSCTL
|
||||||
|
static struct ctl_table sched_fair_sysctls[] = {
|
||||||
|
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
|
||||||
|
index 28b3e7a67565..01a1aef2b9b1 100644
|
||||||
|
--- a/mm/page-writeback.c
|
||||||
|
+++ b/mm/page-writeback.c
|
||||||
|
@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
|
||||||
|
/*
|
||||||
|
* Start background writeback (via writeback threads) at this percentage
|
||||||
|
*/
|
||||||
|
+#ifdef CONFIG_ZENIFY
|
||||||
|
+static int dirty_background_ratio = 20;
|
||||||
|
+#else
|
||||||
|
static int dirty_background_ratio = 10;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* dirty_background_bytes starts at 0 (disabled) so that it is a function of
|
||||||
|
@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable;
|
||||||
|
/*
|
||||||
|
* The generator of dirty data starts writeback at this percentage
|
||||||
|
*/
|
||||||
|
+#ifdef CONFIG_ZENIFY
|
||||||
|
+static int vm_dirty_ratio = 50;
|
||||||
|
+#else
|
||||||
|
static int vm_dirty_ratio = 20;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Steven Barrett <damentz@liquorix.net>
|
||||||
|
Date: Sun, 16 Jan 2011 18:57:32 -0600
|
||||||
|
Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control
|
||||||
|
|
||||||
|
4.4: In my tests YeAH dramatically slowed down transfers over a WLAN,
|
||||||
|
reducing throughput from ~65Mbps (CUBIC) to ~7MBps (YeAH) over 10
|
||||||
|
seconds (netperf TCP_STREAM) including long stalls.
|
||||||
|
|
||||||
|
Be careful when choosing this. ~heftig
|
||||||
|
---
|
||||||
|
net/ipv4/Kconfig | 4 ++++
|
||||||
|
1 file changed, 4 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
|
||||||
|
index e64e59b536d3..bfb55ef7ebbe 100644
|
||||||
|
--- a/net/ipv4/Kconfig
|
||||||
|
+++ b/net/ipv4/Kconfig
|
||||||
|
@@ -691,6 +691,9 @@ choice
|
||||||
|
config DEFAULT_VEGAS
|
||||||
|
bool "Vegas" if TCP_CONG_VEGAS=y
|
||||||
|
|
||||||
|
+ config DEFAULT_YEAH
|
||||||
|
+ bool "YeAH" if TCP_CONG_YEAH=y
|
||||||
|
+
|
||||||
|
config DEFAULT_VENO
|
||||||
|
bool "Veno" if TCP_CONG_VENO=y
|
||||||
|
|
||||||
|
@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG
|
||||||
|
default "htcp" if DEFAULT_HTCP
|
||||||
|
default "hybla" if DEFAULT_HYBLA
|
||||||
|
default "vegas" if DEFAULT_VEGAS
|
||||||
|
+ default "yeah" if DEFAULT_YEAH
|
||||||
|
default "westwood" if DEFAULT_WESTWOOD
|
||||||
|
default "veno" if DEFAULT_VENO
|
||||||
|
default "reno" if DEFAULT_RENO
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Steven Barrett <steven@liquorix.net>
|
||||||
|
Date: Wed, 28 Nov 2018 19:01:27 -0600
|
||||||
|
Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag
|
||||||
|
strategy
|
||||||
|
|
||||||
|
For some reason, the default strategy to respond to THP fault fallbacks
|
||||||
|
is still just madvise, meaning stall if the program wants transparent
|
||||||
|
hugepages, but don't trigger a background reclaim / compaction if THP
|
||||||
|
begins to fail allocations. This creates a snowball effect where we
|
||||||
|
still use the THP code paths, but we almost always fail once a system
|
||||||
|
has been active and busy for a while.
|
||||||
|
|
||||||
|
The option "defer" was created for interactive systems where THP can
|
||||||
|
still improve performance. If we have to fallback to a regular page due
|
||||||
|
to an allocation failure or anything else, we will trigger a background
|
||||||
|
reclaim and compaction so future THP attempts succeed and previous
|
||||||
|
attempts eventually have their smaller pages combined without stalling
|
||||||
|
running applications.
|
||||||
|
|
||||||
|
We still want madvise to stall applications that explicitly want THP,
|
||||||
|
so defer+madvise _does_ make a ton of sense. Make it the default for
|
||||||
|
interactive systems, especially if the kernel maintainer left
|
||||||
|
transparent hugepages on "always".
|
||||||
|
|
||||||
|
Reasoning and details in the original patch: https://lwn.net/Articles/711248/
|
||||||
|
---
|
||||||
|
mm/huge_memory.c | 4 ++++
|
||||||
|
1 file changed, 4 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
|
||||||
|
index 74300e337c3c..9277f22c10a7 100644
|
||||||
|
--- a/mm/huge_memory.c
|
||||||
|
+++ b/mm/huge_memory.c
|
||||||
|
@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly =
|
||||||
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
|
||||||
|
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
|
||||||
|
#endif
|
||||||
|
+#ifdef CONFIG_ZENIFY
|
||||||
|
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
|
||||||
|
+#else
|
||||||
|
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
|
||||||
|
+#endif
|
||||||
|
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
|
||||||
|
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
From 2b65a1329cb220b43c19c4d0de5833fae9e2b22d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Alexandre Frade <admfrade@gmail.com>
|
||||||
|
Date: Wed, 24 Oct 2018 16:58:52 -0300
|
||||||
|
Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default
|
||||||
|
|
||||||
|
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
|
||||||
|
---
|
||||||
|
net/sched/Kconfig | 4 ++++
|
||||||
|
1 file changed, 4 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
|
||||||
|
index 84badf00647e..6a922bca9f39 100644
|
||||||
|
--- a/net/sched/Kconfig
|
||||||
|
+++ b/net/sched/Kconfig
|
||||||
|
@@ -471,6 +471,9 @@ choice
|
||||||
|
config DEFAULT_SFQ
|
||||||
|
bool "Stochastic Fair Queue" if NET_SCH_SFQ
|
||||||
|
|
||||||
|
+ config DEFAULT_CAKE
|
||||||
|
+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE
|
||||||
|
+
|
||||||
|
config DEFAULT_PFIFO_FAST
|
||||||
|
bool "Priority FIFO Fast"
|
||||||
|
endchoice
|
||||||
|
@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH
|
||||||
|
default "fq" if DEFAULT_FQ
|
||||||
|
default "fq_codel" if DEFAULT_FQ_CODEL
|
||||||
|
default "sfq" if DEFAULT_SFQ
|
||||||
|
+ default "cake" if DEFAULT_CAKE
|
||||||
|
default "pfifo_fast"
|
||||||
|
endif
|
||||||
|
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tk-Glitch <ti3nou@gmail.com>
|
||||||
|
Date: Fri, 19 Apr 2019 12:33:38 +0200
|
||||||
|
Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default
|
||||||
|
|
||||||
|
The value is still pretty low, and AMD64-ABI and ELF extended numbering
|
||||||
|
supports that, so we should be fine on modern x86 systems.
|
||||||
|
|
||||||
|
This fixes crashes in some applications using more than 65535 vmas (also
|
||||||
|
affects some windows games running in wine, such as Star Citizen).
|
||||||
|
---
|
||||||
|
include/linux/mm.h | 3 +--
|
||||||
|
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/linux/mm.h b/include/linux/mm.h
|
||||||
|
index bc05c3588aa3..b0cefe94920d 100644
|
||||||
|
--- a/include/linux/mm.h
|
||||||
|
+++ b/include/linux/mm.h
|
||||||
|
@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
|
||||||
|
* not a hard limit any more. Although some userspace tools can be surprised by
|
||||||
|
* that.
|
||||||
|
*/
|
||||||
|
-#define MAPCOUNT_ELF_CORE_MARGIN (5)
|
||||||
|
-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
|
||||||
|
+#define DEFAULT_MAX_MAP_COUNT (262144)
|
||||||
|
|
||||||
|
extern int sysctl_max_map_count;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tk-Glitch <ti3nou@gmail.com>
|
||||||
|
Date: Mon, 27 Jul 2020 00:19:18 +0200
|
||||||
|
Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT
|
||||||
|
|
||||||
|
Some games such as Detroit: Become Human tend to be very crash prone with
|
||||||
|
lower values.
|
||||||
|
---
|
||||||
|
include/linux/mm.h | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/include/linux/mm.h b/include/linux/mm.h
|
||||||
|
index b0cefe94920d..890165099b07 100644
|
||||||
|
--- a/include/linux/mm.h
|
||||||
|
+++ b/include/linux/mm.h
|
||||||
|
@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
|
||||||
|
* not a hard limit any more. Although some userspace tools can be surprised by
|
||||||
|
* that.
|
||||||
|
*/
|
||||||
|
-#define DEFAULT_MAX_MAP_COUNT (262144)
|
||||||
|
+#define DEFAULT_MAX_MAP_COUNT (16777216)
|
||||||
|
|
||||||
|
extern int sysctl_max_map_count;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Alexandre Frade <admfrade@gmail.com>
|
||||||
|
Date: Mon, 25 Nov 2019 15:13:06 -0300
|
||||||
|
Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq
|
||||||
|
|
||||||
|
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
|
||||||
|
---
|
||||||
|
block/elevator.c | 6 +++---
|
||||||
|
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/block/elevator.c b/block/elevator.c
|
||||||
|
index 4eab3d70e880..79669aa39d79 100644
|
||||||
|
--- a/block/elevator.c
|
||||||
|
+++ b/block/elevator.c
|
||||||
|
@@ -623,19 +623,19 @@ static inline bool elv_support_iosched(struct request_queue *q)
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
- * For single queue devices, default to using mq-deadline. If we have multiple
|
||||||
|
- * queues or mq-deadline is not available, default to "none".
|
||||||
|
+ * For single queue devices, default to using bfq. If we have multiple
|
||||||
|
+ * queues or bfq is not available, default to "none".
|
||||||
|
*/
|
||||||
|
static struct elevator_type *elevator_get_default(struct request_queue *q)
|
||||||
|
{
|
||||||
|
if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (q->nr_hw_queues != 1 &&
|
||||||
|
!blk_mq_is_shared_tags(q->tag_set->flags))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
- return elevator_find_get(q, "mq-deadline");
|
||||||
|
+ return elevator_find_get(q, "bfq");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Alexandre Frade <kernel@xanmod.org>
|
||||||
|
Date: Mon, 3 Aug 2020 17:05:04 +0000
|
||||||
|
Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file
|
||||||
|
read-ahead pages size
|
||||||
|
|
||||||
|
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||||
|
---
|
||||||
|
include/linux/pagemap.h | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
|
||||||
|
index cf2468da68e9..007dea784451 100644
|
||||||
|
--- a/include/linux/pagemap.h
|
||||||
|
+++ b/include/linux/pagemap.h
|
||||||
|
@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
|
||||||
|
void delete_from_page_cache_batch(struct address_space *mapping,
|
||||||
|
struct pagevec *pvec);
|
||||||
|
|
||||||
|
-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
|
||||||
|
+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE)
|
||||||
|
|
||||||
|
void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
|
||||||
|
struct file *, pgoff_t index, unsigned long req_count);
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
|
||||||
|
From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Steven Barrett <steven@liquorix.net>
|
||||||
|
Date: Wed, 15 Jan 2020 20:43:56 -0600
|
||||||
|
Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter
|
||||||
|
|
||||||
|
If intel-pstate is compiled into the kernel, it will preempt the loading
|
||||||
|
of acpi-cpufreq so you can take advantage of hardware p-states without
|
||||||
|
any friction.
|
||||||
|
|
||||||
|
However, intel-pstate is not completely superior to cpufreq's ondemand
|
||||||
|
for one reason. There's no concept of an up_threshold property.
|
||||||
|
|
||||||
|
In ondemand, up_threshold essentially reduces the maximum utilization to
|
||||||
|
compare against, allowing you to hit max frequencies and turbo boost
|
||||||
|
from a much lower core utilization.
|
||||||
|
|
||||||
|
With intel-pstate, you have the concept of minimum and maximum
|
||||||
|
performance, but no tunable that lets you define, maximum frequency
|
||||||
|
means 50% core utilization. For just this oversight, there are reasons
|
||||||
|
you may want ondemand.
|
||||||
|
|
||||||
|
Let's support setting "enable" in kernel boot parameters. This lets
|
||||||
|
kernel maintainers include "intel_pstate=disable" statically in the
|
||||||
|
static boot parameters, but let users of the kernel override this
|
||||||
|
selection.
|
||||||
|
---
|
||||||
|
Documentation/admin-guide/kernel-parameters.txt | 3 +++
|
||||||
|
drivers/cpufreq/intel_pstate.c | 2 ++
|
||||||
|
2 files changed, 5 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
|
||||||
|
index fb95fad81c79..3e92fee81e33 100644
|
||||||
|
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||||
|
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||||
|
@@ -1857,6 +1857,9 @@
|
||||||
|
disable
|
||||||
|
Do not enable intel_pstate as the default
|
||||||
|
scaling driver for the supported processors
|
||||||
|
+ enable
|
||||||
|
+ Enable intel_pstate in-case "disable" was passed
|
||||||
|
+ previously in the kernel boot parameters
|
||||||
|
passive
|
||||||
|
Use intel_pstate as a scaling driver, but configure it
|
||||||
|
to work with generic cpufreq governors (instead of
|
||||||
|
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
|
||||||
|
index 36a469150ff9..aee891c9b78a 100644
|
||||||
|
--- a/drivers/cpufreq/intel_pstate.c
|
||||||
|
+++ b/drivers/cpufreq/intel_pstate.c
|
||||||
|
@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str)
|
||||||
|
if (!strcmp(str, "no_hwp"))
|
||||||
|
no_hwp = 1;
|
||||||
|
|
||||||
|
+ if (!strcmp(str, "enable"))
|
||||||
|
+ no_load = 0;
|
||||||
|
if (!strcmp(str, "force"))
|
||||||
|
force_load = 1;
|
||||||
|
if (!strcmp(str, "hwp_only"))
|
||||||
|
--
|
||||||
|
2.28.0
|
||||||
|
|
||||||
|
From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Kenny Levinsen <kl@kl.wtf>
|
||||||
|
Date: Sun, 27 Dec 2020 14:43:13 +0000
|
||||||
|
Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client
|
||||||
|
|
||||||
|
Significant time was spent on synchronize_rcu in evdev_detach_client
|
||||||
|
when applications closed evdev devices. Switching VT away from a
|
||||||
|
graphical environment commonly leads to mass input device closures,
|
||||||
|
which could lead to noticeable delays on systems with many input devices.
|
||||||
|
|
||||||
|
Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
|
||||||
|
client struct till after the RCU grace period instead of blocking the
|
||||||
|
calling application.
|
||||||
|
|
||||||
|
While this does not solve all slow evdev fd closures, it takes care of a
|
||||||
|
good portion of them, including this simple test:
|
||||||
|
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int idx, fd;
|
||||||
|
const char *path = "/dev/input/event0";
|
||||||
|
for (idx = 0; idx < 1000; idx++) {
|
||||||
|
if ((fd = open(path, O_RDWR)) == -1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Time to completion of above test when run locally:
|
||||||
|
|
||||||
|
Before: 0m27.111s
|
||||||
|
After: 0m0.018s
|
||||||
|
|
||||||
|
Signed-off-by: Kenny Levinsen <kl@kl.wtf>
|
||||||
|
---
|
||||||
|
drivers/input/evdev.c | 19 +++++++++++--------
|
||||||
|
1 file changed, 11 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
|
||||||
|
index 95f90699d2b17b..2b10fe29d2c8d9 100644
|
||||||
|
--- a/drivers/input/evdev.c
|
||||||
|
+++ b/drivers/input/evdev.c
|
||||||
|
@@ -46,6 +46,7 @@ struct evdev_client {
|
||||||
|
struct fasync_struct *fasync;
|
||||||
|
struct evdev *evdev;
|
||||||
|
struct list_head node;
|
||||||
|
+ struct rcu_head rcu;
|
||||||
|
enum input_clock_type clk_type;
|
||||||
|
bool revoked;
|
||||||
|
unsigned long *evmasks[EV_CNT];
|
||||||
|
@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
|
||||||
|
spin_unlock(&evdev->client_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void evdev_reclaim_client(struct rcu_head *rp)
|
||||||
|
+{
|
||||||
|
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
|
||||||
|
+ unsigned int i;
|
||||||
|
+ for (i = 0; i < EV_CNT; ++i)
|
||||||
|
+ bitmap_free(client->evmasks[i]);
|
||||||
|
+ kvfree(client);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void evdev_detach_client(struct evdev *evdev,
|
||||||
|
struct evdev_client *client)
|
||||||
|
{
|
||||||
|
spin_lock(&evdev->client_lock);
|
||||||
|
list_del_rcu(&client->node);
|
||||||
|
spin_unlock(&evdev->client_lock);
|
||||||
|
- synchronize_rcu();
|
||||||
|
+ call_rcu(&client->rcu, evdev_reclaim_client);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int evdev_open_device(struct evdev *evdev)
|
||||||
|
@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
|
||||||
|
{
|
||||||
|
struct evdev_client *client = file->private_data;
|
||||||
|
struct evdev *evdev = client->evdev;
|
||||||
|
- unsigned int i;
|
||||||
|
|
||||||
|
mutex_lock(&evdev->mutex);
|
||||||
|
|
||||||
|
@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
|
||||||
|
|
||||||
|
evdev_detach_client(evdev, client);
|
||||||
|
|
||||||
|
- for (i = 0; i < EV_CNT; ++i)
|
||||||
|
- bitmap_free(client->evmasks[i]);
|
||||||
|
-
|
||||||
|
- kvfree(client);
|
||||||
|
-
|
||||||
|
evdev_close_device(evdev);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
|
||||||
|
|
||||||
|
err_free_client:
|
||||||
|
evdev_detach_client(evdev, client);
|
||||||
|
- kvfree(client);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||||
|
Date: Wed, 20 Oct 2021 20:50:11 -0700
|
||||||
|
Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
|
||||||
|
scheduling delays
|
||||||
|
|
||||||
|
The page allocator processes free pages in groups of pageblocks, where
|
||||||
|
the size of a pageblock is typically quite large (1024 pages without
|
||||||
|
hugetlbpage support). Pageblocks are processed atomically with the zone
|
||||||
|
lock held, which can cause severe scheduling delays on both the CPU
|
||||||
|
going through the pageblock and any other CPUs waiting to acquire the
|
||||||
|
zone lock. A frequent offender is move_freepages_block(), which is used
|
||||||
|
by rmqueue() for page allocation.
|
||||||
|
|
||||||
|
As it turns out, there's no requirement for pageblocks to be so large,
|
||||||
|
so the pageblock order can simply be reduced to ease the scheduling
|
||||||
|
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
|
||||||
|
reasonable setting to ensure non-costly page allocation requests can
|
||||||
|
still be serviced without always needing to free up more than one
|
||||||
|
pageblock's worth of pages at a time.
|
||||||
|
|
||||||
|
This has a noticeable effect on overall system latency when memory
|
||||||
|
pressure is elevated. The various mm functions which operate on
|
||||||
|
pageblocks no longer appear in the preemptoff tracer, where previously
|
||||||
|
they would spend up to 100 ms on a mobile arm64 CPU processing a
|
||||||
|
pageblock with preemption disabled and the zone lock held.
|
||||||
|
|
||||||
|
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||||
|
---
|
||||||
|
include/linux/pageblock-flags.h | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
|
||||||
|
index 5f1ae07d724b88..97cda629c9e909 100644
|
||||||
|
--- a/include/linux/pageblock-flags.h
|
||||||
|
+++ b/include/linux/pageblock-flags.h
|
||||||
|
@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;
|
||||||
|
#else /* CONFIG_HUGETLB_PAGE */
|
||||||
|
|
||||||
|
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
|
||||||
|
-#define pageblock_order (MAX_ORDER-1)
|
||||||
|
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER
|
||||||
|
|
||||||
|
#endif /* CONFIG_HUGETLB_PAGE */
|
||||||
|
|
||||||
|
|
||||||
|
From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||||
|
Date: Wed, 20 Oct 2021 20:50:32 -0700
|
||||||
|
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()
|
||||||
|
|
||||||
|
There is noticeable scheduling latency and heavy zone lock contention
|
||||||
|
stemming from rmqueue_bulk's single hold of the zone lock while doing
|
||||||
|
its work, as seen with the preemptoff tracer. There's no actual need for
|
||||||
|
rmqueue_bulk() to hold the zone lock the entire time; it only does so
|
||||||
|
for supposed efficiency. As such, we can relax the zone lock and even
|
||||||
|
reschedule when IRQs are enabled in order to keep the scheduling delays
|
||||||
|
and zone lock contention at bay. Forward progress is still guaranteed,
|
||||||
|
as the zone lock can only be relaxed after page removal.
|
||||||
|
|
||||||
|
With this change, rmqueue_bulk() no longer appears as a serious offender
|
||||||
|
in the preemptoff tracer, and system latency is noticeably improved.
|
||||||
|
|
||||||
|
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||||
|
---
|
||||||
|
mm/page_alloc.c | 23 ++++++++++++++++++-----
|
||||||
|
1 file changed, 18 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||||
|
index a0b0397e29ee4c..87a983a356530c 100644
|
||||||
|
--- a/mm/page_alloc.c
|
||||||
|
+++ b/mm/page_alloc.c
|
||||||
|
@@ -3118,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
- * Obtain a specified number of elements from the buddy allocator, all under
|
||||||
|
- * a single hold of the lock, for efficiency. Add them to the supplied list.
|
||||||
|
- * Returns the number of new pages which were placed at *list.
|
||||||
|
+ * Obtain a specified number of elements from the buddy allocator, and relax the
|
||||||
|
+ * zone lock when needed. Add them to the supplied list. Returns the number of
|
||||||
|
+ * new pages which were placed at *list.
|
||||||
|
*/
|
||||||
|
static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||||
|
unsigned long count, struct list_head *list,
|
||||||
|
int migratetype, unsigned int alloc_flags)
|
||||||
|
{
|
||||||
|
unsigned long flags;
|
||||||
|
- int i, allocated = 0;
|
||||||
|
+ const bool can_resched = !preempt_count() && !irqs_disabled();
|
||||||
|
+ int i, allocated = 0, last_mod = 0;
|
||||||
|
|
||||||
|
/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
|
||||||
|
spin_lock(&zone->lock);
|
||||||
|
@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||||
|
if (unlikely(page == NULL))
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ /* Reschedule and ease the contention on the lock if needed */
|
||||||
|
+ if (i + 1 < count && ((can_resched && need_resched()) ||
|
||||||
|
+ spin_needbreak(&zone->lock))) {
|
||||||
|
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
|
||||||
|
+ -((i + 1 - last_mod) << order));
|
||||||
|
+ last_mod = i + 1;
|
||||||
|
+ spin_unlock(&zone->lock);
|
||||||
|
+ if (can_resched)
|
||||||
|
+ cond_resched();
|
||||||
|
+ spin_lock(&zone->lock);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (unlikely(check_pcp_refill(page, order)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||||
|
* on i. Do not confuse with 'allocated' which is the number of
|
||||||
|
* pages added to the pcp list.
|
||||||
|
*/
|
||||||
|
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
|
||||||
|
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
|
||||||
|
spin_unlock(&zone->lock);
|
||||||
|
return allocated;
|
||||||
|
}
|
22
linux-tkg-patches/6.2/0003-glitched-cfs-additions.patch
Normal file
22
linux-tkg-patches/6.2/0003-glitched-cfs-additions.patch
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
||||||
|
index 6b3b59cc51d6..2a0072192c3d 100644
|
||||||
|
--- a/kernel/sched/fair.c
|
||||||
|
+++ b/kernel/sched/fair.c
|
||||||
|
@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
|
||||||
|
*
|
||||||
|
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
||||||
|
*/
|
||||||
|
+#ifdef CONFIG_ZENIFY
|
||||||
|
+unsigned int sysctl_sched_wakeup_granularity = 500000UL;
|
||||||
|
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL;
|
||||||
|
+
|
||||||
|
+const_debug unsigned int sysctl_sched_migration_cost = 50000UL;
|
||||||
|
+#else
|
||||||
|
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
|
||||||
|
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
|
||||||
|
|
||||||
|
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
int sched_thermal_decay_shift;
|
||||||
|
static int __init setup_sched_thermal_decay_shift(char *str)
|
90
linux-tkg-patches/6.2/0003-glitched-cfs.patch
Normal file
90
linux-tkg-patches/6.2/0003-glitched-cfs.patch
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
|
||||||
|
index 2a202a846757..1d9c7ed79b11 100644
|
||||||
|
--- a/kernel/Kconfig.hz
|
||||||
|
+++ b/kernel/Kconfig.hz
|
||||||
|
@@ -4,7 +4,7 @@
|
||||||
|
|
||||||
|
choice
|
||||||
|
prompt "Timer frequency"
|
||||||
|
- default HZ_250
|
||||||
|
+ default HZ_500
|
||||||
|
help
|
||||||
|
Allows the configuration of the timer frequency. It is customary
|
||||||
|
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
|
||||||
|
@@ -39,6 +39,13 @@ choice
|
||||||
|
on SMP and NUMA systems and exactly dividing by both PAL and
|
||||||
|
NTSC frame rates for video and multimedia work.
|
||||||
|
|
||||||
|
+ config HZ_500
|
||||||
|
+ bool "500 HZ"
|
||||||
|
+ help
|
||||||
|
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
|
||||||
|
+ on desktops with great smoothness without increasing CPU power
|
||||||
|
+ consumption and sacrificing the battery life on laptops.
|
||||||
|
+
|
||||||
|
config HZ_1000
|
||||||
|
bool "1000 HZ"
|
||||||
|
help
|
||||||
|
@@ -52,6 +59,7 @@ config HZ
|
||||||
|
default 100 if HZ_100
|
||||||
|
default 250 if HZ_250
|
||||||
|
default 300 if HZ_300
|
||||||
|
+ default 500 if HZ_500
|
||||||
|
default 1000 if HZ_1000
|
||||||
|
|
||||||
|
config SCHED_HRTICK
|
||||||
|
|
||||||
|
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
|
||||||
|
index 2a202a846757..1d9c7ed79b11 100644
|
||||||
|
--- a/kernel/Kconfig.hz
|
||||||
|
+++ b/kernel/Kconfig.hz
|
||||||
|
@@ -4,7 +4,7 @@
|
||||||
|
|
||||||
|
choice
|
||||||
|
prompt "Timer frequency"
|
||||||
|
- default HZ_500
|
||||||
|
+ default HZ_750
|
||||||
|
help
|
||||||
|
Allows the configuration of the timer frequency. It is customary
|
||||||
|
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
|
||||||
|
@@ -46,6 +46,13 @@ choice
|
||||||
|
on desktops with great smoothness without increasing CPU power
|
||||||
|
consumption and sacrificing the battery life on laptops.
|
||||||
|
|
||||||
|
+ config HZ_750
|
||||||
|
+ bool "750 HZ"
|
||||||
|
+ help
|
||||||
|
+ 750 Hz is a good timer frequency for desktops. Provides fast
|
||||||
|
+ interactivity with great smoothness without sacrificing too
|
||||||
|
+ much throughput.
|
||||||
|
+
|
||||||
|
config HZ_1000
|
||||||
|
bool "1000 HZ"
|
||||||
|
help
|
||||||
|
@@ -60,6 +67,7 @@ config HZ
|
||||||
|
default 250 if HZ_250
|
||||||
|
default 300 if HZ_300
|
||||||
|
default 500 if HZ_500
|
||||||
|
+ default 750 if HZ_750
|
||||||
|
default 1000 if HZ_1000
|
||||||
|
|
||||||
|
config SCHED_HRTICK
|
||||||
|
|
||||||
|
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
|
||||||
|
index 6b423eebfd5d..61e3271675d6 100644
|
||||||
|
--- a/drivers/cpufreq/cpufreq_ondemand.c
|
||||||
|
+++ b/drivers/cpufreq/cpufreq_ondemand.c
|
||||||
|
@@ -21,10 +21,10 @@
|
||||||
|
#include "cpufreq_ondemand.h"
|
||||||
|
|
||||||
|
/* On-demand governor macros */
|
||||||
|
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
|
||||||
|
-#define DEF_SAMPLING_DOWN_FACTOR (1)
|
||||||
|
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
|
||||||
|
+#define DEF_SAMPLING_DOWN_FACTOR (5)
|
||||||
|
#define MAX_SAMPLING_DOWN_FACTOR (100000)
|
||||||
|
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
|
||||||
|
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
|
||||||
|
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
|
||||||
|
#define MIN_FREQUENCY_UP_THRESHOLD (1)
|
||||||
|
#define MAX_FREQUENCY_UP_THRESHOLD (100)
|
193
linux-tkg-patches/6.2/0006-add-acs-overrides_iommu.patch
Normal file
193
linux-tkg-patches/6.2/0006-add-acs-overrides_iommu.patch
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Mark Weiman <mark.weiman@markzz.com>
|
||||||
|
Date: Sun, 12 Aug 2018 11:36:21 -0400
|
||||||
|
Subject: [PATCH] pci: Enable overrides for missing ACS capabilities
|
||||||
|
|
||||||
|
This is an updated version of Alex Williamson's patch from:
|
||||||
|
https://lkml.org/lkml/2013/5/30/513
|
||||||
|
|
||||||
|
Original commit message follows:
|
||||||
|
|
||||||
|
PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that
|
||||||
|
allows us to control whether transactions are allowed to be redirected
|
||||||
|
in various subnodes of a PCIe topology. For instance, if two
|
||||||
|
endpoints are below a root port or downstream switch port, the
|
||||||
|
downstream port may optionally redirect transactions between the
|
||||||
|
devices, bypassing upstream devices. The same can happen internally
|
||||||
|
on multifunction devices. The transaction may never be visible to the
|
||||||
|
upstream devices.
|
||||||
|
|
||||||
|
One upstream device that we particularly care about is the IOMMU. If
|
||||||
|
a redirection occurs in the topology below the IOMMU, then the IOMMU
|
||||||
|
cannot provide isolation between devices. This is why the PCIe spec
|
||||||
|
encourages topologies to include ACS support. Without it, we have to
|
||||||
|
assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation.
|
||||||
|
|
||||||
|
Unfortunately, far too many topologies do not support ACS to make this
|
||||||
|
a steadfast requirement. Even the latest chipsets from Intel are only
|
||||||
|
sporadically supporting ACS. We have trouble getting interconnect
|
||||||
|
vendors to include the PCIe spec required PCIe capability, let alone
|
||||||
|
suggested features.
|
||||||
|
|
||||||
|
Therefore, we need to add some flexibility. The pcie_acs_override=
|
||||||
|
boot option lets users opt-in specific devices or sets of devices to
|
||||||
|
assume ACS support. The "downstream" option assumes full ACS support
|
||||||
|
on root ports and downstream switch ports. The "multifunction"
|
||||||
|
option assumes the subset of ACS features available on multifunction
|
||||||
|
endpoints and upstream switch ports are supported. The "id:nnnn:nnnn"
|
||||||
|
option enables ACS support on devices matching the provided vendor
|
||||||
|
and device IDs, allowing more strategic ACS overrides. These options
|
||||||
|
may be combined in any order. A maximum of 16 id specific overrides
|
||||||
|
are available. It's suggested to use the most limited set of options
|
||||||
|
necessary to avoid completely disabling ACS across the topology.
|
||||||
|
Note to hardware vendors, we have facilities to permanently quirk
|
||||||
|
specific devices which enforce isolation but do not provide an ACS
|
||||||
|
capability. Please contact me to have your devices added and save
|
||||||
|
your customers the hassle of this boot option.
|
||||||
|
|
||||||
|
Signed-off-by: Mark Weiman <mark.weiman@markzz.com>
|
||||||
|
---
|
||||||
|
.../admin-guide/kernel-parameters.txt | 9 ++
|
||||||
|
drivers/pci/quirks.c | 101 ++++++++++++++++++
|
||||||
|
2 files changed, 110 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
|
||||||
|
index aefd358a5ca3..173b3596fd9e 100644
|
||||||
|
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||||
|
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||||
|
@@ -3190,6 +3190,15 @@
|
||||||
|
nomsi [MSI] If the PCI_MSI kernel config parameter is
|
||||||
|
enabled, this kernel boot option can be used to
|
||||||
|
disable the use of MSI interrupts system-wide.
|
||||||
|
+ pcie_acs_override =
|
||||||
|
+ [PCIE] Override missing PCIe ACS support for:
|
||||||
|
+ downstream
|
||||||
|
+ All downstream ports - full ACS capabilities
|
||||||
|
+ multifunction
|
||||||
|
+ All multifunction devices - multifunction ACS subset
|
||||||
|
+ id:nnnn:nnnn
|
||||||
|
+ Specific device - full ACS capabilities
|
||||||
|
+ Specified as vid:did (vendor/device ID) in hex
|
||||||
|
noioapicquirk [APIC] Disable all boot interrupt quirks.
|
||||||
|
Safety option to keep boot IRQs enabled. This
|
||||||
|
should never be necessary.
|
||||||
|
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
|
||||||
|
index 4700d24e5d55..8f7a3d7fd9c1 100644
|
||||||
|
--- a/drivers/pci/quirks.c
|
||||||
|
+++ b/drivers/pci/quirks.c
|
||||||
|
@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
|
||||||
|
dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static bool acs_on_downstream;
|
||||||
|
+static bool acs_on_multifunction;
|
||||||
|
+
|
||||||
|
+#define NUM_ACS_IDS 16
|
||||||
|
+struct acs_on_id {
|
||||||
|
+ unsigned short vendor;
|
||||||
|
+ unsigned short device;
|
||||||
|
+};
|
||||||
|
+static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
|
||||||
|
+static u8 max_acs_id;
|
||||||
|
+
|
||||||
|
+static __init int pcie_acs_override_setup(char *p)
|
||||||
|
+{
|
||||||
|
+ if (!p)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ while (*p) {
|
||||||
|
+ if (!strncmp(p, "downstream", 10))
|
||||||
|
+ acs_on_downstream = true;
|
||||||
|
+ if (!strncmp(p, "multifunction", 13))
|
||||||
|
+ acs_on_multifunction = true;
|
||||||
|
+ if (!strncmp(p, "id:", 3)) {
|
||||||
|
+ char opt[5];
|
||||||
|
+ int ret;
|
||||||
|
+ long val;
|
||||||
|
+
|
||||||
|
+ if (max_acs_id >= NUM_ACS_IDS - 1) {
|
||||||
|
+ pr_warn("Out of PCIe ACS override slots (%d)\n",
|
||||||
|
+ NUM_ACS_IDS);
|
||||||
|
+ goto next;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ p += 3;
|
||||||
|
+ snprintf(opt, 5, "%s", p);
|
||||||
|
+ ret = kstrtol(opt, 16, &val);
|
||||||
|
+ if (ret) {
|
||||||
|
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
|
||||||
|
+ goto next;
|
||||||
|
+ }
|
||||||
|
+ acs_on_ids[max_acs_id].vendor = val;
|
||||||
|
+
|
||||||
|
+ p += strcspn(p, ":");
|
||||||
|
+ if (*p != ':') {
|
||||||
|
+ pr_warn("PCIe ACS invalid ID\n");
|
||||||
|
+ goto next;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ p++;
|
||||||
|
+ snprintf(opt, 5, "%s", p);
|
||||||
|
+ ret = kstrtol(opt, 16, &val);
|
||||||
|
+ if (ret) {
|
||||||
|
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
|
||||||
|
+ goto next;
|
||||||
|
+ }
|
||||||
|
+ acs_on_ids[max_acs_id].device = val;
|
||||||
|
+ max_acs_id++;
|
||||||
|
+ }
|
||||||
|
+next:
|
||||||
|
+ p += strcspn(p, ",");
|
||||||
|
+ if (*p == ',')
|
||||||
|
+ p++;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (acs_on_downstream || acs_on_multifunction || max_acs_id)
|
||||||
|
+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n");
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+early_param("pcie_acs_override", pcie_acs_override_setup);
|
||||||
|
+
|
||||||
|
+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags)
|
||||||
|
+{
|
||||||
|
+ int i;
|
||||||
|
+
|
||||||
|
+ /* Never override ACS for legacy devices or devices with ACS caps */
|
||||||
|
+ if (!pci_is_pcie(dev) ||
|
||||||
|
+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS))
|
||||||
|
+ return -ENOTTY;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < max_acs_id; i++)
|
||||||
|
+ if (acs_on_ids[i].vendor == dev->vendor &&
|
||||||
|
+ acs_on_ids[i].device == dev->device)
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ switch (pci_pcie_type(dev)) {
|
||||||
|
+ case PCI_EXP_TYPE_DOWNSTREAM:
|
||||||
|
+ case PCI_EXP_TYPE_ROOT_PORT:
|
||||||
|
+ if (acs_on_downstream)
|
||||||
|
+ return 1;
|
||||||
|
+ break;
|
||||||
|
+ case PCI_EXP_TYPE_ENDPOINT:
|
||||||
|
+ case PCI_EXP_TYPE_UPSTREAM:
|
||||||
|
+ case PCI_EXP_TYPE_LEG_END:
|
||||||
|
+ case PCI_EXP_TYPE_RC_END:
|
||||||
|
+ if (acs_on_multifunction && dev->multifunction)
|
||||||
|
+ return 1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return -ENOTTY;
|
||||||
|
+}
|
||||||
|
/*
|
||||||
|
* Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
|
||||||
|
* The device will throw a Link Down error on AER-capable systems and
|
||||||
|
@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled {
|
||||||
|
{ PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
|
||||||
|
/* Zhaoxin Root/Downstream Ports */
|
||||||
|
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
|
||||||
|
+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
|
||||||
|
{ 0 }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
166
linux-tkg-patches/6.2/0007-v6.2-fsync1_via_futex_waitv.patch
Normal file
166
linux-tkg-patches/6.2/0007-v6.2-fsync1_via_futex_waitv.patch
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
From b70e738f08403950aa3053c36b98c6b0eeb0eb90 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
|
||||||
|
Date: Mon, 25 Oct 2021 09:49:42 -0300
|
||||||
|
Subject: [PATCH] futex: Add entry point for FUTEX_WAIT_MULTIPLE (opcode 31)
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Add an option to wait on multiple futexes using the old interface, that
|
||||||
|
uses opcode 31 through futex() syscall. Do that by just translating the
|
||||||
|
old interface to use the new code. This allows old and stable versions
|
||||||
|
of Proton to still use fsync in new kernel releases.
|
||||||
|
|
||||||
|
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
||||||
|
---
|
||||||
|
include/uapi/linux/futex.h | 13 +++++++
|
||||||
|
kernel/futex/syscalls.c | 75 +++++++++++++++++++++++++++++++++++++-
|
||||||
|
2 files changed, 87 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
|
||||||
|
index 71a5df8d2689..d375ab21cbf8 100644
|
||||||
|
--- a/include/uapi/linux/futex.h
|
||||||
|
+++ b/include/uapi/linux/futex.h
|
||||||
|
@@ -22,6 +22,7 @@
|
||||||
|
#define FUTEX_WAIT_REQUEUE_PI 11
|
||||||
|
#define FUTEX_CMP_REQUEUE_PI 12
|
||||||
|
#define FUTEX_LOCK_PI2 13
|
||||||
|
+#define FUTEX_WAIT_MULTIPLE 31
|
||||||
|
|
||||||
|
#define FUTEX_PRIVATE_FLAG 128
|
||||||
|
#define FUTEX_CLOCK_REALTIME 256
|
||||||
|
@@ -68,6 +69,18 @@ struct futex_waitv {
|
||||||
|
__u32 __reserved;
|
||||||
|
};
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * struct futex_wait_block - Block of futexes to be waited for
|
||||||
|
+ * @uaddr: User address of the futex
|
||||||
|
+ * @val: Futex value expected by userspace
|
||||||
|
+ * @bitset: Bitset for the optional bitmasked wakeup
|
||||||
|
+ */
|
||||||
|
+struct futex_wait_block {
|
||||||
|
+ __u32 __user *uaddr;
|
||||||
|
+ __u32 val;
|
||||||
|
+ __u32 bitset;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Support for robust futexes: the kernel cleans up held futexes at
|
||||||
|
* thread exit time.
|
||||||
|
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
|
||||||
|
index 6f91a07a6a83..2f4d4c04ede2 100644
|
||||||
|
--- a/kernel/futex/syscalls.c
|
||||||
|
+++ b/kernel/futex/syscalls.c
|
||||||
|
@@ -158,6 +158,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
|
||||||
|
case FUTEX_LOCK_PI2:
|
||||||
|
case FUTEX_WAIT_BITSET:
|
||||||
|
case FUTEX_WAIT_REQUEUE_PI:
|
||||||
|
+ case FUTEX_WAIT_MULTIPLE:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
@@ -170,13 +171,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
*t = timespec64_to_ktime(*ts);
|
||||||
|
- if (cmd == FUTEX_WAIT)
|
||||||
|
+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
|
||||||
|
*t = ktime_add_safe(ktime_get(), *t);
|
||||||
|
else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
|
||||||
|
*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * futex_read_wait_block - Read an array of futex_wait_block from userspace
|
||||||
|
+ * @uaddr: Userspace address of the block
|
||||||
|
+ * @count: Number of blocks to be read
|
||||||
|
+ *
|
||||||
|
+ * This function creates and allocate an array of futex_q (we zero it to
|
||||||
|
+ * initialize the fields) and then, for each futex_wait_block element from
|
||||||
|
+ * userspace, fill a futex_q element with proper values.
|
||||||
|
+ */
|
||||||
|
+inline struct futex_vector *futex_read_wait_block(u32 __user *uaddr, u32 count)
|
||||||
|
+{
|
||||||
|
+ unsigned int i;
|
||||||
|
+ struct futex_vector *futexv;
|
||||||
|
+ struct futex_wait_block fwb;
|
||||||
|
+ struct futex_wait_block __user *entry =
|
||||||
|
+ (struct futex_wait_block __user *)uaddr;
|
||||||
|
+
|
||||||
|
+ if (!count || count > FUTEX_WAITV_MAX)
|
||||||
|
+ return ERR_PTR(-EINVAL);
|
||||||
|
+
|
||||||
|
+ futexv = kcalloc(count, sizeof(*futexv), GFP_KERNEL);
|
||||||
|
+ if (!futexv)
|
||||||
|
+ return ERR_PTR(-ENOMEM);
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < count; i++) {
|
||||||
|
+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) {
|
||||||
|
+ kfree(futexv);
|
||||||
|
+ return ERR_PTR(-EFAULT);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ futexv[i].w.flags = FUTEX_32;
|
||||||
|
+ futexv[i].w.val = fwb.val;
|
||||||
|
+ futexv[i].w.uaddr = (uintptr_t) (fwb.uaddr);
|
||||||
|
+ futexv[i].q = futex_q_init;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return futexv;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
|
||||||
|
+ struct hrtimer_sleeper *to);
|
||||||
|
+
|
||||||
|
+int futex_opcode_31(ktime_t *abs_time, u32 __user *uaddr, int count)
|
||||||
|
+{
|
||||||
|
+ int ret;
|
||||||
|
+ struct futex_vector *vs;
|
||||||
|
+ struct hrtimer_sleeper *to = NULL, timeout;
|
||||||
|
+
|
||||||
|
+ to = futex_setup_timer(abs_time, &timeout, 0, 0);
|
||||||
|
+
|
||||||
|
+ vs = futex_read_wait_block(uaddr, count);
|
||||||
|
+
|
||||||
|
+ if (IS_ERR(vs))
|
||||||
|
+ return PTR_ERR(vs);
|
||||||
|
+
|
||||||
|
+ ret = futex_wait_multiple(vs, count, abs_time ? to : NULL);
|
||||||
|
+ kfree(vs);
|
||||||
|
+
|
||||||
|
+ if (to) {
|
||||||
|
+ hrtimer_cancel(&to->timer);
|
||||||
|
+ destroy_hrtimer_on_stack(&to->timer);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
|
||||||
|
const struct __kernel_timespec __user *, utime,
|
||||||
|
u32 __user *, uaddr2, u32, val3)
|
||||||
|
@@ -196,6 +263,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
|
||||||
|
tp = &t;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (cmd == FUTEX_WAIT_MULTIPLE)
|
||||||
|
+ return futex_opcode_31(tp, uaddr, val);
|
||||||
|
+
|
||||||
|
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -392,6 +462,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
|
||||||
|
tp = &t;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (cmd == FUTEX_WAIT_MULTIPLE)
|
||||||
|
+ return futex_opcode_31(tp, uaddr, val);
|
||||||
|
+
|
||||||
|
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_COMPAT_32BIT_TIME */
|
||||||
|
--
|
||||||
|
2.33.1
|
||||||
|
|
5105
linux-tkg-patches/6.2/0007-v6.2-winesync.patch
Normal file
5105
linux-tkg-patches/6.2/0007-v6.2-winesync.patch
Normal file
File diff suppressed because it is too large
Load Diff
90
linux-tkg-patches/6.2/0009-glitched-bmq.patch
Normal file
90
linux-tkg-patches/6.2/0009-glitched-bmq.patch
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tk-Glitch <ti3nou@gmail.com>
|
||||||
|
Date: Wed, 4 Jul 2018 04:30:08 +0200
|
||||||
|
Subject: glitched - BMQ
|
||||||
|
|
||||||
|
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
|
||||||
|
index 2a202a846757..1d9c7ed79b11 100644
|
||||||
|
--- a/kernel/Kconfig.hz
|
||||||
|
+++ b/kernel/Kconfig.hz
|
||||||
|
@@ -4,7 +4,7 @@
|
||||||
|
|
||||||
|
choice
|
||||||
|
prompt "Timer frequency"
|
||||||
|
- default HZ_250
|
||||||
|
+ default HZ_500
|
||||||
|
help
|
||||||
|
Allows the configuration of the timer frequency. It is customary
|
||||||
|
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
|
||||||
|
@@ -39,6 +39,13 @@ choice
|
||||||
|
on SMP and NUMA systems and exactly dividing by both PAL and
|
||||||
|
NTSC frame rates for video and multimedia work.
|
||||||
|
|
||||||
|
+ config HZ_500
|
||||||
|
+ bool "500 HZ"
|
||||||
|
+ help
|
||||||
|
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
|
||||||
|
+ on desktops with great smoothness without increasing CPU power
|
||||||
|
+ consumption and sacrificing the battery life on laptops.
|
||||||
|
+
|
||||||
|
config HZ_1000
|
||||||
|
bool "1000 HZ"
|
||||||
|
help
|
||||||
|
@@ -52,6 +59,7 @@ config HZ
|
||||||
|
default 100 if HZ_100
|
||||||
|
default 250 if HZ_250
|
||||||
|
default 300 if HZ_300
|
||||||
|
+ default 500 if HZ_500
|
||||||
|
default 1000 if HZ_1000
|
||||||
|
|
||||||
|
config SCHED_HRTICK
|
||||||
|
|
||||||
|
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
|
||||||
|
index 2a202a846757..1d9c7ed79b11 100644
|
||||||
|
--- a/kernel/Kconfig.hz
|
||||||
|
+++ b/kernel/Kconfig.hz
|
||||||
|
@@ -4,7 +4,7 @@
|
||||||
|
|
||||||
|
choice
|
||||||
|
prompt "Timer frequency"
|
||||||
|
- default HZ_500
|
||||||
|
+ default HZ_750
|
||||||
|
help
|
||||||
|
Allows the configuration of the timer frequency. It is customary
|
||||||
|
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
|
||||||
|
@@ -46,6 +46,13 @@ choice
|
||||||
|
on desktops with great smoothness without increasing CPU power
|
||||||
|
consumption and sacrificing the battery life on laptops.
|
||||||
|
|
||||||
|
+ config HZ_750
|
||||||
|
+ bool "750 HZ"
|
||||||
|
+ help
|
||||||
|
+ 750 Hz is a good timer frequency for desktops. Provides fast
|
||||||
|
+ interactivity with great smoothness without sacrificing too
|
||||||
|
+ much throughput.
|
||||||
|
+
|
||||||
|
config HZ_1000
|
||||||
|
bool "1000 HZ"
|
||||||
|
help
|
||||||
|
@@ -60,6 +67,7 @@ config HZ
|
||||||
|
default 250 if HZ_250
|
||||||
|
default 300 if HZ_300
|
||||||
|
default 500 if HZ_500
|
||||||
|
+ default 750 if HZ_750
|
||||||
|
default 1000 if HZ_1000
|
||||||
|
|
||||||
|
config SCHED_HRTICK
|
||||||
|
|
||||||
|
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||||
|
index 9270a4370d54..30d01e647417 100644
|
||||||
|
--- a/mm/vmscan.c
|
||||||
|
+++ b/mm/vmscan.c
|
||||||
|
@@ -169,7 +169,7 @@
|
||||||
|
/*
|
||||||
|
* From 0 .. 200. Higher means more swappy.
|
||||||
|
*/
|
||||||
|
-int vm_swappiness = 60;
|
||||||
|
+int vm_swappiness = 20;
|
||||||
|
|
||||||
|
static void set_task_reclaim_state(struct task_struct *task,
|
||||||
|
struct reclaim_state *rs)
|
18
linux-tkg-patches/6.2/0009-glitched-ondemand-bmq.patch
Normal file
18
linux-tkg-patches/6.2/0009-glitched-ondemand-bmq.patch
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
|
||||||
|
index 6b423eebfd5d..61e3271675d6 100644
|
||||||
|
--- a/drivers/cpufreq/cpufreq_ondemand.c
|
||||||
|
+++ b/drivers/cpufreq/cpufreq_ondemand.c
|
||||||
|
@@ -21,10 +21,10 @@
|
||||||
|
#include "cpufreq_ondemand.h"
|
||||||
|
|
||||||
|
/* On-demand governor macros */
|
||||||
|
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
|
||||||
|
-#define DEF_SAMPLING_DOWN_FACTOR (1)
|
||||||
|
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
|
||||||
|
+#define DEF_SAMPLING_DOWN_FACTOR (5)
|
||||||
|
#define MAX_SAMPLING_DOWN_FACTOR (100000)
|
||||||
|
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
|
||||||
|
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
|
||||||
|
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
|
||||||
|
#define MIN_FREQUENCY_UP_THRESHOLD (1)
|
||||||
|
#define MAX_FREQUENCY_UP_THRESHOLD (100)
|
316
linux-tkg-patches/6.2/0012-misc-additions.patch
Normal file
316
linux-tkg-patches/6.2/0012-misc-additions.patch
Normal file
@@ -0,0 +1,316 @@
|
|||||||
|
From e5e77ad2223f662e1615266d8ef39a8db7e65a70 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Felix=20H=C3=A4dicke?= <felixhaedicke@web.de>
|
||||||
|
Date: Thu, 19 Nov 2020 09:22:32 +0100
|
||||||
|
Subject: HID: quirks: Add Apple Magic Trackpad 2 to hid_have_special_driver
|
||||||
|
list
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
The Apple Magic Trackpad 2 is handled by the magicmouse driver. And
|
||||||
|
there were severe stability issues when both drivers (hid-generic and
|
||||||
|
hid-magicmouse) were loaded for this device.
|
||||||
|
|
||||||
|
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=210241
|
||||||
|
|
||||||
|
Signed-off-by: Felix Hädicke <felixhaedicke@web.de>
|
||||||
|
---
|
||||||
|
drivers/hid/hid-quirks.c | 2 ++
|
||||||
|
1 file changed, 2 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
|
||||||
|
index bf7ecab5d9e5..142e9dae2837 100644
|
||||||
|
--- a/drivers/hid/hid-quirks.c
|
||||||
|
+++ b/drivers/hid/hid-quirks.c
|
||||||
|
@@ -478,6 +478,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
|
||||||
|
#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
|
||||||
|
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
|
||||||
|
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
|
||||||
|
+ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
|
||||||
|
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
|
||||||
|
#endif
|
||||||
|
#if IS_ENABLED(CONFIG_HID_MAYFLASH)
|
||||||
|
{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
|
||||||
|
--
|
||||||
|
cgit v1.2.3-1-gf6bb5
|
||||||
|
|
||||||
|
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tk-Glitch <ti3nou@gmail.com>
|
||||||
|
Date: Wed, 3 Feb 2021 11:20:12 +0200
|
||||||
|
Subject: Revert "cpufreq: Avoid configuring old governors as default with intel_pstate"
|
||||||
|
|
||||||
|
This is an undesirable behavior for us since our aggressive ondemand performs
|
||||||
|
better than schedutil for gaming when using intel_pstate in passive mode.
|
||||||
|
Also it interferes with the option to select the desired default governor we have.
|
||||||
|
|
||||||
|
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
|
||||||
|
index 2c7171e0b0010..85de313ddec29 100644
|
||||||
|
--- a/drivers/cpufreq/Kconfig
|
||||||
|
+++ b/drivers/cpufreq/Kconfig
|
||||||
|
@@ -71,7 +71,6 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE
|
||||||
|
|
||||||
|
config CPU_FREQ_DEFAULT_GOV_ONDEMAND
|
||||||
|
bool "ondemand"
|
||||||
|
- depends on !(X86_INTEL_PSTATE && SMP)
|
||||||
|
select CPU_FREQ_GOV_ONDEMAND
|
||||||
|
select CPU_FREQ_GOV_PERFORMANCE
|
||||||
|
help
|
||||||
|
@@ -83,7 +84,6 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND
|
||||||
|
|
||||||
|
config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
|
||||||
|
bool "conservative"
|
||||||
|
- depends on !(X86_INTEL_PSTATE && SMP)
|
||||||
|
select CPU_FREQ_GOV_CONSERVATIVE
|
||||||
|
select CPU_FREQ_GOV_PERFORMANCE
|
||||||
|
help
|
||||||
|
|
||||||
|
From 0c079d3f88df5f8286cd5c91b54bdac7c819be85 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Matthew Auld <matthew.auld@intel.com>
|
||||||
|
Date: Tue, 6 Dec 2022 16:11:41 +0000
|
||||||
|
Subject: [PATCH] drm/i915: improve the catch-all evict to handle lock
|
||||||
|
contention
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
The catch-all evict can fail due to object lock contention, since it
|
||||||
|
only goes as far as trylocking the object, due to us already holding the
|
||||||
|
vm->mutex. Doing a full object lock here can deadlock, since the
|
||||||
|
vm->mutex is always our inner lock. Add another execbuf pass which drops
|
||||||
|
the vm->mutex and then tries to grab the object with the full lock,
|
||||||
|
before then retrying the eviction. This should be good enough for now to
|
||||||
|
fix the immediate regression with userspace seeing -ENOSPC from execbuf
|
||||||
|
due to contended object locks during GTT eviction.
|
||||||
|
|
||||||
|
Testcase: igt@gem_ppgtt@shrink-vs-evict-*
|
||||||
|
Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.")
|
||||||
|
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627
|
||||||
|
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570
|
||||||
|
References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558
|
||||||
|
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
|
||||||
|
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
|
||||||
|
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
|
||||||
|
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
|
||||||
|
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
|
||||||
|
Cc: Mani Milani <mani@chromium.org>
|
||||||
|
Cc: <stable@vger.kernel.org> # v5.18+
|
||||||
|
|
||||||
|
Revision 1 of https://patchwork.freedesktop.org/series/111686/
|
||||||
|
---
|
||||||
|
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 25 +++++++++++--
|
||||||
|
drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +-
|
||||||
|
drivers/gpu/drm/i915/i915_gem_evict.c | 37 ++++++++++++++-----
|
||||||
|
drivers/gpu/drm/i915/i915_gem_evict.h | 4 +-
|
||||||
|
drivers/gpu/drm/i915/i915_vma.c | 2 +-
|
||||||
|
.../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-
|
||||||
|
6 files changed, 56 insertions(+), 18 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||||
|
index 845023c14eb36f..094e92ed28db4f 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||||
|
@@ -741,25 +741,44 @@ static int eb_reserve(struct i915_execbuffer *eb)
|
||||||
|
*
|
||||||
|
* Defragmenting is skipped if all objects are pinned at a fixed location.
|
||||||
|
*/
|
||||||
|
- for (pass = 0; pass <= 2; pass++) {
|
||||||
|
+ for (pass = 0; pass <= 3; pass++) {
|
||||||
|
int pin_flags = PIN_USER | PIN_VALIDATE;
|
||||||
|
|
||||||
|
if (pass == 0)
|
||||||
|
pin_flags |= PIN_NONBLOCK;
|
||||||
|
|
||||||
|
if (pass >= 1)
|
||||||
|
- unpinned = eb_unbind(eb, pass == 2);
|
||||||
|
+ unpinned = eb_unbind(eb, pass >= 2);
|
||||||
|
|
||||||
|
if (pass == 2) {
|
||||||
|
err = mutex_lock_interruptible(&eb->context->vm->mutex);
|
||||||
|
if (!err) {
|
||||||
|
- err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
|
||||||
|
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
|
||||||
|
mutex_unlock(&eb->context->vm->mutex);
|
||||||
|
}
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (pass == 3) {
|
||||||
|
+retry:
|
||||||
|
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
|
||||||
|
+ if (!err) {
|
||||||
|
+ struct drm_i915_gem_object *busy_bo = NULL;
|
||||||
|
+
|
||||||
|
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
|
||||||
|
+ mutex_unlock(&eb->context->vm->mutex);
|
||||||
|
+ if (err && busy_bo) {
|
||||||
|
+ err = i915_gem_object_lock(busy_bo, &eb->ww);
|
||||||
|
+ i915_gem_object_put(busy_bo);
|
||||||
|
+ if (!err)
|
||||||
|
+ goto retry;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (err)
|
||||||
|
+ return err;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
list_for_each_entry(ev, &eb->unbound, bind_link) {
|
||||||
|
err = eb_reserve_vma(eb, ev, pin_flags);
|
||||||
|
if (err)
|
||||||
|
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||||
|
index 73d9eda1d6b7a6..c83d98e1dc5da0 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||||
|
@@ -369,7 +369,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
|
||||||
|
if (vma == ERR_PTR(-ENOSPC)) {
|
||||||
|
ret = mutex_lock_interruptible(&ggtt->vm.mutex);
|
||||||
|
if (!ret) {
|
||||||
|
- ret = i915_gem_evict_vm(&ggtt->vm, &ww);
|
||||||
|
+ ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
|
||||||
|
mutex_unlock(&ggtt->vm.mutex);
|
||||||
|
}
|
||||||
|
if (ret)
|
||||||
|
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||||
|
index f025ee4fa52618..a4b4d9b7d26c7a 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||||
|
@@ -416,6 +416,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||||
|
* @vm: Address space to cleanse
|
||||||
|
* @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
|
||||||
|
* will be able to evict vma's locked by the ww as well.
|
||||||
|
+ * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
|
||||||
|
+ * in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
|
||||||
|
+ * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
|
||||||
|
+ * the vm->mutex, before trying again to acquire the contended lock. The caller
|
||||||
|
+ * also owns a reference to the object.
|
||||||
|
*
|
||||||
|
* This function evicts all vmas from a vm.
|
||||||
|
*
|
||||||
|
@@ -425,7 +430,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||||
|
* To clarify: This is for freeing up virtual address space, not for freeing
|
||||||
|
* memory in e.g. the shrinker.
|
||||||
|
*/
|
||||||
|
-int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||||
|
+int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
|
||||||
|
+ struct drm_i915_gem_object **busy_bo)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
@@ -457,15 +463,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||||
|
* the resv is shared among multiple objects, we still
|
||||||
|
* need the object ref.
|
||||||
|
*/
|
||||||
|
- if (dying_vma(vma) ||
|
||||||
|
+ if (!i915_gem_object_get_rcu(vma->obj) ||
|
||||||
|
(ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) {
|
||||||
|
__i915_vma_pin(vma);
|
||||||
|
list_add(&vma->evict_link, &locked_eviction_list);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (!i915_gem_object_trylock(vma->obj, ww))
|
||||||
|
+ if (!i915_gem_object_trylock(vma->obj, ww)) {
|
||||||
|
+ if (busy_bo) {
|
||||||
|
+ *busy_bo = vma->obj; /* holds ref */
|
||||||
|
+ ret = -EBUSY;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ i915_gem_object_put(vma->obj);
|
||||||
|
continue;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
__i915_vma_pin(vma);
|
||||||
|
list_add(&vma->evict_link, &eviction_list);
|
||||||
|
@@ -473,25 +486,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||||
|
if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
|
||||||
|
break;
|
||||||
|
|
||||||
|
- ret = 0;
|
||||||
|
/* Unbind locked objects first, before unlocking the eviction_list */
|
||||||
|
list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
|
||||||
|
__i915_vma_unpin(vma);
|
||||||
|
|
||||||
|
- if (ret == 0)
|
||||||
|
+ if (ret == 0) {
|
||||||
|
ret = __i915_vma_unbind(vma);
|
||||||
|
- if (ret != -EINTR) /* "Get me out of here!" */
|
||||||
|
- ret = 0;
|
||||||
|
+ if (ret != -EINTR) /* "Get me out of here!" */
|
||||||
|
+ ret = 0;
|
||||||
|
+ }
|
||||||
|
+ if (!dying_vma(vma))
|
||||||
|
+ i915_gem_object_put(vma->obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
|
||||||
|
__i915_vma_unpin(vma);
|
||||||
|
- if (ret == 0)
|
||||||
|
+ if (ret == 0) {
|
||||||
|
ret = __i915_vma_unbind(vma);
|
||||||
|
- if (ret != -EINTR) /* "Get me out of here!" */
|
||||||
|
- ret = 0;
|
||||||
|
+ if (ret != -EINTR) /* "Get me out of here!" */
|
||||||
|
+ ret = 0;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
i915_gem_object_unlock(vma->obj);
|
||||||
|
+ i915_gem_object_put(vma->obj);
|
||||||
|
}
|
||||||
|
} while (ret == 0);
|
||||||
|
|
||||||
|
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||||
|
index e593c530f9bd7a..bf0ee0e4fe6088 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||||
|
+++ b/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||||
|
@@ -11,6 +11,7 @@
|
||||||
|
struct drm_mm_node;
|
||||||
|
struct i915_address_space;
|
||||||
|
struct i915_gem_ww_ctx;
|
||||||
|
+struct drm_i915_gem_object;
|
||||||
|
|
||||||
|
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
|
||||||
|
struct i915_gem_ww_ctx *ww,
|
||||||
|
@@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||||
|
struct drm_mm_node *node,
|
||||||
|
unsigned int flags);
|
||||||
|
int i915_gem_evict_vm(struct i915_address_space *vm,
|
||||||
|
- struct i915_gem_ww_ctx *ww);
|
||||||
|
+ struct i915_gem_ww_ctx *ww,
|
||||||
|
+ struct drm_i915_gem_object **busy_bo);
|
||||||
|
|
||||||
|
#endif /* __I915_GEM_EVICT_H__ */
|
||||||
|
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
|
||||||
|
index f17c09ead7d778..4d06875de14a14 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/i915_vma.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/i915_vma.c
|
||||||
|
@@ -1569,7 +1569,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
||||||
|
* locked objects when called from execbuf when pinning
|
||||||
|
* is removed. This would probably regress badly.
|
||||||
|
*/
|
||||||
|
- i915_gem_evict_vm(vm, NULL);
|
||||||
|
+ i915_gem_evict_vm(vm, NULL, NULL);
|
||||||
|
mutex_unlock(&vm->mutex);
|
||||||
|
}
|
||||||
|
} while (1);
|
||||||
|
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||||
|
index 8c6517d29b8e0c..37068542aafe7f 100644
|
||||||
|
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||||
|
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||||
|
@@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
|
||||||
|
|
||||||
|
/* Everything is pinned, nothing should happen */
|
||||||
|
mutex_lock(&ggtt->vm.mutex);
|
||||||
|
- err = i915_gem_evict_vm(&ggtt->vm, NULL);
|
||||||
|
+ err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL);
|
||||||
|
mutex_unlock(&ggtt->vm.mutex);
|
||||||
|
if (err) {
|
||||||
|
pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
|
||||||
|
@@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
|
||||||
|
|
||||||
|
for_i915_gem_ww(&ww, err, false) {
|
||||||
|
mutex_lock(&ggtt->vm.mutex);
|
||||||
|
- err = i915_gem_evict_vm(&ggtt->vm, &ww);
|
||||||
|
+ err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
|
||||||
|
mutex_unlock(&ggtt->vm.mutex);
|
||||||
|
}
|
||||||
|
|
27
linux-tkg-patches/6.2/0013-fedora-rpm.patch
Normal file
27
linux-tkg-patches/6.2/0013-fedora-rpm.patch
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Remove the obsoletes line in kernel-headers
|
||||||
|
# Add provides for kernel-devel so there's no conflict
|
||||||
|
|
||||||
|
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
|
||||||
|
index 7c477ca7d..1158f5559 100755
|
||||||
|
--- a/scripts/package/mkspec
|
||||||
|
+++ b/scripts/package/mkspec
|
||||||
|
@@ -25,0 +26 @@ fi
|
||||||
|
+PROVIDES_DRM=""
|
||||||
|
@@ -27 +28 @@ if grep -q CONFIG_DRM=y .config; then
|
||||||
|
- PROVIDES=kernel-drm
|
||||||
|
+ PROVIDES_DRM="Provides: kernel-drm = %{version}"
|
||||||
|
@@ -30 +30,0 @@ fi
|
||||||
|
-PROVIDES="$PROVIDES kernel-$KERNELRELEASE"
|
||||||
|
@@ -51 +51,3 @@ $S Source: kernel-$__KERNELRELEASE.tar.gz
|
||||||
|
- Provides: $PROVIDES
|
||||||
|
+ $PROVIDES_DRM
|
||||||
|
+ Provides: kernel = %{version}
|
||||||
|
+ Provides: kernel-uname-r = %{version}
|
||||||
|
+ Provides: installonlypkg(kernel) = %{version}
|
||||||
|
@@ -61 +63 @@ $S Source: kernel-$__KERNELRELEASE.tar.gz
|
||||||
|
- Obsoletes: kernel-headers
|
||||||
|
+ Provides: installonlypkg(kernel) = %{version}
|
||||||
|
@@ -72,0 +75,3 @@ $S$M Group: System Environment/Kernel
|
||||||
|
+$S$M Provides: kernel-devel = %{version}
|
||||||
|
+$S$M Provides: kernel-devel-uname-r = %{version}
|
||||||
|
+$S$M Provides: installonlypkg(kernel) = %{version}
|
46
linux-tkg-patches/6.2/0013-optimize_harder_O3.patch
Normal file
46
linux-tkg-patches/6.2/0013-optimize_harder_O3.patch
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
diff --git a/Makefile b/Makefile
|
||||||
|
--- a/Makefile
|
||||||
|
+++ b/Makefile
|
||||||
|
@@ -442,7 +442,7 @@ endif
|
||||||
|
HOSTPKG_CONFIG = pkg-config
|
||||||
|
|
||||||
|
KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
|
||||||
|
- -O2 -fomit-frame-pointer -std=gnu11 \
|
||||||
|
+ -O3 -fomit-frame-pointer -std=gnu11 \
|
||||||
|
-Wdeclaration-after-statement
|
||||||
|
KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS)
|
||||||
|
KBUILD_USERLDFLAGS := $(USERLDFLAGS)
|
||||||
|
@@ -474,7 +474,7 @@ endif
|
||||||
|
-Wclippy::dbg_macro
|
||||||
|
|
||||||
|
KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
|
||||||
|
-KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
|
||||||
|
+KBUILD_HOSTCXXFLAGS := -Wall -O3 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
|
||||||
|
KBUILD_HOSTRUSTFLAGS := $(rust_common_flags) -O -Cstrip=debuginfo \
|
||||||
|
-Zallow-features= $(HOSTRUSTFLAGS)
|
||||||
|
KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS)
|
||||||
|
@@ -757,7 +757,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
|
||||||
|
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
|
||||||
|
|
||||||
|
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
|
||||||
|
-KBUILD_CFLAGS += -O2
|
||||||
|
+KBUILD_CFLAGS += -O3
|
||||||
|
KBUILD_RUSTFLAGS += -Copt-level=2
|
||||||
|
else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
|
||||||
|
KBUILD_CFLAGS += -Os
|
||||||
|
diff --git a/init/Kconfig b/init/Kconfig
|
||||||
|
--- a/init/Kconfig
|
||||||
|
+++ b/init/Kconfig
|
||||||
|
@@ -1401,10 +1401,10 @@ choice
|
||||||
|
default CC_OPTIMIZE_FOR_PERFORMANCE
|
||||||
|
|
||||||
|
config CC_OPTIMIZE_FOR_PERFORMANCE
|
||||||
|
- bool "Optimize for performance (-O2)"
|
||||||
|
+ bool "Optimize for performance (-O3)"
|
||||||
|
help
|
||||||
|
This is the default optimization level for the kernel, building
|
||||||
|
- with the "-O2" compiler flag for best performance and most
|
||||||
|
+ with the "-O3" compiler flag for best performance and most
|
||||||
|
helpful compile-time warnings.
|
||||||
|
|
||||||
|
config CC_OPTIMIZE_FOR_SIZE
|
Reference in New Issue
Block a user