Compare commits
20 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
06e2ade846 | ||
|
5298957d9b | ||
|
8c776ecf64 | ||
|
a6877bd422 | ||
|
12ff2a498f | ||
|
b57e1824c2 | ||
|
c141a8c304 | ||
|
5c3621fc6a | ||
|
be5dfffb8d | ||
|
1fc60cc388 | ||
|
73e9a2eb3c | ||
|
f4324da8a1 | ||
|
1854907001 | ||
|
348f3c9bb6 | ||
|
9948b85dac | ||
|
eb4c793ca8 | ||
|
367925c322 | ||
|
d0d6f02944 | ||
|
38e73fb059 | ||
|
4547454e3d |
4
PKGBUILD
4
PKGBUILD
@@ -57,7 +57,7 @@ else
|
||||
fi
|
||||
pkgname=("${pkgbase}" "${pkgbase}-headers")
|
||||
pkgver="${_basekernel}"."${_sub}"
|
||||
pkgrel=272
|
||||
pkgrel=273
|
||||
pkgdesc='Linux-tkg'
|
||||
arch=('x86_64') # no i686 in here
|
||||
url="https://www.kernel.org/"
|
||||
@@ -262,7 +262,7 @@ hackheaders() {
|
||||
msg2 "Stripping build tools..."
|
||||
local file
|
||||
while read -rd '' file; do
|
||||
case "$(file -bi "$file")" in
|
||||
case "$(file -Sib "$file")" in
|
||||
application/x-sharedlib\;*) # Libraries (.so)
|
||||
strip -v $STRIP_SHARED "$file" ;;
|
||||
application/x-archive\;*) # Libraries (.a)
|
||||
|
@@ -3,7 +3,7 @@
|
||||
# Linux distribution you are using, options are "Arch", "Void", "Ubuntu", "Debian", "Fedora", "Suse", "Gentoo", "Generic".
|
||||
# It is automatically set to "Arch" when using PKGBUILD.
|
||||
# If left empty, the script will prompt
|
||||
_distro=""
|
||||
_distro="Arch"
|
||||
|
||||
# Kernel Version - Options are "5.4", and from "5.7" to "5.19"
|
||||
# you can also set a specific kernel version, e.g. "6.0-rc4" or "5.10.51",
|
||||
@@ -32,7 +32,7 @@ CUSTOM_GCC_PATH=""
|
||||
CUSTOM_LLVM_PATH=""
|
||||
|
||||
# Set to true to bypass makepkg.conf and use all available threads for compilation. False will respect your makepkg.conf options.
|
||||
_force_all_threads="true"
|
||||
_force_all_threads="false"
|
||||
|
||||
# Set to true to prevent ccache from being used and set CONFIG_GCC_PLUGINS=y (which needs to be disabled for ccache to work properly)
|
||||
_noccache="false"
|
||||
@@ -46,10 +46,10 @@ _modprobeddb="false"
|
||||
_modprobeddb_db_path=~/.config/modprobed.db
|
||||
|
||||
# Set to "1" to call make menuconfig, "2" to call make nconfig, "3" to call make xconfig, before building the kernel. Set to false to disable and skip the prompt.
|
||||
_menunconfig=""
|
||||
_menunconfig="false"
|
||||
|
||||
# Set to true to generate a kernel config fragment from your changes in menuconfig/nconfig. Set to false to disable and skip the prompt.
|
||||
_diffconfig=""
|
||||
_diffconfig="false"
|
||||
|
||||
# Set to the file name where the generated config fragment should be written to. Only used if _diffconfig is active.
|
||||
_diffconfig_name=""
|
||||
@@ -57,6 +57,10 @@ _diffconfig_name=""
|
||||
# [install.sh specific] Use tmpfs as a work directory, recommended when RAM >= 32GB to reduce HDD/SSD usage. For more information, see https://wiki.archlinux.org/title/Tmpfs
|
||||
_use_tmpfs="false"
|
||||
|
||||
# Always make a fresh clone of the source in tmpfs to speed up compilation times
|
||||
# ! This will take ~20GB of RAM by itself, so don't use on <32GB RAM systems !
|
||||
_source_in_tmpfs="false"
|
||||
|
||||
# [install.sh specific] tmpfs folder path, only used when _use_tmpfs="true".
|
||||
# Creates a linux-tkg work folder within that path; make sure to have nothing important in "$_tmpfs_path/linux-tkg"
|
||||
_tmpfs_path="/tmp"
|
||||
@@ -90,11 +94,11 @@ _STRIP="true"
|
||||
# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME
|
||||
|
||||
# CPU scheduler - Options are "upds" (TkG's Undead PDS), "pds", "bmq", "muqss", "cacule" or "cfs" (kernel's default)
|
||||
_cpusched=""
|
||||
_cpusched="pds"
|
||||
|
||||
# Compiler to use - Options are "gcc" or "llvm".
|
||||
# For advanced users.
|
||||
_compiler=""
|
||||
_compiler="gcc"
|
||||
|
||||
# Force the use of the LLVM Integrated Assembler whether using LLVM, LTO or not.
|
||||
# Set to "1" to enable.
|
||||
@@ -124,7 +128,7 @@ _preempt_rt_force=""
|
||||
# For BMQ: 0: No yield.
|
||||
# 1: Deboost and requeue task. (Default)
|
||||
# 2: Set rq skip task.
|
||||
_sched_yield_type=""
|
||||
_sched_yield_type="0"
|
||||
|
||||
# Round Robin interval is the longest duration two tasks with the same nice level will be delayed for. When CPU time is requested by a task, it receives a time slice equal
|
||||
# to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low value can help offset the disadvantages of rescheduling a process that has yielded.
|
||||
@@ -132,7 +136,7 @@ _sched_yield_type=""
|
||||
# PDS default: 4ms"
|
||||
# BMQ default: 2ms"
|
||||
# Set to "1" for 2ms, "2" for 4ms, "3" for 6ms, "4" for 8ms, or "default" to keep the chosen scheduler defaults.
|
||||
_rr_interval=""
|
||||
_rr_interval="2"
|
||||
|
||||
# Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false"
|
||||
_ftracedisable="false"
|
||||
@@ -147,10 +151,10 @@ _misc_adds="true"
|
||||
# Full tickless can give higher performances in case you use isolation of CPUs for tasks
|
||||
# and it works only when using the nohz_full kernel parameter, otherwise behaves like idle.
|
||||
# Tickless idle alone performs better on most platforms.
|
||||
_tickless=""
|
||||
_tickless="2"
|
||||
|
||||
# Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false"
|
||||
_acs_override=""
|
||||
_acs_override="false"
|
||||
|
||||
# Set to "true" to add Bcache filesystem support. You'll have to install bcachefs-tools-git from AUR for utilities - https://bcachefs.org/ - If in doubt, set to "false"
|
||||
# This can be buggy and isn't recommended on a production machine, also enabling this option will not allow you to enable MGLRU.
|
||||
@@ -179,13 +183,13 @@ _futex_waitv="false"
|
||||
_winesync="false"
|
||||
|
||||
# Set to "true" to enable Binder and Ashmem, the kernel modules required to use the android emulator Anbox. ! This doesn't apply to 5.4.y !
|
||||
_anbox=""
|
||||
_anbox="false"
|
||||
|
||||
# A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true"
|
||||
_zenify="true"
|
||||
|
||||
# compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "1"
|
||||
_compileroptlevel="1"
|
||||
_compileroptlevel="2"
|
||||
|
||||
# CPU compiler optimizations - Defaults to prompt at kernel config if left empty
|
||||
# AMD CPUs : "k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver" "steamroller" "excavator" "zen" "zen2" "zen3" "zen4" (zen3 opt support depends on GCC11) (zen4 opt support depends on GCC13)
|
||||
@@ -199,7 +203,7 @@ _compileroptlevel="1"
|
||||
# - "generic_v2" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v2
|
||||
# - "generic_v3" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v3
|
||||
# - "generic_v4" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v4
|
||||
_processor_opt=""
|
||||
_processor_opt="skylake"
|
||||
|
||||
# MuQSS only - Make IRQ threading compulsory (FORCE_IRQ_THREADING) - Default is "false"
|
||||
_irq_threading="false"
|
||||
@@ -215,7 +219,7 @@ _cacule_rdb_interval="19"
|
||||
_tt_high_hz="false"
|
||||
|
||||
# MuQSS and PDS only - SMT (Hyperthreading) aware nice priority and policy support (SMT_NICE) - Kernel default is "true" - You can disable this on non-SMT/HT CPUs for lower overhead
|
||||
_smt_nice=""
|
||||
_smt_nice="true"
|
||||
|
||||
# Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false"
|
||||
_random_trust_cpu="true"
|
||||
@@ -225,7 +229,7 @@ _random_trust_cpu="true"
|
||||
_runqueue_sharing=""
|
||||
|
||||
# Timer frequency - "100" "250" "300" "500" "750" "1000" ("2000" is available for cacule cpusched only) - More options available in kernel config prompt when left empty depending on selected cpusched with the default option pointed with a ">" (2000 for cacule, 100 for muqss and 1000 for other cpu schedulers)
|
||||
_timer_freq=""
|
||||
_timer_freq="500"
|
||||
|
||||
# Default CPU governor - "performance", "ondemand", "schedutil" or leave empty for default (schedutil)
|
||||
_default_cpu_gov="ondemand"
|
||||
@@ -241,7 +245,7 @@ _aggressive_ondemand="true"
|
||||
_tcp_cong_alg=""
|
||||
|
||||
# You can pass a default set of kernel command line options here - example: "intel_pstate=passive nowatchdog amdgpu.ppfeaturemask=0xfffd7fff mitigations=off"
|
||||
_custom_commandline="intel_pstate=passive"
|
||||
_custom_commandline="intel_pstate=passive split_lock_detect=off"
|
||||
|
||||
# Selection of Clearlinux patches
|
||||
_clear_patches="true"
|
||||
@@ -260,10 +264,10 @@ _kernel_localversion=""
|
||||
# Set to "true" to add back missing symbol for AES-NI/AVX support on ZFS - This is a legacy option that can be ignored on 5.10+ kernels - https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions.patch
|
||||
_zfsfix="true"
|
||||
|
||||
# Set to your maximum number of CPUs (physical + logical cores) - Lower means less overhead - You can set it to "$(nproc)" to use the current host's CPU(s) core count, or leave empty to get a prompt
|
||||
# Set to your maximum number of CPUs (physical + logical cores) - Lower means less overhead - You can set it to "$(nproc)" to use the current host's CPU(s) core count, or leave empty to use default
|
||||
# If you set this to a lower value than you have cores, some cores will be disabled
|
||||
# Default Arch kernel value is 320
|
||||
_NR_CPUS_value="128"
|
||||
_NR_CPUS_value=""
|
||||
|
||||
|
||||
#### USER PATCHES ####
|
||||
|
14
linux-tkg-config/6.2/90-cleanup.hook
Normal file
14
linux-tkg-config/6.2/90-cleanup.hook
Normal file
@@ -0,0 +1,14 @@
|
||||
[Trigger]
|
||||
Type = File
|
||||
Operation = Install
|
||||
Operation = Upgrade
|
||||
Operation = Remove
|
||||
Target = usr/lib/modules/*/
|
||||
Target = !usr/lib/modules/*/?*
|
||||
|
||||
[Action]
|
||||
Description = Cleaning up...
|
||||
When = PostTransaction
|
||||
Exec = /usr/share/libalpm/scripts/cleanup
|
||||
NeedsTargets
|
||||
|
10
linux-tkg-config/6.2/cleanup
Executable file
10
linux-tkg-config/6.2/cleanup
Executable file
@@ -0,0 +1,10 @@
|
||||
#!/bin/bash
|
||||
|
||||
for _f in /usr/lib/modules/*tkg*; do
|
||||
if [[ ! -e ${_f}/vmlinuz ]]; then
|
||||
rm -rf "$_f"
|
||||
fi
|
||||
done
|
||||
|
||||
# vim:set ft=sh sw=2 et:
|
||||
|
11333
linux-tkg-config/6.2/config.x86_64
Normal file
11333
linux-tkg-config/6.2/config.x86_64
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
# List of kernels that are maintained upstream
|
||||
_current_kernels=("6.1" "6.0" "5.15" "5.10" "5.4")
|
||||
_current_kernels=("6.2" "6.1" "6.0" "5.15" "5.10" "5.4")
|
||||
|
||||
# List of kernels that are no longer maintained upstream
|
||||
_eol_kernels=("5.19" "5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")
|
||||
@@ -39,32 +39,32 @@ done
|
||||
# PREEMPT_RT's supported kernel subversion
|
||||
typeset -Ag _rt_subver_map
|
||||
_rt_subver_map=(
|
||||
["5.4"]="209"
|
||||
["5.4"]="221"
|
||||
["5.9"]="1"
|
||||
["5.10"]="153"
|
||||
["5.11"]="4"
|
||||
["5.14"]="2"
|
||||
["5.15"]="76"
|
||||
["5.15"]="79"
|
||||
["5.16"]="2"
|
||||
["5.17"]="1"
|
||||
["6.0"]="5"
|
||||
["6.1"]="rc3"
|
||||
["6.1"]="rc7"
|
||||
)
|
||||
|
||||
# PREEMPT_RT's patch revision for the kernel
|
||||
# We separated this to allow for forcing the application of the patch when _preempt_rt_force=1 on version mismatch
|
||||
typeset -Ag _rt_rev_map
|
||||
_rt_rev_map=(
|
||||
["5.4"]="77"
|
||||
["5.4"]="79"
|
||||
["5.9"]="20"
|
||||
["5.10"]="76"
|
||||
["5.11"]="11"
|
||||
["5.14"]="21"
|
||||
["5.15"]="53"
|
||||
["5.15"]="54"
|
||||
["5.16"]="19"
|
||||
["5.17"]="17"
|
||||
["6.0"]="14"
|
||||
["6.1"]="2"
|
||||
["6.1"]="5"
|
||||
)
|
||||
|
||||
_undefine() {
|
||||
@@ -282,6 +282,9 @@ _set_cpu_scheduler() {
|
||||
elif [ "$_kver" = "600" ]; then
|
||||
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
|
||||
elif [ "$_kver" = "601" ]; then
|
||||
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
|
||||
_projectc_unoff=1
|
||||
elif [ "$_kver" = "602" ]; then
|
||||
_avail_cpu_scheds=("cfs" "tt" "bore")
|
||||
else
|
||||
_avail_cpu_scheds=("cfs")
|
||||
@@ -389,9 +392,15 @@ _linux_git_branch_checkout() {
|
||||
|
||||
cd "$_where"
|
||||
|
||||
if ! [ -d linux-src-git ]; then
|
||||
if ! [ -d linux-src-git ] || ( [ "$_source_in_tmpfs" = "true" ] && ! [ -d /tmp/linux-src-git ] ); then
|
||||
msg2 "First initialization of the linux source code git folder"
|
||||
if [ "$_source_in_tmpfs" = "true" ]; then
|
||||
rm -rf "${_where}/linux-src-git"
|
||||
mkdir "/tmp/linux-src-git"
|
||||
ln -s "/tmp/linux-src-git" "${_where}"
|
||||
else
|
||||
mkdir linux-src-git
|
||||
fi
|
||||
cd linux-src-git
|
||||
git init
|
||||
|
||||
@@ -399,6 +408,11 @@ _linux_git_branch_checkout() {
|
||||
git remote add "$remote" "${_kernel_git_remotes[$remote]}"
|
||||
done
|
||||
else
|
||||
if [ "$_source_in_tmpfs" = "true" ]; then
|
||||
rm -rf "${_where}/linux-src-git"
|
||||
ln -s "/tmp/linux-src-git" "${_where}"
|
||||
fi
|
||||
|
||||
cd linux-src-git
|
||||
|
||||
# Remove "origin" remote if present
|
||||
@@ -883,13 +897,19 @@ _tkg_srcprep() {
|
||||
if [ "$_kver" = "504" ] || [ "$_kver" = "509" ]; then
|
||||
scripts/config --set-val "RCU_BOOST_DELAY" "0"
|
||||
fi
|
||||
_disable "NTP_PPS" "CPU_FREQ_DEFAULT_GOV_PERFORMANCE_NODEF" "ZSWAP_COMPRESSOR_DEFAULT_LZO" "PROFILE_ALL_BRANCHES"
|
||||
_enable "CRYPTO_LZ4" "CRYPTO_LZ4HC" "LZ4_COMPRESS" "LZ4HC_COMPRESS" "ZSWAP_COMPRESSOR_DEFAULT_LZ4" "CMDLINE_BOOL" "BLK_DEV_LOOP" "X86_AMD_PSTATE" "AMD_PINCTRL" "CONTEXT_TRACKING_FORCE"
|
||||
_disable "DEBUG_FORCE_FUNCTION_ALIGN_64B"
|
||||
_disable "NTP_PPS" "ZSWAP_COMPRESSOR_DEFAULT_LZO" "PROFILE_ALL_BRANCHES"
|
||||
_enable "CRYPTO_LZ4" "CRYPTO_LZ4HC" "LZ4_COMPRESS" "LZ4HC_COMPRESS" "ZSWAP_COMPRESSOR_DEFAULT_LZ4" "X86_AMD_PSTATE" "AMD_PINCTRL"
|
||||
_disable "DEBUG_FORCE_FUNCTION_ALIGN_64B" "X86_P6_NOP"
|
||||
scripts/config --set-str "ZSWAP_COMPRESSOR_DEFAULT" "lz4"
|
||||
_enable "CPU_FREQ_DEFAULT_GOV_SCHEDUTIL"
|
||||
_disable "CPU_FREQ_DEFAULT_GOV_ONDEMAND" "CPU_FREQ_DEFAULT_GOV_CONSERVATIVE" "CPU_FREQ_DEFAULT_GOV_PERFORMANCE" "CPU_FREQ_DEFAULT_GOV_PERFORMANCE_NODEF"
|
||||
_module "BLK_DEV_LOOP"
|
||||
|
||||
if [ -n "$_custom_commandline" ]; then
|
||||
_enable "CMDLINE_BOOL"
|
||||
_disable "CMDLINE_OVERRIDE"
|
||||
scripts/config --set-str "CMDLINE" "${_custom_commandline}"
|
||||
_disable "CMDLINE_OVERRIDE" "X86_P6_NOP" "CPU_FREQ_DEFAULT_GOV_ONDEMAND" "CPU_FREQ_DEFAULT_GOV_CONSERVATIVE"
|
||||
#echo "# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set" >> ./.config
|
||||
fi
|
||||
|
||||
# openrgb
|
||||
_module "I2C_NCT6775"
|
||||
@@ -1342,14 +1362,14 @@ _tkg_srcprep() {
|
||||
_tickless="${_selected_index}"
|
||||
fi
|
||||
if [ "$_tickless" = "0" ]; then
|
||||
_disable "NO_HZ_FULL_NODEF" "NO_HZ_IDLE" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "VIRT_CPU_ACCOUNTING_GEN"
|
||||
_disable "NO_HZ_FULL_NODEF" "NO_HZ_IDLE" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "VIRT_CPU_ACCOUNTING" "VIRT_CPU_ACCOUNTING_GEN"
|
||||
_enable "HZ_PERIODIC" "TICK_CPU_ACCOUNTING"
|
||||
elif [ "$_tickless" = "1" ]; then
|
||||
_disable "HZ_PERIODIC" "NO_HZ_IDLE" "TICK_CPU_ACCOUNTING"
|
||||
_enable "NO_HZ_FULL_NODEF" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "CONTEXT_TRACKING" "VIRT_CPU_ACCOUNTING_GEN"
|
||||
_disable "HZ_PERIODIC" "NO_HZ_IDLE" "TICK_CPU_ACCOUNTING" "CONTEXT_TRACKING_FORCE"
|
||||
_enable "NO_HZ_FULL_NODEF" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "CONTEXT_TRACKING" "VIRT_CPU_ACCOUNTING" "VIRT_CPU_ACCOUNTING_GEN"
|
||||
else
|
||||
_disable "NO_HZ_FULL_NODEF" "HZ_PERIODIC" "NO_HZ_FULL" "TICK_CPU_ACCOUNTING"
|
||||
_enable "NO_HZ_IDLE" "NO_HZ" "NO_HZ_COMMON" "VIRT_CPU_ACCOUNTING_GEN"
|
||||
_disable "NO_HZ_FULL_NODEF" "HZ_PERIODIC" "NO_HZ_FULL" "TICK_CPU_ACCOUNTING" "CONTEXT_TRACKING_FORCE"
|
||||
_enable "NO_HZ_IDLE" "NO_HZ" "NO_HZ_COMMON" "CONTEXT_TRACKING" "VIRT_CPU_ACCOUNTING" "VIRT_CPU_ACCOUNTING_GEN"
|
||||
fi
|
||||
|
||||
# acs override
|
||||
@@ -1593,22 +1613,9 @@ CONFIG_DEBUG_INFO_BTF_MODULES=y\r
|
||||
fi
|
||||
|
||||
# NR_CPUS
|
||||
if [ -z "$_NR_CPUS_value" ]; then
|
||||
plain ""
|
||||
plain "Set NR_CPUS value to the current host's threads count?"
|
||||
plain "For best results, it should be equal to the maximum number of threads the target machine has."
|
||||
plain "If you want to use the resulting kernel on a machine with more threads, you can hit enter or answer N to use a default of 128."
|
||||
read -rp "`echo $' > N/y : '`" CONDITION_nrcpus;
|
||||
fi
|
||||
if [[ "$CONDITION_nrcpus" =~ [yY] ]]; then
|
||||
_NR_CPUS_value="$(nproc)"
|
||||
fi
|
||||
if [ -n "$_NR_CPUS_value" ]; then
|
||||
scripts/config --set-val "NR_CPUS" "$_NR_CPUS_value"
|
||||
_enable "FORCE_NR_CPUS"
|
||||
else
|
||||
scripts/config --set-val "NR_CPUS" "128"
|
||||
_disable "FORCE_NR_CPUS"
|
||||
fi
|
||||
|
||||
fi
|
||||
|
@@ -14,7 +14,7 @@ of Proton to still use fsync in new kernel releases.
|
||||
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
||||
---
|
||||
include/uapi/linux/futex.h | 12 ++++++
|
||||
kernel/futex.c | 75 +++++++++++++++++++++++++++++++++++++-
|
||||
kernel/futex/core.c | 75 +++++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 86 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
|
||||
@@ -47,10 +47,10 @@ index 2a06b99f9803..417c5d89b745 100644
|
||||
|
||||
/*
|
||||
* Support for robust futexes: the kernel cleans up held futexes at
|
||||
diff --git a/kernel/futex.c b/kernel/futex.c
|
||||
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
|
||||
index 4a9e7ce3714a..c3f2e65afab8 100644
|
||||
--- a/kernel/futex.c
|
||||
+++ b/kernel/futex.c
|
||||
--- a/kernel/futex/core.c
|
||||
+++ b/kernel/futex/core.c
|
||||
@@ -4012,6 +4012,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
|
||||
case FUTEX_LOCK_PI2:
|
||||
case FUTEX_WAIT_BITSET:
|
||||
|
@@ -43,7 +43,7 @@ Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.c
|
||||
include/linux/syscalls.h | 6 +
|
||||
include/uapi/asm-generic/unistd.h | 5 +-
|
||||
include/uapi/linux/futex.h | 26 +++
|
||||
kernel/futex.c | 334 ++++++++++++++++++++++++++++++
|
||||
kernel/futex/core.c | 334 ++++++++++++++++++++++++++++++
|
||||
kernel/sys_ni.c | 1 +
|
||||
5 files changed, 371 insertions(+), 1 deletion(-)
|
||||
|
||||
@@ -125,10 +125,10 @@ index a89eb0accd5e..1666f5e4b837 100644
|
||||
/*
|
||||
* Support for robust futexes: the kernel cleans up held futexes at
|
||||
* thread exit time.
|
||||
diff --git a/kernel/futex.c b/kernel/futex.c
|
||||
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
|
||||
index 408cad5e8968..d7dc0bd9379c 100644
|
||||
--- a/kernel/futex.c
|
||||
+++ b/kernel/futex.c
|
||||
--- a/kernel/futex/core.c
|
||||
+++ b/kernel/futex/core.c
|
||||
@@ -285,6 +285,18 @@ static const struct futex_q futex_q_init = {
|
||||
.requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
|
||||
};
|
||||
|
@@ -709,7 +709,7 @@ index 000000000000..8b0ddbdd24e4
|
||||
+#include <asm/switch_to.h>
|
||||
+
|
||||
+#include "../workqueue_internal.h"
|
||||
+#include "../../fs/io-wq.h"
|
||||
+#include "../../io_uring/io-wq.h"
|
||||
+#include "../smpboot.h"
|
||||
+
|
||||
+#include "pelt.h"
|
||||
|
@@ -1,121 +1,45 @@
|
||||
From 5ec2dd3a095442ec1a21d86042a4994f2ba24e63 Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <5ec2dd3a095442ec1a21d86042a4994f2ba24e63.1512651251.git.jan.steffens@gmail.com>
|
||||
From: Serge Hallyn <serge.hallyn@canonical.com>
|
||||
Date: Fri, 31 May 2013 19:12:12 +0100
|
||||
Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default
|
||||
|
||||
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
|
||||
[bwh: Remove unneeded binary sysctl bits]
|
||||
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
|
||||
---
|
||||
kernel/fork.c | 15 +++++++++++++++
|
||||
kernel/sysctl.c | 12 ++++++++++++
|
||||
kernel/user_namespace.c | 3 +++
|
||||
3 files changed, 30 insertions(+)
|
||||
|
||||
diff --git a/kernel/fork.c b/kernel/fork.c
|
||||
index 07cc743698d3668e..4011d68a8ff9305c 100644
|
||||
--- a/kernel/fork.c
|
||||
+++ b/kernel/fork.c
|
||||
@@ -102,6 +102,11 @@
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/task.h>
|
||||
+#ifdef CONFIG_USER_NS
|
||||
+extern int unprivileged_userns_clone;
|
||||
+#else
|
||||
+#define unprivileged_userns_clone 0
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* Minimum number of threads to boot the kernel
|
||||
@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
|
||||
+ if (!capable(CAP_SYS_ADMIN))
|
||||
+ return ERR_PTR(-EPERM);
|
||||
+
|
||||
/*
|
||||
* Thread groups must share signals as well, and detached threads
|
||||
* can only be started up within the thread group.
|
||||
@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
|
||||
if (unshare_flags & CLONE_NEWNS)
|
||||
unshare_flags |= CLONE_FS;
|
||||
|
||||
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
|
||||
+ err = -EPERM;
|
||||
+ if (!capable(CAP_SYS_ADMIN))
|
||||
+ goto bad_unshare_out;
|
||||
+ }
|
||||
+
|
||||
err = check_unshare_flags(unshare_flags);
|
||||
if (err)
|
||||
goto bad_unshare_out;
|
||||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
||||
index b86520ed3fb60fbf..f7dab3760839f1a1 100644
|
||||
--- a/kernel/sysctl.c
|
||||
+++ b/kernel/sysctl.c
|
||||
@@ -105,6 +105,9 @@ extern int core_uses_pid;
|
||||
|
||||
#if defined(CONFIG_SYSCTL)
|
||||
|
||||
+#ifdef CONFIG_USER_NS
|
||||
+extern int unprivileged_userns_clone;
|
||||
+#endif
|
||||
/* Constants used for minimum and maximum */
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = {
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
+#ifdef CONFIG_USER_NS
|
||||
+ {
|
||||
+ .procname = "unprivileged_userns_clone",
|
||||
+ .data = &unprivileged_userns_clone,
|
||||
+ .maxlen = sizeof(int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec,
|
||||
+ },
|
||||
+#endif
|
||||
#ifdef CONFIG_PROC_SYSCTL
|
||||
{
|
||||
.procname = "tainted",
|
||||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
|
||||
index c490f1e4313b998a..dd03bd39d7bf194d 100644
|
||||
--- a/kernel/user_namespace.c
|
||||
+++ b/kernel/user_namespace.c
|
||||
@@ -24,6 +24,9 @@
|
||||
#include <linux/projid.h>
|
||||
#include <linux/fs_struct.h>
|
||||
|
||||
+/* sysctl */
|
||||
+int unprivileged_userns_clone;
|
||||
+
|
||||
static struct kmem_cache *user_ns_cachep __read_mostly;
|
||||
static DEFINE_MUTEX(userns_state_mutex);
|
||||
|
||||
--
|
||||
2.15.1
|
||||
|
||||
From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001
|
||||
From d50977b164e708bf523a35ef53315355528c3ca6 Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
||||
Date: Thu, 7 Dec 2017 13:50:48 +0100
|
||||
Subject: ZEN: Add CONFIG for unprivileged_userns_clone
|
||||
Date: Mon, 16 Sep 2019 04:53:20 +0200
|
||||
Subject: [PATCH] ZEN: Add sysctl and CONFIG to disallow unprivileged
|
||||
CLONE_NEWUSER
|
||||
|
||||
This way our default behavior continues to match the vanilla kernel.
|
||||
Our default behavior continues to match the vanilla kernel.
|
||||
---
|
||||
include/linux/user_namespace.h | 4 ++++
|
||||
init/Kconfig | 16 ++++++++++++++++
|
||||
kernel/user_namespace.c | 4 ++++
|
||||
2 files changed, 20 insertions(+)
|
||||
kernel/fork.c | 14 ++++++++++++++
|
||||
kernel/sysctl.c | 12 ++++++++++++
|
||||
kernel/user_namespace.c | 7 +++++++
|
||||
5 files changed, 53 insertions(+)
|
||||
|
||||
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
|
||||
index 45f09bec02c485..87b20e2ee27445 100644
|
||||
--- a/include/linux/user_namespace.h
|
||||
+++ b/include/linux/user_namespace.h
|
||||
@@ -148,6 +148,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
|
||||
|
||||
#ifdef CONFIG_USER_NS
|
||||
|
||||
+extern int unprivileged_userns_clone;
|
||||
+
|
||||
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
||||
{
|
||||
if (ns)
|
||||
@@ -181,6 +183,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
|
||||
struct ns_common *ns_get_owner(struct ns_common *ns);
|
||||
#else
|
||||
|
||||
+#define unprivileged_userns_clone 0
|
||||
+
|
||||
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
||||
{
|
||||
return &init_user_ns;
|
||||
diff --git a/init/Kconfig b/init/Kconfig
|
||||
index 4592bf7997c0..f3df02990aff 100644
|
||||
index 94125d3b6893c7..9f7139b536f638 100644
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1004,6 +1004,22 @@ config USER_NS
|
||||
@@ -1247,6 +1247,22 @@ config USER_NS
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
@@ -138,19 +62,90 @@ index 4592bf7997c0..f3df02990aff 100644
|
||||
config PID_NS
|
||||
bool "PID Namespaces"
|
||||
default y
|
||||
diff --git a/kernel/fork.c b/kernel/fork.c
|
||||
index 08969f5aa38d59..ff601cb7a1fae0 100644
|
||||
--- a/kernel/fork.c
|
||||
+++ b/kernel/fork.c
|
||||
@@ -98,6 +98,10 @@
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/bpf.h>
|
||||
|
||||
+#ifdef CONFIG_USER_NS
|
||||
+#include <linux/user_namespace.h>
|
||||
+#endif
|
||||
+
|
||||
#include <asm/pgalloc.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/mmu_context.h>
|
||||
@@ -2008,6 +2012,10 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
|
||||
+ if (!capable(CAP_SYS_ADMIN))
|
||||
+ return ERR_PTR(-EPERM);
|
||||
+
|
||||
/*
|
||||
* Thread groups must share signals as well, and detached threads
|
||||
* can only be started up within the thread group.
|
||||
@@ -3166,6 +3174,12 @@ int ksys_unshare(unsigned long unshare_flags)
|
||||
if (unshare_flags & CLONE_NEWNS)
|
||||
unshare_flags |= CLONE_FS;
|
||||
|
||||
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
|
||||
+ err = -EPERM;
|
||||
+ if (!capable(CAP_SYS_ADMIN))
|
||||
+ goto bad_unshare_out;
|
||||
+ }
|
||||
+
|
||||
err = check_unshare_flags(unshare_flags);
|
||||
if (err)
|
||||
goto bad_unshare_out;
|
||||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
||||
index c6d9dec11b749d..9a4514ad481b21 100644
|
||||
--- a/kernel/sysctl.c
|
||||
+++ b/kernel/sysctl.c
|
||||
@@ -81,6 +81,9 @@
|
||||
#ifdef CONFIG_RT_MUTEXES
|
||||
#include <linux/rtmutex.h>
|
||||
#endif
|
||||
+#ifdef CONFIG_USER_NS
|
||||
+#include <linux/user_namespace.h>
|
||||
+#endif
|
||||
|
||||
/* shared constants to be used in various sysctls */
|
||||
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
|
||||
@@ -1659,6 +1662,15 @@ static struct ctl_table kern_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
+#ifdef CONFIG_USER_NS
|
||||
+ {
|
||||
+ .procname = "unprivileged_userns_clone",
|
||||
+ .data = &unprivileged_userns_clone,
|
||||
+ .maxlen = sizeof(int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec,
|
||||
+ },
|
||||
+#endif
|
||||
#ifdef CONFIG_PROC_SYSCTL
|
||||
{
|
||||
.procname = "tainted",
|
||||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
|
||||
index 6b9dbc257e34..107b17f0d528 100644
|
||||
index 54211dbd516c57..16ca0c1516298d 100644
|
||||
--- a/kernel/user_namespace.c
|
||||
+++ b/kernel/user_namespace.c
|
||||
@@ -27,7 +27,11 @@
|
||||
@@ -22,6 +22,13 @@
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
/* sysctl */
|
||||
+/* sysctl */
|
||||
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
|
||||
+int unprivileged_userns_clone = 1;
|
||||
+#else
|
||||
int unprivileged_userns_clone;
|
||||
+int unprivileged_userns_clone;
|
||||
+#endif
|
||||
|
||||
+
|
||||
static struct kmem_cache *user_ns_cachep __read_mostly;
|
||||
static DEFINE_MUTEX(userns_state_mutex);
|
||||
|
||||
|
@@ -403,34 +403,6 @@ index 84badf00647e..6a922bca9f39 100644
|
||||
2.28.0
|
||||
|
||||
|
||||
From 816ee502759e954304693813bd03d94986b28dba Mon Sep 17 00:00:00 2001
|
||||
From: Tk-Glitch <ti3nou@gmail.com>
|
||||
Date: Mon, 18 Feb 2019 17:40:57 +0100
|
||||
Subject: [PATCH 11/17] mm: Set watermark_scale_factor to 200 (from 10)
|
||||
|
||||
Multiple users have reported it's helping reducing/eliminating stuttering
|
||||
with DXVK.
|
||||
---
|
||||
mm/page_alloc.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index 898ff44f2c7b..e72074034793 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -330,7 +330,7 @@ int watermark_boost_factor __read_mostly;
|
||||
int min_free_kbytes = 1024;
|
||||
int user_min_free_kbytes = -1;
|
||||
int watermark_boost_factor __read_mostly = 15000;
|
||||
-int watermark_scale_factor = 10;
|
||||
+int watermark_scale_factor = 200;
|
||||
|
||||
static unsigned long nr_kernel_pages __initdata;
|
||||
static unsigned long nr_all_pages __initdata;
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
|
||||
From: Tk-Glitch <ti3nou@gmail.com>
|
||||
Date: Fri, 19 Apr 2019 12:33:38 +0200
|
||||
@@ -490,7 +462,6 @@ index b0cefe94920d..890165099b07 100644
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <admfrade@gmail.com>
|
||||
Date: Mon, 25 Nov 2019 15:13:06 -0300
|
||||
@@ -619,3 +590,232 @@ index 36a469150ff9..aee891c9b78a 100644
|
||||
--
|
||||
2.28.0
|
||||
|
||||
From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001
|
||||
From: Kenny Levinsen <kl@kl.wtf>
|
||||
Date: Sun, 27 Dec 2020 14:43:13 +0000
|
||||
Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client
|
||||
|
||||
Significant time was spent on synchronize_rcu in evdev_detach_client
|
||||
when applications closed evdev devices. Switching VT away from a
|
||||
graphical environment commonly leads to mass input device closures,
|
||||
which could lead to noticable delays on systems with many input devices.
|
||||
|
||||
Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
|
||||
client struct till after the RCU grace period instead of blocking the
|
||||
calling application.
|
||||
|
||||
While this does not solve all slow evdev fd closures, it takes care of a
|
||||
good portion of them, including this simple test:
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int idx, fd;
|
||||
const char *path = "/dev/input/event0";
|
||||
for (idx = 0; idx < 1000; idx++) {
|
||||
if ((fd = open(path, O_RDWR)) == -1) {
|
||||
return -1;
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Time to completion of above test when run locally:
|
||||
|
||||
Before: 0m27.111s
|
||||
After: 0m0.018s
|
||||
|
||||
Signed-off-by: Kenny Levinsen <kl@kl.wtf>
|
||||
---
|
||||
drivers/input/evdev.c | 19 +++++++++++--------
|
||||
1 file changed, 11 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
|
||||
index 95f90699d2b17b..2b10fe29d2c8d9 100644
|
||||
--- a/drivers/input/evdev.c
|
||||
+++ b/drivers/input/evdev.c
|
||||
@@ -46,6 +46,7 @@ struct evdev_client {
|
||||
struct fasync_struct *fasync;
|
||||
struct evdev *evdev;
|
||||
struct list_head node;
|
||||
+ struct rcu_head rcu;
|
||||
enum input_clock_type clk_type;
|
||||
bool revoked;
|
||||
unsigned long *evmasks[EV_CNT];
|
||||
@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
|
||||
spin_unlock(&evdev->client_lock);
|
||||
}
|
||||
|
||||
+static void evdev_reclaim_client(struct rcu_head *rp)
|
||||
+{
|
||||
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
|
||||
+ unsigned int i;
|
||||
+ for (i = 0; i < EV_CNT; ++i)
|
||||
+ bitmap_free(client->evmasks[i]);
|
||||
+ kvfree(client);
|
||||
+}
|
||||
+
|
||||
static void evdev_detach_client(struct evdev *evdev,
|
||||
struct evdev_client *client)
|
||||
{
|
||||
spin_lock(&evdev->client_lock);
|
||||
list_del_rcu(&client->node);
|
||||
spin_unlock(&evdev->client_lock);
|
||||
- synchronize_rcu();
|
||||
+ call_rcu(&client->rcu, evdev_reclaim_client);
|
||||
}
|
||||
|
||||
static int evdev_open_device(struct evdev *evdev)
|
||||
@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct evdev_client *client = file->private_data;
|
||||
struct evdev *evdev = client->evdev;
|
||||
- unsigned int i;
|
||||
|
||||
mutex_lock(&evdev->mutex);
|
||||
|
||||
@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
|
||||
|
||||
evdev_detach_client(evdev, client);
|
||||
|
||||
- for (i = 0; i < EV_CNT; ++i)
|
||||
- bitmap_free(client->evmasks[i]);
|
||||
-
|
||||
- kvfree(client);
|
||||
-
|
||||
evdev_close_device(evdev);
|
||||
|
||||
return 0;
|
||||
@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
|
||||
|
||||
err_free_client:
|
||||
evdev_detach_client(evdev, client);
|
||||
- kvfree(client);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
|
||||
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Date: Wed, 20 Oct 2021 20:50:11 -0700
|
||||
Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
|
||||
scheduling delays
|
||||
|
||||
The page allocator processes free pages in groups of pageblocks, where
|
||||
the size of a pageblock is typically quite large (1024 pages without
|
||||
hugetlbpage support). Pageblocks are processed atomically with the zone
|
||||
lock held, which can cause severe scheduling delays on both the CPU
|
||||
going through the pageblock and any other CPUs waiting to acquire the
|
||||
zone lock. A frequent offender is move_freepages_block(), which is used
|
||||
by rmqueue() for page allocation.
|
||||
|
||||
As it turns out, there's no requirement for pageblocks to be so large,
|
||||
so the pageblock order can simply be reduced to ease the scheduling
|
||||
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
|
||||
reasonable setting to ensure non-costly page allocation requests can
|
||||
still be serviced without always needing to free up more than one
|
||||
pageblock's worth of pages at a time.
|
||||
|
||||
This has a noticeable effect on overall system latency when memory
|
||||
pressure is elevated. The various mm functions which operate on
|
||||
pageblocks no longer appear in the preemptoff tracer, where previously
|
||||
they would spend up to 100 ms on a mobile arm64 CPU processing a
|
||||
pageblock with preemption disabled and the zone lock held.
|
||||
|
||||
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
---
|
||||
include/linux/pageblock-flags.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
|
||||
index 5f1ae07d724b88..97cda629c9e909 100644
|
||||
--- a/include/linux/pageblock-flags.h
|
||||
+++ b/include/linux/pageblock-flags.h
|
||||
@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;
|
||||
#else /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
|
||||
-#define pageblock_order (MAX_ORDER-1)
|
||||
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER
|
||||
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
|
||||
From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
|
||||
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Date: Wed, 20 Oct 2021 20:50:32 -0700
|
||||
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()
|
||||
|
||||
There is noticeable scheduling latency and heavy zone lock contention
|
||||
stemming from rmqueue_bulk's single hold of the zone lock while doing
|
||||
its work, as seen with the preemptoff tracer. There's no actual need for
|
||||
rmqueue_bulk() to hold the zone lock the entire time; it only does so
|
||||
for supposed efficiency. As such, we can relax the zone lock and even
|
||||
reschedule when IRQs are enabled in order to keep the scheduling delays
|
||||
and zone lock contention at bay. Forward progress is still guaranteed,
|
||||
as the zone lock can only be relaxed after page removal.
|
||||
|
||||
With this change, rmqueue_bulk() no longer appears as a serious offender
|
||||
in the preemptoff tracer, and system latency is noticeably improved.
|
||||
|
||||
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
---
|
||||
mm/page_alloc.c | 23 ++++++++++++++++++-----
|
||||
1 file changed, 18 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index a0b0397e29ee4c..87a983a356530c 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -3119,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
|
||||
}
|
||||
|
||||
/*
|
||||
- * Obtain a specified number of elements from the buddy allocator, all under
|
||||
- * a single hold of the lock, for efficiency. Add them to the supplied list.
|
||||
- * Returns the number of new pages which were placed at *list.
|
||||
+ * Obtain a specified number of elements from the buddy allocator, and relax the
|
||||
+ * zone lock when needed. Add them to the supplied list. Returns the number of
|
||||
+ * new pages which were placed at *list.
|
||||
*/
|
||||
static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||
unsigned long count, struct list_head *list,
|
||||
int migratetype, unsigned int alloc_flags)
|
||||
{
|
||||
- int i, allocated = 0;
|
||||
+ const bool can_resched = !preempt_count() && !irqs_disabled();
|
||||
+ int i, allocated = 0, last_mod = 0;
|
||||
|
||||
/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
|
||||
spin_lock(&zone->lock);
|
||||
@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||
if (unlikely(page == NULL))
|
||||
break;
|
||||
|
||||
+ /* Reschedule and ease the contention on the lock if needed */
|
||||
+ if (i + 1 < count && ((can_resched && need_resched()) ||
|
||||
+ spin_needbreak(&zone->lock))) {
|
||||
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
|
||||
+ -((i + 1 - last_mod) << order));
|
||||
+ last_mod = i + 1;
|
||||
+ spin_unlock(&zone->lock);
|
||||
+ if (can_resched)
|
||||
+ cond_resched();
|
||||
+ spin_lock(&zone->lock);
|
||||
+ }
|
||||
+
|
||||
if (unlikely(check_pcp_refill(page, order)))
|
||||
continue;
|
||||
|
||||
@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||
* on i. Do not confuse with 'allocated' which is the number of
|
||||
* pages added to the pcp list.
|
||||
*/
|
||||
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
|
||||
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
|
||||
spin_unlock(&zone->lock);
|
||||
return allocated;
|
||||
}
|
||||
|
96926
linux-tkg-patches/6.1/0008-6.1-bcachefs.patch
Normal file
96926
linux-tkg-patches/6.1/0008-6.1-bcachefs.patch
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,62 @@
|
||||
From 4b5e85eec8c30a02e45965aa898d26ed8fdd32be Mon Sep 17 00:00:00 2001
|
||||
From: Peter Jung <admin@ptr1337.dev>
|
||||
Date: Mon, 12 Dec 2022 11:28:51 +0100
|
||||
Subject: [PATCH] prjc-cachy
|
||||
|
||||
Signed-off-by: Peter Jung <admin@ptr1337.dev>
|
||||
---
|
||||
.../admin-guide/kernel-parameters.txt | 6 +
|
||||
Documentation/admin-guide/sysctl/kernel.rst | 10 +
|
||||
Documentation/scheduler/sched-BMQ.txt | 110 +
|
||||
fs/proc/base.c | 2 +-
|
||||
include/asm-generic/resource.h | 2 +-
|
||||
include/linux/sched.h | 33 +-
|
||||
include/linux/sched/deadline.h | 20 +
|
||||
include/linux/sched/prio.h | 26 +
|
||||
include/linux/sched/rt.h | 2 +
|
||||
include/linux/sched/topology.h | 3 +-
|
||||
init/Kconfig | 34 +
|
||||
init/init_task.c | 18 +
|
||||
kernel/Kconfig.preempt | 2 +-
|
||||
kernel/cgroup/cpuset.c | 4 +-
|
||||
kernel/delayacct.c | 2 +-
|
||||
kernel/exit.c | 4 +-
|
||||
kernel/locking/rtmutex.c | 16 +-
|
||||
kernel/sched/Makefile | 5 +
|
||||
kernel/sched/alt_core.c | 7971 +++++++++++++++++
|
||||
kernel/sched/alt_debug.c | 31 +
|
||||
kernel/sched/alt_sched.h | 658 ++
|
||||
kernel/sched/bmq.h | 110 +
|
||||
kernel/sched/build_policy.c | 8 +-
|
||||
kernel/sched/build_utility.c | 3 +-
|
||||
kernel/sched/cpufreq_schedutil.c | 10 +
|
||||
kernel/sched/cputime.c | 10 +-
|
||||
kernel/sched/debug.c | 10 +
|
||||
kernel/sched/idle.c | 2 +
|
||||
kernel/sched/pds.h | 127 +
|
||||
kernel/sched/pelt.c | 4 +-
|
||||
kernel/sched/pelt.h | 8 +-
|
||||
kernel/sched/sched.h | 9 +
|
||||
kernel/sched/stats.c | 4 +
|
||||
kernel/sched/stats.h | 2 +
|
||||
kernel/sched/topology.c | 17 +
|
||||
kernel/sysctl.c | 15 +
|
||||
kernel/time/hrtimer.c | 2 +
|
||||
kernel/time/posix-cpu-timers.c | 10 +-
|
||||
kernel/trace/trace_selftest.c | 5 +
|
||||
39 files changed, 9292 insertions(+), 23 deletions(-)
|
||||
create mode 100644 Documentation/scheduler/sched-BMQ.txt
|
||||
create mode 100644 kernel/sched/alt_core.c
|
||||
create mode 100644 kernel/sched/alt_debug.c
|
||||
create mode 100644 kernel/sched/alt_sched.h
|
||||
create mode 100644 kernel/sched/bmq.h
|
||||
create mode 100644 kernel/sched/pds.h
|
||||
|
||||
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
|
||||
index 42af9ca0127e..31747ec54f9d 100644
|
||||
index 9ffeb6f44966..4dbc3b80f406 100644
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -5406,6 +5406,12 @@
|
||||
@@ -5415,6 +5415,12 @@
|
||||
sa1100ir [NET]
|
||||
See drivers/net/irda/sa1100_ir.c.
|
||||
|
||||
@@ -176,7 +230,7 @@ index 8874f681b056..59eb72bf7d5f 100644
|
||||
[RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \
|
||||
}
|
||||
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
||||
index ffb6eb55cd13..2e730a59caa2 100644
|
||||
index 5affff14993d..0fe3ce1d81c0 100644
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -762,8 +762,14 @@ struct task_struct {
|
||||
@@ -232,7 +286,7 @@ index ffb6eb55cd13..2e730a59caa2 100644
|
||||
|
||||
#ifdef CONFIG_CGROUP_SCHED
|
||||
struct task_group *sched_task_group;
|
||||
@@ -1545,6 +1567,15 @@ struct task_struct {
|
||||
@@ -1542,6 +1564,15 @@ struct task_struct {
|
||||
*/
|
||||
};
|
||||
|
||||
@@ -352,10 +406,10 @@ index 816df6cc444e..c8da08e18c91 100644
|
||||
#else
|
||||
static inline void rebuild_sched_domains_energy(void)
|
||||
diff --git a/init/Kconfig b/init/Kconfig
|
||||
index 94125d3b6893..c87ba766d354 100644
|
||||
index 5cf5c424fbf1..35d3ec42df0f 100644
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -819,6 +819,7 @@ menu "Scheduler features"
|
||||
@@ -836,6 +836,7 @@ menu "Scheduler features"
|
||||
config UCLAMP_TASK
|
||||
bool "Enable utilization clamping for RT/FAIR tasks"
|
||||
depends on CPU_FREQ_GOV_SCHEDUTIL
|
||||
@@ -363,21 +417,21 @@ index 94125d3b6893..c87ba766d354 100644
|
||||
help
|
||||
This feature enables the scheduler to track the clamped utilization
|
||||
of each CPU based on RUNNABLE tasks scheduled on that CPU.
|
||||
@@ -865,6 +866,35 @@ config UCLAMP_BUCKETS_COUNT
|
||||
@@ -882,6 +883,35 @@ config UCLAMP_BUCKETS_COUNT
|
||||
|
||||
If in doubt, use the default value.
|
||||
|
||||
+menuconfig SCHED_ALT
|
||||
+ bool "Alternative CPU Schedulers"
|
||||
+ default y
|
||||
+ default n
|
||||
+ help
|
||||
+ This feature enable alternative CPU scheduler"
|
||||
+ This feature enables the ProjectC alternative CPU schedulers.
|
||||
+
|
||||
+if SCHED_ALT
|
||||
+
|
||||
+choice
|
||||
+ prompt "Alternative CPU Scheduler"
|
||||
+ default SCHED_BMQ
|
||||
+ prompt "Alternative CPU schedulers"
|
||||
+ default SCHED_PDS
|
||||
+
|
||||
+config SCHED_BMQ
|
||||
+ bool "BMQ CPU scheduler"
|
||||
@@ -399,7 +453,7 @@ index 94125d3b6893..c87ba766d354 100644
|
||||
endmenu
|
||||
|
||||
#
|
||||
@@ -918,6 +948,7 @@ config NUMA_BALANCING
|
||||
@@ -935,6 +965,7 @@ config NUMA_BALANCING
|
||||
depends on ARCH_SUPPORTS_NUMA_BALANCING
|
||||
depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
|
||||
depends on SMP && NUMA && MIGRATION && !PREEMPT_RT
|
||||
@@ -407,7 +461,7 @@ index 94125d3b6893..c87ba766d354 100644
|
||||
help
|
||||
This option adds support for automatic NUMA aware memory/task placement.
|
||||
The mechanism is quite primitive and is based on migrating memory when
|
||||
@@ -1015,6 +1046,7 @@ config FAIR_GROUP_SCHED
|
||||
@@ -1032,6 +1063,7 @@ config FAIR_GROUP_SCHED
|
||||
depends on CGROUP_SCHED
|
||||
default CGROUP_SCHED
|
||||
|
||||
@@ -415,7 +469,7 @@ index 94125d3b6893..c87ba766d354 100644
|
||||
config CFS_BANDWIDTH
|
||||
bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
|
||||
depends on FAIR_GROUP_SCHED
|
||||
@@ -1037,6 +1069,7 @@ config RT_GROUP_SCHED
|
||||
@@ -1054,6 +1086,7 @@ config RT_GROUP_SCHED
|
||||
realtime bandwidth for them.
|
||||
See Documentation/scheduler/sched-rt-group.rst for more information.
|
||||
|
||||
@@ -423,7 +477,7 @@ index 94125d3b6893..c87ba766d354 100644
|
||||
endif #CGROUP_SCHED
|
||||
|
||||
config UCLAMP_TASK_GROUP
|
||||
@@ -1281,6 +1314,7 @@ config CHECKPOINT_RESTORE
|
||||
@@ -1314,6 +1347,7 @@ config CHECKPOINT_RESTORE
|
||||
|
||||
config SCHED_AUTOGROUP
|
||||
bool "Automatic process group scheduling"
|
||||
@@ -632,10 +686,10 @@ index 976092b7bd45..31d587c16ec1 100644
|
||||
obj-y += build_utility.o
|
||||
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
|
||||
new file mode 100644
|
||||
index 000000000000..4bea0c025475
|
||||
index 000000000000..d3f6f2e1badd
|
||||
--- /dev/null
|
||||
+++ b/kernel/sched/alt_core.c
|
||||
@@ -0,0 +1,7912 @@
|
||||
@@ -0,0 +1,7971 @@
|
||||
+/*
|
||||
+ * kernel/sched/alt_core.c
|
||||
+ *
|
||||
@@ -706,7 +760,7 @@ index 000000000000..4bea0c025475
|
||||
+#define sched_feat(x) (0)
|
||||
+#endif /* CONFIG_SCHED_DEBUG */
|
||||
+
|
||||
+#define ALT_SCHED_VERSION "v6.1-r0"
|
||||
+#define ALT_SCHED_VERSION "v6.1-r0-CachyOS"
|
||||
+
|
||||
+/* rt_prio(prio) defined in include/linux/sched/rt.h */
|
||||
+#define rt_task(p) rt_prio((p)->prio)
|
||||
@@ -786,7 +840,91 @@ index 000000000000..4bea0c025475
|
||||
+#ifdef CONFIG_SCHED_SMT
|
||||
+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp;
|
||||
+#endif
|
||||
+static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp;
|
||||
+
|
||||
+#define BITS_PER_ATOMIC_LONG_T BITS_PER_LONG
|
||||
+typedef struct sched_bitmask {
|
||||
+ atomic_long_t bits[DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T)];
|
||||
+} sched_bitmask_t;
|
||||
+static sched_bitmask_t sched_rq_watermark[NR_CPUS] ____cacheline_aligned_in_smp;
|
||||
+
|
||||
+#define x(p, set, mask) \
|
||||
+ do { \
|
||||
+ smp_mb__before_atomic(); \
|
||||
+ if (set) \
|
||||
+ atomic_long_or((mask), (p)); \
|
||||
+ else \
|
||||
+ atomic_long_and(~(mask), (p)); \
|
||||
+ smp_mb__after_atomic(); \
|
||||
+ } while (0)
|
||||
+
|
||||
+static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned int end,
|
||||
+ unsigned int start, bool set)
|
||||
+{
|
||||
+ unsigned int start_idx, start_bit;
|
||||
+ unsigned int end_idx, end_bit;
|
||||
+ atomic_long_t *p;
|
||||
+
|
||||
+ if (end == start) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ start_idx = start / BITS_PER_ATOMIC_LONG_T;
|
||||
+ start_bit = start % BITS_PER_ATOMIC_LONG_T;
|
||||
+ end_idx = (end - 1) / BITS_PER_ATOMIC_LONG_T;
|
||||
+ end_bit = (end - 1) % BITS_PER_ATOMIC_LONG_T;
|
||||
+ p = &sched_rq_watermark[cpu].bits[end_idx];
|
||||
+
|
||||
+ if (end_idx == start_idx) {
|
||||
+ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)) & (~0UL << start_bit));
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (end_bit != BITS_PER_ATOMIC_LONG_T - 1) {
|
||||
+ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)));
|
||||
+ p -= 1;
|
||||
+ end_idx -= 1;
|
||||
+ }
|
||||
+
|
||||
+ while (end_idx != start_idx) {
|
||||
+ smp_mb__before_atomic();
|
||||
+ atomic_long_set(p, set ? ~0UL : 0);
|
||||
+ smp_mb__after_atomic();
|
||||
+ p -= 1;
|
||||
+ end_idx -= 1;
|
||||
+ }
|
||||
+
|
||||
+ x(p, set, ~0UL << start_bit);
|
||||
+}
|
||||
+
|
||||
+#undef x
|
||||
+
|
||||
+static __always_inline bool sched_rq_watermark_and(cpumask_t *dstp, const cpumask_t *cpus, int prio, bool not)
|
||||
+{
|
||||
+ int cpu;
|
||||
+ bool ret = false;
|
||||
+ int idx = prio / BITS_PER_ATOMIC_LONG_T;
|
||||
+ int bit = prio % BITS_PER_ATOMIC_LONG_T;
|
||||
+
|
||||
+ cpumask_clear(dstp);
|
||||
+ for_each_cpu(cpu, cpus)
|
||||
+ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not) {
|
||||
+ __cpumask_set_cpu(cpu, dstp);
|
||||
+ ret = true;
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static __always_inline bool sched_rq_watermark_test(const cpumask_t *cpus, int prio, bool not)
|
||||
+{
|
||||
+ int cpu;
|
||||
+ int idx = prio / BITS_PER_ATOMIC_LONG_T;
|
||||
+ int bit = prio % BITS_PER_ATOMIC_LONG_T;
|
||||
+
|
||||
+ for_each_cpu(cpu, cpus)
|
||||
+ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not)
|
||||
+ return true;
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+/* sched_queue related functions */
|
||||
+static inline void sched_queue_init(struct sched_queue *q)
|
||||
@@ -815,7 +953,6 @@ index 000000000000..4bea0c025475
|
||||
+{
|
||||
+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS);
|
||||
+ unsigned long last_wm = rq->watermark;
|
||||
+ unsigned long i;
|
||||
+ int cpu;
|
||||
+
|
||||
+ if (watermark == last_wm)
|
||||
@@ -824,28 +961,25 @@ index 000000000000..4bea0c025475
|
||||
+ rq->watermark = watermark;
|
||||
+ cpu = cpu_of(rq);
|
||||
+ if (watermark < last_wm) {
|
||||
+ for (i = last_wm; i > watermark; i--)
|
||||
+ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
|
||||
+ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - watermark, SCHED_QUEUE_BITS - last_wm, false);
|
||||
+#ifdef CONFIG_SCHED_SMT
|
||||
+ if (static_branch_likely(&sched_smt_present) &&
|
||||
+ IDLE_TASK_SCHED_PRIO == last_wm)
|
||||
+ unlikely(IDLE_TASK_SCHED_PRIO == last_wm))
|
||||
+ cpumask_andnot(&sched_sg_idle_mask,
|
||||
+ &sched_sg_idle_mask, cpu_smt_mask(cpu));
|
||||
+#endif
|
||||
+ return;
|
||||
+ }
|
||||
+ /* last_wm < watermark */
|
||||
+ for (i = watermark; i > last_wm; i--)
|
||||
+ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
|
||||
+ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - last_wm, SCHED_QUEUE_BITS - watermark, true);
|
||||
+#ifdef CONFIG_SCHED_SMT
|
||||
+ if (static_branch_likely(&sched_smt_present) &&
|
||||
+ IDLE_TASK_SCHED_PRIO == watermark) {
|
||||
+ cpumask_t tmp;
|
||||
+ unlikely(IDLE_TASK_SCHED_PRIO == watermark)) {
|
||||
+ const cpumask_t *smt_mask = cpu_smt_mask(cpu);
|
||||
+
|
||||
+ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark);
|
||||
+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu)))
|
||||
+ if (!sched_rq_watermark_test(smt_mask, 0, true))
|
||||
+ cpumask_or(&sched_sg_idle_mask,
|
||||
+ &sched_sg_idle_mask, cpu_smt_mask(cpu));
|
||||
+ &sched_sg_idle_mask, smt_mask);
|
||||
+ }
|
||||
+#endif
|
||||
+}
|
||||
@@ -928,7 +1062,8 @@ index 000000000000..4bea0c025475
|
||||
+ * p->se.load, p->rt_priority,
|
||||
+ * p->dl.dl_{runtime, deadline, period, flags, bw, density}
|
||||
+ * - sched_setnuma(): p->numa_preferred_nid
|
||||
+ * - sched_move_task(): p->sched_task_group
|
||||
+ * - sched_move_task()/
|
||||
+ * cpu_cgroup_fork(): p->sched_task_group
|
||||
+ * - uclamp_update_active() p->uclamp*
|
||||
+ *
|
||||
+ * p->state <- TASK_*:
|
||||
@@ -1189,7 +1324,6 @@ index 000000000000..4bea0c025475
|
||||
+
|
||||
+ rq->prev_irq_time += irq_delta;
|
||||
+ delta -= irq_delta;
|
||||
+ psi_account_irqtime(rq->curr, irq_delta);
|
||||
+#endif
|
||||
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
|
||||
+ if (static_key_false((¶virt_steal_rq_enabled))) {
|
||||
@@ -1262,15 +1396,15 @@ index 000000000000..4bea0c025475
|
||||
+ rq->load_stamp = time;
|
||||
+}
|
||||
+
|
||||
+unsigned long rq_load_util(struct rq *rq, unsigned long max)
|
||||
+unsigned long rq_load_util(struct rq *rq, int cpu)
|
||||
+{
|
||||
+ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (max >> RQ_UTIL_SHIFT);
|
||||
+ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (arch_scale_cpu_capacity(cpu) >> RQ_UTIL_SHIFT);
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_SMP
|
||||
+unsigned long sched_cpu_util(int cpu)
|
||||
+{
|
||||
+ return rq_load_util(cpu_rq(cpu), arch_scale_cpu_capacity(cpu));
|
||||
+ return rq_load_util(cpu_rq(cpu), cpu);
|
||||
+}
|
||||
+#endif /* CONFIG_SMP */
|
||||
+
|
||||
@@ -2536,9 +2670,9 @@ index 000000000000..4bea0c025475
|
||||
+#ifdef CONFIG_SCHED_SMT
|
||||
+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) ||
|
||||
+#endif
|
||||
+ cpumask_and(&tmp, &chk_mask, sched_rq_watermark) ||
|
||||
+ cpumask_and(&tmp, &chk_mask,
|
||||
+ sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p)))
|
||||
+ sched_rq_watermark_and(&tmp, &chk_mask, 0, false) ||
|
||||
+ sched_rq_watermark_and(&tmp, &chk_mask,
|
||||
+ SCHED_QUEUE_BITS - 1 - task_sched_prio(p), false))
|
||||
+ return best_mask_cpu(task_cpu(p), &tmp);
|
||||
+
|
||||
+ return best_mask_cpu(task_cpu(p), &chk_mask);
|
||||
@@ -2979,13 +3113,6 @@ index 000000000000..4bea0c025475
|
||||
+ if (!llist)
|
||||
+ return;
|
||||
+
|
||||
+ /*
|
||||
+ * rq::ttwu_pending racy indication of out-standing wakeups.
|
||||
+ * Races such that false-negatives are possible, since they
|
||||
+ * are shorter lived that false-positives would be.
|
||||
+ */
|
||||
+ WRITE_ONCE(rq->ttwu_pending, 0);
|
||||
+
|
||||
+ rq_lock_irqsave(rq, &rf);
|
||||
+ update_rq_clock(rq);
|
||||
+
|
||||
@@ -2999,6 +3126,17 @@ index 000000000000..4bea0c025475
|
||||
+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Must be after enqueueing at least one task such that
|
||||
+ * idle_cpu() does not observe a false-negative -- if it does,
|
||||
+ * it is possible for select_idle_siblings() to stack a number
|
||||
+ * of tasks on this CPU during that window.
|
||||
+ *
|
||||
+ * It is ok to clear ttwu_pending when another task is pending.
|
||||
+ * We will receive IPI after local irq enabled and then enqueue it.
|
||||
+ * Since now nr_running > 0, idle_cpu() will always get correct result.
|
||||
+ */
|
||||
+ WRITE_ONCE(rq->ttwu_pending, 0);
|
||||
+ rq_unlock_irqrestore(rq, &rf);
|
||||
+}
|
||||
+
|
||||
@@ -3938,8 +4076,7 @@ index 000000000000..4bea0c025475
|
||||
+ * Claim the task as running, we do this before switching to it
|
||||
+ * such that any running task will have this set.
|
||||
+ *
|
||||
+ * See the smp_load_acquire(&p->on_cpu) case in ttwu() and
|
||||
+ * its ordering comment.
|
||||
+ * See the ttwu() WF_ON_CPU case and its ordering comment.
|
||||
+ */
|
||||
+ WRITE_ONCE(next->on_cpu, 1);
|
||||
+}
|
||||
@@ -4009,7 +4146,7 @@ index 000000000000..4bea0c025475
|
||||
+ if (likely(!head))
|
||||
+ return NULL;
|
||||
+
|
||||
+ lockdep_assert_rq_held(rq);
|
||||
+ lockdep_assert_held(&rq->lock);
|
||||
+ /*
|
||||
+ * Must not take balance_push_callback off the list when
|
||||
+ * splice_balance_callbacks() and balance_callbacks() are not
|
||||
@@ -4678,7 +4815,7 @@ index 000000000000..4bea0c025475
|
||||
+ * find potential cpus which can migrate the current running task
|
||||
+ */
|
||||
+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) &&
|
||||
+ cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) &&
|
||||
+ sched_rq_watermark_and(&chk, cpu_online_mask, 0, true) &&
|
||||
+ cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) {
|
||||
+ int i;
|
||||
+
|
||||
@@ -4820,7 +4957,7 @@ index 000000000000..4bea0c025475
|
||||
+int __init sched_tick_offload_init(void)
|
||||
+{
|
||||
+ tick_work_cpu = alloc_percpu(struct tick_work);
|
||||
+ BUG_ON(!tick_work_cpu);
|
||||
+ WARN_ON_ONCE(!tick_work_cpu);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
@@ -4986,9 +5123,8 @@ index 000000000000..4bea0c025475
|
||||
+#ifdef ALT_SCHED_DEBUG
|
||||
+void alt_sched_debug(void)
|
||||
+{
|
||||
+ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n",
|
||||
+ printk(KERN_INFO "sched: pending: 0x%04lx, sg_idle: 0x%04lx\n",
|
||||
+ sched_rq_pending_mask.bits[0],
|
||||
+ sched_rq_watermark[0].bits[0],
|
||||
+ sched_sg_idle_mask.bits[0]);
|
||||
+}
|
||||
+#else
|
||||
@@ -4997,23 +5133,18 @@ index 000000000000..4bea0c025475
|
||||
+
|
||||
+#ifdef CONFIG_SMP
|
||||
+
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+#define SCHED_NR_MIGRATE_BREAK 8
|
||||
+#else
|
||||
+#define SCHED_NR_MIGRATE_BREAK 32
|
||||
+#endif
|
||||
+
|
||||
+const_debug unsigned int sysctl_sched_nr_migrate = SCHED_NR_MIGRATE_BREAK;
|
||||
+
|
||||
+#define SCHED_RQ_NR_MIGRATION (32U)
|
||||
+/*
|
||||
+ * Migrate pending tasks in @rq to @dest_cpu
|
||||
+ * Will try to migrate the minimum of half of @rq nr_running tasks and
|
||||
+ * SCHED_RQ_NR_MIGRATION to @dest_cpu
|
||||
+ */
|
||||
+static inline int
|
||||
+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu)
|
||||
+{
|
||||
+ struct task_struct *p, *skip = rq->curr;
|
||||
+ int nr_migrated = 0;
|
||||
+ int nr_tries = min(rq->nr_running / 2, sysctl_sched_nr_migrate);
|
||||
+ int nr_tries = min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION);
|
||||
+
|
||||
+ while (skip != rq->idle && nr_tries &&
|
||||
+ (p = sched_rq_next_task(skip, rq)) != rq->idle) {
|
||||
@@ -5631,7 +5762,7 @@ index 000000000000..4bea0c025475
|
||||
+ enum ctx_state prev_state;
|
||||
+
|
||||
+ /* Catch callers which need to be fixed */
|
||||
+ BUG_ON(preempt_count() || !irqs_disabled());
|
||||
+ WARN_ON_ONCE(preempt_count() || !irqs_disabled());
|
||||
+
|
||||
+ prev_state = exception_enter();
|
||||
+
|
||||
@@ -5806,29 +5937,17 @@ index 000000000000..4bea0c025475
|
||||
+EXPORT_SYMBOL(set_user_nice);
|
||||
+
|
||||
+/*
|
||||
+ * is_nice_reduction - check if nice value is an actual reduction
|
||||
+ *
|
||||
+ * Similar to can_nice() but does not perform a capability check.
|
||||
+ *
|
||||
+ * @p: task
|
||||
+ * @nice: nice value
|
||||
+ */
|
||||
+static bool is_nice_reduction(const struct task_struct *p, const int nice)
|
||||
+{
|
||||
+ /* Convert nice value [19,-20] to rlimit style value [1,40]: */
|
||||
+ int nice_rlim = nice_to_rlimit(nice);
|
||||
+
|
||||
+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * can_nice - check if a task can reduce its nice value
|
||||
+ * @p: task
|
||||
+ * @nice: nice value
|
||||
+ */
|
||||
+int can_nice(const struct task_struct *p, const int nice)
|
||||
+{
|
||||
+ return is_nice_reduction(p, nice) || capable(CAP_SYS_NICE);
|
||||
+ /* Convert nice value [19,-20] to rlimit style value [1,40] */
|
||||
+ int nice_rlim = nice_to_rlimit(nice);
|
||||
+
|
||||
+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
|
||||
+ capable(CAP_SYS_NICE));
|
||||
+}
|
||||
+
|
||||
+#ifdef __ARCH_WANT_SYS_NICE
|
||||
@@ -5979,45 +6098,6 @@ index 000000000000..4bea0c025475
|
||||
+ return match;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Allow unprivileged RT tasks to decrease priority.
|
||||
+ * Only issue a capable test if needed and only once to avoid an audit
|
||||
+ * event on permitted non-privileged operations:
|
||||
+ */
|
||||
+static int user_check_sched_setscheduler(struct task_struct *p,
|
||||
+ const struct sched_attr *attr,
|
||||
+ int policy, int reset_on_fork)
|
||||
+{
|
||||
+ if (rt_policy(policy)) {
|
||||
+ unsigned long rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
|
||||
+
|
||||
+ /* Can't set/change the rt policy: */
|
||||
+ if (policy != p->policy && !rlim_rtprio)
|
||||
+ goto req_priv;
|
||||
+
|
||||
+ /* Can't increase priority: */
|
||||
+ if (attr->sched_priority > p->rt_priority &&
|
||||
+ attr->sched_priority > rlim_rtprio)
|
||||
+ goto req_priv;
|
||||
+ }
|
||||
+
|
||||
+ /* Can't change other user's priorities: */
|
||||
+ if (!check_same_owner(p))
|
||||
+ goto req_priv;
|
||||
+
|
||||
+ /* Normal users shall not reset the sched_reset_on_fork flag: */
|
||||
+ if (p->sched_reset_on_fork && !reset_on_fork)
|
||||
+ goto req_priv;
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+req_priv:
|
||||
+ if (!capable(CAP_SYS_NICE))
|
||||
+ return -EPERM;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int __sched_setscheduler(struct task_struct *p,
|
||||
+ const struct sched_attr *attr,
|
||||
+ bool user, bool pi)
|
||||
@@ -6037,7 +6117,7 @@ index 000000000000..4bea0c025475
|
||||
+ raw_spinlock_t *lock;
|
||||
+
|
||||
+ /* The pi code expects interrupts enabled */
|
||||
+ BUG_ON(pi && in_interrupt());
|
||||
+ WARN_ON_ONCE(pi && in_interrupt());
|
||||
+
|
||||
+ /*
|
||||
+ * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO
|
||||
@@ -6074,11 +6154,34 @@ index 000000000000..4bea0c025475
|
||||
+ (attr->sched_priority != 0))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (user) {
|
||||
+ retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork);
|
||||
+ if (retval)
|
||||
+ return retval;
|
||||
+ /*
|
||||
+ * Allow unprivileged RT tasks to decrease priority:
|
||||
+ */
|
||||
+ if (user && !capable(CAP_SYS_NICE)) {
|
||||
+ if (SCHED_FIFO == policy || SCHED_RR == policy) {
|
||||
+ unsigned long rlim_rtprio =
|
||||
+ task_rlimit(p, RLIMIT_RTPRIO);
|
||||
+
|
||||
+ /* Can't set/change the rt policy */
|
||||
+ if (policy != p->policy && !rlim_rtprio)
|
||||
+ return -EPERM;
|
||||
+
|
||||
+ /* Can't increase priority */
|
||||
+ if (attr->sched_priority > p->rt_priority &&
|
||||
+ attr->sched_priority > rlim_rtprio)
|
||||
+ return -EPERM;
|
||||
+ }
|
||||
+
|
||||
+ /* Can't change other user's priorities */
|
||||
+ if (!check_same_owner(p))
|
||||
+ return -EPERM;
|
||||
+
|
||||
+ /* Normal users shall not reset the sched_reset_on_fork flag */
|
||||
+ if (p->sched_reset_on_fork && !reset_on_fork)
|
||||
+ return -EPERM;
|
||||
+ }
|
||||
+
|
||||
+ if (user) {
|
||||
+ retval = security_task_setscheduler(p);
|
||||
+ if (retval)
|
||||
+ return retval;
|
||||
@@ -7515,7 +7618,7 @@ index 000000000000..4bea0c025475
|
||||
+{
|
||||
+ struct mm_struct *mm = current->active_mm;
|
||||
+
|
||||
+ BUG_ON(current != this_rq()->idle);
|
||||
+ WARN_ON_ONCE(current != this_rq()->idle);
|
||||
+
|
||||
+ if (mm != &init_mm) {
|
||||
+ switch_mm(mm, &init_mm, current);
|
||||
@@ -8029,8 +8132,17 @@ index 000000000000..4bea0c025475
|
||||
+ wait_bit_init();
|
||||
+
|
||||
+#ifdef CONFIG_SMP
|
||||
+ for (i = 0; i < SCHED_QUEUE_BITS; i++)
|
||||
+ cpumask_copy(sched_rq_watermark + i, cpu_present_mask);
|
||||
+ for (i = 0; i < nr_cpu_ids; i++) {
|
||||
+ long val = cpumask_test_cpu(i, cpu_present_mask) ? -1L : 0;
|
||||
+ int j;
|
||||
+ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++)
|
||||
+ atomic_long_set(&sched_rq_watermark[i].bits[j], val);
|
||||
+ }
|
||||
+ for (i = nr_cpu_ids; i < NR_CPUS; i++) {
|
||||
+ int j;
|
||||
+ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++)
|
||||
+ atomic_long_set(&sched_rq_watermark[i].bits[j], 0);
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CONFIG_CGROUP_SCHED
|
||||
@@ -8456,12 +8568,14 @@ index 000000000000..4bea0c025475
|
||||
+ sched_unregister_group(tg);
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_RT_GROUP_SCHED
|
||||
+static void cpu_cgroup_fork(struct task_struct *task)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static void cpu_cgroup_attach(struct cgroup_taskset *tset)
|
||||
+{
|
||||
@@ -8535,9 +8649,8 @@ index 000000000000..4bea0c025475
|
||||
+ .css_released = cpu_cgroup_css_released,
|
||||
+ .css_free = cpu_cgroup_css_free,
|
||||
+ .css_extra_stat_show = cpu_extra_stat_show,
|
||||
+#ifdef CONFIG_RT_GROUP_SCHED
|
||||
+ .fork = cpu_cgroup_fork,
|
||||
+ .can_attach = cpu_cgroup_can_attach,
|
||||
+#endif
|
||||
+ .attach = cpu_cgroup_attach,
|
||||
+ .legacy_cftypes = cpu_files,
|
||||
+ .legacy_cftypes = cpu_legacy_files,
|
||||
@@ -8587,10 +8700,10 @@ index 000000000000..1212a031700e
|
||||
+{}
|
||||
diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h
|
||||
new file mode 100644
|
||||
index 000000000000..7c1cc0cbca0d
|
||||
index 000000000000..6df234aacdd7
|
||||
--- /dev/null
|
||||
+++ b/kernel/sched/alt_sched.h
|
||||
@@ -0,0 +1,660 @@
|
||||
@@ -0,0 +1,658 @@
|
||||
+#ifndef ALT_SCHED_H
|
||||
+#define ALT_SCHED_H
|
||||
+
|
||||
@@ -8673,15 +8786,6 @@ index 000000000000..7c1cc0cbca0d
|
||||
+#define MAX_SHARES (1UL << 18)
|
||||
+#endif
|
||||
+
|
||||
+/*
|
||||
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
|
||||
+ */
|
||||
+#ifdef CONFIG_SCHED_DEBUG
|
||||
+# define const_debug __read_mostly
|
||||
+#else
|
||||
+# define const_debug const
|
||||
+#endif
|
||||
+
|
||||
+/* task_struct::on_rq states: */
|
||||
+#define TASK_ON_RQ_QUEUED 1
|
||||
+#define TASK_ON_RQ_MIGRATING 2
|
||||
@@ -8838,7 +8942,7 @@ index 000000000000..7c1cc0cbca0d
|
||||
+#endif /* CONFIG_NO_HZ_COMMON */
|
||||
+};
|
||||
+
|
||||
+extern unsigned long rq_load_util(struct rq *rq, unsigned long max);
|
||||
+extern unsigned long rq_load_util(struct rq *rq, int cpu);
|
||||
+
|
||||
+extern unsigned long calc_load_update;
|
||||
+extern atomic_long_t calc_load_tasks;
|
||||
@@ -8990,6 +9094,13 @@ index 000000000000..7c1cc0cbca0d
|
||||
+}
|
||||
+
|
||||
+static inline void
|
||||
+rq_lock_irq(struct rq *rq, struct rq_flags *rf)
|
||||
+ __acquires(rq->lock)
|
||||
+{
|
||||
+ raw_spin_lock_irq(&rq->lock);
|
||||
+}
|
||||
+
|
||||
+static inline void
|
||||
+rq_lock(struct rq *rq, struct rq_flags *rf)
|
||||
+ __acquires(rq->lock)
|
||||
+{
|
||||
@@ -9393,10 +9504,18 @@ index d9dc9ab3773f..71a25540d65e 100644
|
||||
+#include "deadline.c"
|
||||
+#endif
|
||||
diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c
|
||||
index 99bdd96f454f..23f80a86d2d7 100644
|
||||
index 99bdd96f454f..bc17d5a6fc41 100644
|
||||
--- a/kernel/sched/build_utility.c
|
||||
+++ b/kernel/sched/build_utility.c
|
||||
@@ -85,7 +85,9 @@
|
||||
@@ -34,7 +34,6 @@
|
||||
#include <linux/nospec.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/psi.h>
|
||||
-#include <linux/psi.h>
|
||||
#include <linux/ptrace_api.h>
|
||||
#include <linux/sched_clock.h>
|
||||
#include <linux/security.h>
|
||||
@@ -85,7 +84,9 @@
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
# include "cpupri.c"
|
||||
@@ -9407,7 +9526,7 @@ index 99bdd96f454f..23f80a86d2d7 100644
|
||||
#endif
|
||||
|
||||
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
|
||||
index 1207c78f85c1..68812e0756cb 100644
|
||||
index 1207c78f85c1..f66b715e4287 100644
|
||||
--- a/kernel/sched/cpufreq_schedutil.c
|
||||
+++ b/kernel/sched/cpufreq_schedutil.c
|
||||
@@ -159,9 +159,14 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
|
||||
@@ -9420,7 +9539,7 @@ index 1207c78f85c1..68812e0756cb 100644
|
||||
FREQUENCY_UTIL, NULL);
|
||||
+#else
|
||||
+ sg_cpu->bw_dl = 0;
|
||||
+ sg_cpu->util = rq_load_util(rq, sg_cpu->max);
|
||||
+ sg_cpu->util = rq_load_util(rq, sg_cpu->cpu);
|
||||
+#endif /* CONFIG_SCHED_ALT */
|
||||
}
|
||||
|
||||
@@ -9504,7 +9623,7 @@ index 95fc77853743..b48b3f9ed47f 100644
|
||||
|
||||
if (task_cputime(p, &cputime.utime, &cputime.stime))
|
||||
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
|
||||
index 1637b65ba07a..033c6deeb515 100644
|
||||
index a8377d0e5ebd..b6e8e386bbfc 100644
|
||||
--- a/kernel/sched/debug.c
|
||||
+++ b/kernel/sched/debug.c
|
||||
@@ -7,6 +7,7 @@
|
||||
@@ -9555,7 +9674,7 @@ index 1637b65ba07a..033c6deeb515 100644
|
||||
debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency);
|
||||
debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity);
|
||||
debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity);
|
||||
@@ -337,11 +344,13 @@ static __init int sched_init_debug(void)
|
||||
@@ -339,11 +346,13 @@ static __init int sched_init_debug(void)
|
||||
#endif
|
||||
|
||||
debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
|
||||
@@ -9569,7 +9688,7 @@ index 1637b65ba07a..033c6deeb515 100644
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static cpumask_var_t sd_sysctl_cpus;
|
||||
@@ -1068,6 +1077,7 @@ void proc_sched_set_task(struct task_struct *p)
|
||||
@@ -1070,6 +1079,7 @@ void proc_sched_set_task(struct task_struct *p)
|
||||
memset(&p->stats, 0, sizeof(p->stats));
|
||||
#endif
|
||||
}
|
||||
@@ -9728,7 +9847,7 @@ index 000000000000..56a649d02e49
|
||||
+#endif
|
||||
+static inline void sched_task_deactivate(struct task_struct *p, struct rq *rq) {}
|
||||
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
|
||||
index 0f310768260c..bd38bf738fe9 100644
|
||||
index 036b0e2cd2b4..a00ed09127bd 100644
|
||||
--- a/kernel/sched/pelt.c
|
||||
+++ b/kernel/sched/pelt.c
|
||||
@@ -266,6 +266,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load)
|
||||
@@ -9751,7 +9870,7 @@ index 0f310768260c..bd38bf738fe9 100644
|
||||
* thermal:
|
||||
*
|
||||
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
|
||||
index 3a0e0dc28721..e8a7d84aa5a5 100644
|
||||
index 9b35b5072bae..6e457b864d66 100644
|
||||
--- a/kernel/sched/pelt.h
|
||||
+++ b/kernel/sched/pelt.h
|
||||
@@ -1,13 +1,15 @@
|
||||
@@ -9779,7 +9898,7 @@ index 3a0e0dc28721..e8a7d84aa5a5 100644
|
||||
static inline void cfs_se_util_change(struct sched_avg *avg)
|
||||
{
|
||||
unsigned int enqueued;
|
||||
@@ -180,9 +183,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
|
||||
@@ -209,9 +212,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
|
||||
return rq_clock_pelt(rq_of(cfs_rq));
|
||||
}
|
||||
#endif
|
||||
@@ -9791,7 +9910,7 @@ index 3a0e0dc28721..e8a7d84aa5a5 100644
|
||||
static inline int
|
||||
update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
|
||||
{
|
||||
@@ -200,6 +205,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
|
||||
@@ -229,6 +234,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -9800,7 +9919,7 @@ index 3a0e0dc28721..e8a7d84aa5a5 100644
|
||||
static inline int
|
||||
update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
|
||||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
|
||||
index a4a20046e586..c363693cd869 100644
|
||||
index 2c89aaa9200f..21d2d4a188fa 100644
|
||||
--- a/kernel/sched/sched.h
|
||||
+++ b/kernel/sched/sched.h
|
||||
@@ -5,6 +5,10 @@
|
||||
@@ -9814,7 +9933,7 @@ index a4a20046e586..c363693cd869 100644
|
||||
#include <linux/sched/affinity.h>
|
||||
#include <linux/sched/autogroup.h>
|
||||
#include <linux/sched/cpufreq.h>
|
||||
@@ -3183,4 +3187,9 @@ static inline void update_current_exec_runtime(struct task_struct *curr,
|
||||
@@ -3264,4 +3268,9 @@ static inline void update_current_exec_runtime(struct task_struct *curr,
|
||||
cgroup_account_cputime(curr, delta_exec);
|
||||
}
|
||||
|
||||
@@ -9856,7 +9975,7 @@ index 857f837f52cb..5486c63e4790 100644
|
||||
}
|
||||
return 0;
|
||||
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
|
||||
index 84a188913cc9..53934e7ef5db 100644
|
||||
index 38f3698f5e5b..b9d597394316 100644
|
||||
--- a/kernel/sched/stats.h
|
||||
+++ b/kernel/sched/stats.h
|
||||
@@ -89,6 +89,7 @@ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delt
|
||||
@@ -9876,7 +9995,7 @@ index 84a188913cc9..53934e7ef5db 100644
|
||||
#ifdef CONFIG_PSI
|
||||
void psi_task_change(struct task_struct *task, int clear, int set);
|
||||
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
|
||||
index 8739c2a5a54e..d8dd6c15eb47 100644
|
||||
index dea9fa39e7c0..b401e6423102 100644
|
||||
--- a/kernel/sched/topology.c
|
||||
+++ b/kernel/sched/topology.c
|
||||
@@ -3,6 +3,7 @@
|
||||
@@ -9931,7 +10050,7 @@ index 8739c2a5a54e..d8dd6c15eb47 100644
|
||||
+#endif /* CONFIG_NUMA */
|
||||
+#endif
|
||||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
||||
index c6d9dec11b74..2bc42ce8b48e 100644
|
||||
index 1c7c7c953876..f9dc7d89a6d2 100644
|
||||
--- a/kernel/sysctl.c
|
||||
+++ b/kernel/sysctl.c
|
||||
@@ -93,6 +93,10 @@ EXPORT_SYMBOL_GPL(sysctl_long_vals);
|
||||
@@ -9945,23 +10064,7 @@ index c6d9dec11b74..2bc42ce8b48e 100644
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
static const int six_hundred_forty_kb = 640 * 1024;
|
||||
#endif
|
||||
@@ -1633,6 +1637,7 @@ int proc_do_static_key(struct ctl_table *table, int write,
|
||||
}
|
||||
|
||||
static struct ctl_table kern_table[] = {
|
||||
+#ifndef CONFIG_SCHED_ALT
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
{
|
||||
.procname = "numa_balancing",
|
||||
@@ -1652,6 +1657,7 @@ static struct ctl_table kern_table[] = {
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
},
|
||||
#endif /* CONFIG_NUMA_BALANCING */
|
||||
+#endif /* !CONFIG_SCHED_ALT */
|
||||
{
|
||||
.procname = "panic",
|
||||
.data = &panic_timeout,
|
||||
@@ -1953,6 +1959,17 @@ static struct ctl_table kern_table[] = {
|
||||
@@ -1965,6 +1969,17 @@ static struct ctl_table kern_table[] = {
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
@@ -10074,21 +10177,6 @@ index a2d301f58ced..2ccdede8585c 100644
|
||||
};
|
||||
struct wakeup_test_data *x = data;
|
||||
|
||||
diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h
|
||||
index 93ff3bddd36f85c67a3130a68ed67e2525513353..a00bc84b93b2c8789d4798a2900299fdd39a2e58 100644
|
||||
--- a/kernel/sched/alt_sched.h
|
||||
+++ b/kernel/sched/alt_sched.h
|
||||
@@ -387,6 +387,13 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
|
||||
}
|
||||
--
|
||||
2.39.0.rc2.1.gbd5df96b79
|
||||
|
||||
+static inline void
|
||||
+rq_lock_irq(struct rq *rq, struct rq_flags *rf)
|
||||
+ __acquires(rq->lock)
|
||||
+{
|
||||
+ raw_spin_lock_irq(&rq->lock);
|
||||
+}
|
||||
+
|
||||
static inline void
|
||||
rq_lock(struct rq *rq, struct rq_flags *rf)
|
||||
__acquires(rq->lock)
|
||||
|
@@ -64,422 +64,253 @@ index 2c7171e0b0010..85de313ddec29 100644
|
||||
select CPU_FREQ_GOV_PERFORMANCE
|
||||
help
|
||||
|
||||
From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001
|
||||
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
|
||||
Date: Mon, 4 Oct 2021 14:07:34 -0400
|
||||
Subject: [PATCH] Bluetooth: fix deadlock for RFCOMM sk state change
|
||||
From 0c079d3f88df5f8286cd5c91b54bdac7c819be85 Mon Sep 17 00:00:00 2001
|
||||
From: Matthew Auld <matthew.auld@intel.com>
|
||||
Date: Tue, 6 Dec 2022 16:11:41 +0000
|
||||
Subject: [PATCH] drm/i915: improve the catch-all evict to handle lock
|
||||
contention
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Syzbot reports the following task hang [1]:
|
||||
The catch-all evict can fail due to object lock contention, since it
|
||||
only goes as far as trylocking the object, due to us already holding the
|
||||
vm->mutex. Doing a full object lock here can deadlock, since the
|
||||
vm->mutex is always our inner lock. Add another execbuf pass which drops
|
||||
the vm->mutex and then tries to grab the object with the full lock,
|
||||
before then retrying the eviction. This should be good enough for now to
|
||||
fix the immediate regression with userspace seeing -ENOSPC from execbuf
|
||||
due to contended object locks during GTT eviction.
|
||||
|
||||
INFO: task syz-executor255:8499 blocked for more than 143 seconds.
|
||||
Not tainted 5.14.0-rc7-syzkaller #0
|
||||
Testcase: igt@gem_ppgtt@shrink-vs-evict-*
|
||||
Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.")
|
||||
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627
|
||||
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570
|
||||
References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558
|
||||
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
|
||||
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
|
||||
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
|
||||
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
|
||||
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
|
||||
Cc: Mani Milani <mani@chromium.org>
|
||||
Cc: <stable@vger.kernel.org> # v5.18+
|
||||
|
||||
Call Trace:
|
||||
context_switch kernel/sched/core.c:4681 [inline]
|
||||
__schedule+0x93a/0x26f0 kernel/sched/core.c:5938
|
||||
schedule+0xd3/0x270 kernel/sched/core.c:6017
|
||||
__lock_sock+0x13d/0x260 net/core/sock.c:2644
|
||||
lock_sock_nested+0xf6/0x120 net/core/sock.c:3185
|
||||
lock_sock include/net/sock.h:1612 [inline]
|
||||
rfcomm_sk_state_change+0xb4/0x390 net/bluetooth/rfcomm/sock.c:73
|
||||
__rfcomm_dlc_close+0x1b6/0x8a0 net/bluetooth/rfcomm/core.c:489
|
||||
rfcomm_dlc_close+0x1ea/0x240 net/bluetooth/rfcomm/core.c:520
|
||||
__rfcomm_sock_close+0xac/0x260 net/bluetooth/rfcomm/sock.c:220
|
||||
rfcomm_sock_shutdown+0xe9/0x210 net/bluetooth/rfcomm/sock.c:931
|
||||
rfcomm_sock_release+0x5f/0x140 net/bluetooth/rfcomm/sock.c:951
|
||||
__sock_release+0xcd/0x280 net/socket.c:649
|
||||
sock_close+0x18/0x20 net/socket.c:1314
|
||||
__fput+0x288/0x920 fs/file_table.c:280
|
||||
task_work_run+0xdd/0x1a0 kernel/task_work.c:164
|
||||
exit_task_work include/linux/task_work.h:32 [inline]
|
||||
do_exit+0xbd4/0x2a60 kernel/exit.c:825
|
||||
do_group_exit+0x125/0x310 kernel/exit.c:922
|
||||
get_signal+0x47f/0x2160 kernel/signal.c:2808
|
||||
arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:865
|
||||
handle_signal_work kernel/entry/common.c:148 [inline]
|
||||
exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
|
||||
exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:209
|
||||
__syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
|
||||
syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:302
|
||||
do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
|
||||
entry_SYSCALL_64_after_hwframe+0x44/0xae
|
||||
|
||||
Showing all locks held in the system:
|
||||
1 lock held by khungtaskd/1653:
|
||||
#0: ffffffff8b97c280 (rcu_read_lock){....}-{1:2}, at:
|
||||
debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6446
|
||||
1 lock held by krfcommd/4781:
|
||||
#0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
|
||||
rfcomm_process_sessions net/bluetooth/rfcomm/core.c:1979 [inline]
|
||||
#0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
|
||||
rfcomm_run+0x2ed/0x4a20 net/bluetooth/rfcomm/core.c:2086
|
||||
2 locks held by in:imklog/8206:
|
||||
#0: ffff8880182ce5f0 (&f->f_pos_lock){+.+.}-{3:3}, at:
|
||||
__fdget_pos+0xe9/0x100 fs/file.c:974
|
||||
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
|
||||
raw_spin_rq_lock_nested kernel/sched/core.c:460 [inline]
|
||||
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock
|
||||
kernel/sched/sched.h:1307 [inline]
|
||||
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: rq_lock
|
||||
kernel/sched/sched.h:1610 [inline]
|
||||
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
|
||||
__schedule+0x233/0x26f0 kernel/sched/core.c:5852
|
||||
4 locks held by syz-executor255/8499:
|
||||
#0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
|
||||
inode_lock include/linux/fs.h:774 [inline]
|
||||
#0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
|
||||
__sock_release+0x86/0x280 net/socket.c:648
|
||||
#1:
|
||||
ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
|
||||
at: lock_sock include/net/sock.h:1612 [inline]
|
||||
#1:
|
||||
ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
|
||||
at: rfcomm_sock_shutdown+0x54/0x210 net/bluetooth/rfcomm/sock.c:928
|
||||
#2: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
|
||||
rfcomm_dlc_close+0x34/0x240 net/bluetooth/rfcomm/core.c:507
|
||||
#3: ffff888141bd6d28 (&d->lock){+.+.}-{3:3}, at:
|
||||
__rfcomm_dlc_close+0x162/0x8a0 net/bluetooth/rfcomm/core.c:487
|
||||
==================================================================
|
||||
|
||||
The task hangs because of a deadlock that occurs when lock_sock() is
|
||||
called in rfcomm_sk_state_change(). One such call stack is:
|
||||
|
||||
rfcomm_sock_shutdown():
|
||||
lock_sock();
|
||||
__rfcomm_sock_close():
|
||||
rfcomm_dlc_close():
|
||||
__rfcomm_dlc_close():
|
||||
rfcomm_dlc_lock();
|
||||
rfcomm_sk_state_change():
|
||||
lock_sock();
|
||||
|
||||
lock_sock() has to be called when the sk state is changed because the
|
||||
lock is not always held when rfcomm_sk_state_change() is
|
||||
called. However, besides the recursive deadlock, there is also an
|
||||
issue of a lock hierarchy inversion between rfcomm_dlc_lock() and
|
||||
lock_sock() if the socket is locked in rfcomm_sk_state_change().
|
||||
|
||||
To avoid these issues, we can instead schedule the sk state change in
|
||||
the global workqueue. This is already the implicit assumption about
|
||||
how sk state changes happen. For example, in rfcomm_sock_shutdown(),
|
||||
the call to __rfcomm_sock_close() is followed by
|
||||
bt_sock_wait_state().
|
||||
|
||||
Additionally, the call to rfcomm_sock_kill() inside
|
||||
rfcomm_sk_state_change() should be removed. The socket shouldn't be
|
||||
killed here because only rfcomm_sock_release() calls sock_orphan(),
|
||||
which it already follows up with a call to rfcomm_sock_kill().
|
||||
|
||||
Fixes: b7ce436a5d79 ("Bluetooth: switch to lock_sock in RFCOMM")
|
||||
Link: https://syzkaller.appspot.com/bug?extid=7d51f807c81b190a127d [1]
|
||||
Reported-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
|
||||
Tested-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
|
||||
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
|
||||
Cc: Hillf Danton <hdanton@sina.com>
|
||||
Revision 1 of https://patchwork.freedesktop.org/series/111686/
|
||||
---
|
||||
include/net/bluetooth/rfcomm.h | 3 +++
|
||||
net/bluetooth/rfcomm/core.c | 2 ++
|
||||
net/bluetooth/rfcomm/sock.c | 34 ++++++++++++++++++++++------------
|
||||
3 files changed, 27 insertions(+), 12 deletions(-)
|
||||
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 25 +++++++++++--
|
||||
drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +-
|
||||
drivers/gpu/drm/i915/i915_gem_evict.c | 37 ++++++++++++++-----
|
||||
drivers/gpu/drm/i915/i915_gem_evict.h | 4 +-
|
||||
drivers/gpu/drm/i915/i915_vma.c | 2 +-
|
||||
.../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-
|
||||
6 files changed, 56 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
|
||||
index 99d26879b02a53..a92799fc5e74d0 100644
|
||||
--- a/include/net/bluetooth/rfcomm.h
|
||||
+++ b/include/net/bluetooth/rfcomm.h
|
||||
@@ -171,6 +171,7 @@ struct rfcomm_dlc {
|
||||
struct rfcomm_session *session;
|
||||
struct sk_buff_head tx_queue;
|
||||
struct timer_list timer;
|
||||
+ struct work_struct state_change_work;
|
||||
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||
index 845023c14eb36f..094e92ed28db4f 100644
|
||||
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||
@@ -741,25 +741,44 @@ static int eb_reserve(struct i915_execbuffer *eb)
|
||||
*
|
||||
* Defragmenting is skipped if all objects are pinned at a fixed location.
|
||||
*/
|
||||
- for (pass = 0; pass <= 2; pass++) {
|
||||
+ for (pass = 0; pass <= 3; pass++) {
|
||||
int pin_flags = PIN_USER | PIN_VALIDATE;
|
||||
|
||||
struct mutex lock;
|
||||
unsigned long state;
|
||||
@@ -186,6 +187,7 @@ struct rfcomm_dlc {
|
||||
u8 sec_level;
|
||||
u8 role_switch;
|
||||
u32 defer_setup;
|
||||
+ int err;
|
||||
if (pass == 0)
|
||||
pin_flags |= PIN_NONBLOCK;
|
||||
|
||||
uint mtu;
|
||||
uint cfc;
|
||||
@@ -310,6 +312,7 @@ struct rfcomm_pinfo {
|
||||
u8 role_switch;
|
||||
};
|
||||
if (pass >= 1)
|
||||
- unpinned = eb_unbind(eb, pass == 2);
|
||||
+ unpinned = eb_unbind(eb, pass >= 2);
|
||||
|
||||
+void __rfcomm_sk_state_change(struct work_struct *work);
|
||||
int rfcomm_init_sockets(void);
|
||||
void rfcomm_cleanup_sockets(void);
|
||||
|
||||
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
|
||||
index 7324764384b677..c6494e85cd68b2 100644
|
||||
--- a/net/bluetooth/rfcomm/core.c
|
||||
+++ b/net/bluetooth/rfcomm/core.c
|
||||
@@ -289,6 +289,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
|
||||
d->flags = 0;
|
||||
d->mscex = 0;
|
||||
d->sec_level = BT_SECURITY_LOW;
|
||||
+ d->err = 0;
|
||||
d->mtu = RFCOMM_DEFAULT_MTU;
|
||||
d->v24_sig = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
|
||||
|
||||
@@ -306,6 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
|
||||
timer_setup(&d->timer, rfcomm_dlc_timeout, 0);
|
||||
|
||||
skb_queue_head_init(&d->tx_queue);
|
||||
+ INIT_WORK(&d->state_change_work, __rfcomm_sk_state_change);
|
||||
mutex_init(&d->lock);
|
||||
refcount_set(&d->refcnt, 1);
|
||||
|
||||
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
|
||||
index 4bf4ea6cbb5eee..4850dafbaa05fb 100644
|
||||
--- a/net/bluetooth/rfcomm/sock.c
|
||||
+++ b/net/bluetooth/rfcomm/sock.c
|
||||
@@ -61,19 +61,22 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb)
|
||||
rfcomm_dlc_throttle(d);
|
||||
if (pass == 2) {
|
||||
err = mutex_lock_interruptible(&eb->context->vm->mutex);
|
||||
if (!err) {
|
||||
- err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
|
||||
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
|
||||
mutex_unlock(&eb->context->vm->mutex);
|
||||
}
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
-static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
|
||||
+void __rfcomm_sk_state_change(struct work_struct *work)
|
||||
{
|
||||
+ struct rfcomm_dlc *d = container_of(work, struct rfcomm_dlc,
|
||||
+ state_change_work);
|
||||
struct sock *sk = d->owner, *parent;
|
||||
|
||||
if (!sk)
|
||||
return;
|
||||
|
||||
- BT_DBG("dlc %p state %ld err %d", d, d->state, err);
|
||||
-
|
||||
lock_sock(sk);
|
||||
+ rfcomm_dlc_lock(d);
|
||||
|
||||
- if (err)
|
||||
- sk->sk_err = err;
|
||||
+ BT_DBG("dlc %p state %ld err %d", d, d->state, d->err);
|
||||
+ if (pass == 3) {
|
||||
+retry:
|
||||
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
|
||||
+ if (!err) {
|
||||
+ struct drm_i915_gem_object *busy_bo = NULL;
|
||||
+
|
||||
+ if (d->err)
|
||||
+ sk->sk_err = d->err;
|
||||
|
||||
sk->sk_state = d->state;
|
||||
|
||||
@@ -91,15 +94,22 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
|
||||
sk->sk_state_change(sk);
|
||||
}
|
||||
|
||||
+ rfcomm_dlc_unlock(d);
|
||||
release_sock(sk);
|
||||
+ sock_put(sk);
|
||||
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
|
||||
+ mutex_unlock(&eb->context->vm->mutex);
|
||||
+ if (err && busy_bo) {
|
||||
+ err = i915_gem_object_lock(busy_bo, &eb->ww);
|
||||
+ i915_gem_object_put(busy_bo);
|
||||
+ if (!err)
|
||||
+ goto retry;
|
||||
+ }
|
||||
|
||||
- if (parent && sock_flag(sk, SOCK_ZAPPED)) {
|
||||
- /* We have to drop DLC lock here, otherwise
|
||||
- * rfcomm_sock_destruct() will dead lock. */
|
||||
- rfcomm_dlc_unlock(d);
|
||||
- rfcomm_sock_kill(sk);
|
||||
- rfcomm_dlc_lock(d);
|
||||
- }
|
||||
+static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
|
||||
+{
|
||||
+ struct sock *sk = d->owner;
|
||||
+
|
||||
+ if (!sk)
|
||||
+ return;
|
||||
+
|
||||
+ d->err = err;
|
||||
+ sock_hold(sk);
|
||||
+ if (!schedule_work(&d->state_change_work))
|
||||
+ sock_put(sk);
|
||||
}
|
||||
|
||||
/* ---- Socket functions ---- */
|
||||
|
||||
From 727209376f4998bc84db1d5d8af15afea846a92b Mon Sep 17 00:00:00 2001
|
||||
From: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
|
||||
Date: Mon, 24 Oct 2022 17:02:54 -0300
|
||||
Subject: x86/split_lock: Add sysctl to control the misery mode
|
||||
|
||||
Commit b041b525dab9 ("x86/split_lock: Make life miserable for split lockers")
|
||||
changed the way the split lock detector works when in "warn" mode;
|
||||
basically, it not only shows the warn message, but also intentionally
|
||||
introduces a slowdown through sleeping plus serialization mechanism
|
||||
on such a task. Based on discussions in [0], it seems the warning alone
|
||||
wasn't enough motivation for userspace developers to fix their
|
||||
applications.
|
||||
|
||||
This slowdown is enough to totally break some proprietary (aka.
|
||||
unfixable) userspace[1].
|
||||
|
||||
Happens that originally the proposal in [0] was to add a new mode
|
||||
which would warn + slow down the "split locking" task, keeping the
|
||||
old warn mode untouched. In the end, that idea was discarded and
|
||||
the regular/default "warn" mode now slows down the applications. This
|
||||
is quite aggressive with regard to proprietary/legacy programs that
|
||||
basically are unable to properly run in kernel with this change.
|
||||
While it is understandable that a malicious application could DoS
|
||||
by split locking, it seems unacceptable to regress old/proprietary
|
||||
userspace programs through a default configuration that previously
|
||||
worked. An example of such breakage was reported in [1].
|
||||
|
||||
Add a sysctl to allow controlling the "misery mode" behavior, as per
|
||||
Thomas' suggestion in [2]. This way, users running legacy and/or
|
||||
proprietary software are allowed to still execute them with a decent
|
||||
performance while still observing the warning messages on kernel log.
|
||||
|
||||
[0] https://lore.kernel.org/lkml/20220217012721.9694-1-tony.luck@intel.com/
|
||||
[1] https://github.com/doitsujin/dxvk/issues/2938
|
||||
[2] https://lore.kernel.org/lkml/87pmf4bter.ffs@tglx/
|
||||
|
||||
[ dhansen: minor changelog tweaks, including clarifying the actual
|
||||
problem ]
|
||||
|
||||
Fixes: b041b525dab9 ("x86/split_lock: Make life miserable for split lockers")
|
||||
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
|
||||
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
|
||||
Reviewed-by: Tony Luck <tony.luck@intel.com>
|
||||
Tested-by: Andre Almeida <andrealmeid@igalia.com>
|
||||
Link: https://lore.kernel.org/all/20221024200254.635256-1-gpiccoli%40igalia.com
|
||||
---
|
||||
Documentation/admin-guide/sysctl/kernel.rst | 23 +++++++++++
|
||||
arch/x86/kernel/cpu/intel.c | 63 ++++++++++++++++++++++++-----
|
||||
2 files changed, 76 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
|
||||
index 98d1b198b2b4c..c2c64c1b706ff 100644
|
||||
--- a/Documentation/admin-guide/sysctl/kernel.rst
|
||||
+++ b/Documentation/admin-guide/sysctl/kernel.rst
|
||||
@@ -1314,6 +1314,29 @@ watchdog work to be queued by the watchdog timer function, otherwise the NMI
|
||||
watchdog — if enabled — can detect a hard lockup condition.
|
||||
|
||||
|
||||
+split_lock_mitigate (x86 only)
|
||||
+==============================
|
||||
+
|
||||
+On x86, each "split lock" imposes a system-wide performance penalty. On larger
|
||||
+systems, large numbers of split locks from unprivileged users can result in
|
||||
+denials of service to well-behaved and potentially more important users.
|
||||
+
|
||||
+The kernel mitigates these bad users by detecting split locks and imposing
|
||||
+penalties: forcing them to wait and only allowing one core to execute split
|
||||
+locks at a time.
|
||||
+
|
||||
+These mitigations can make those bad applications unbearably slow. Setting
|
||||
+split_lock_mitigate=0 may restore some application performance, but will also
|
||||
+increase system exposure to denial of service attacks from split lock users.
|
||||
+
|
||||
+= ===================================================================
|
||||
+0 Disable the mitigation mode - just warns the split lock on kernel log
|
||||
+ and exposes the system to denials of service from the split lockers.
|
||||
+1 Enable the mitigation mode (this is the default) - penalizes the split
|
||||
+ lockers with intentional performance degradation.
|
||||
+= ===================================================================
|
||||
+
|
||||
+
|
||||
stack_erasing
|
||||
=============
|
||||
|
||||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
|
||||
index 2d7ea5480ec33..4278996504833 100644
|
||||
--- a/arch/x86/kernel/cpu/intel.c
|
||||
+++ b/arch/x86/kernel/cpu/intel.c
|
||||
@@ -1034,8 +1034,32 @@ static const struct {
|
||||
|
||||
static struct ratelimit_state bld_ratelimit;
|
||||
|
||||
+static unsigned int sysctl_sld_mitigate = 1;
|
||||
static DEFINE_SEMAPHORE(buslock_sem);
|
||||
|
||||
+#ifdef CONFIG_PROC_SYSCTL
|
||||
+static struct ctl_table sld_sysctls[] = {
|
||||
+ {
|
||||
+ .procname = "split_lock_mitigate",
|
||||
+ .data = &sysctl_sld_mitigate,
|
||||
+ .maxlen = sizeof(unsigned int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_douintvec_minmax,
|
||||
+ .extra1 = SYSCTL_ZERO,
|
||||
+ .extra2 = SYSCTL_ONE,
|
||||
+ },
|
||||
+ {}
|
||||
+};
|
||||
+
|
||||
+static int __init sld_mitigate_sysctl_init(void)
|
||||
+{
|
||||
+ register_sysctl_init("kernel", sld_sysctls);
|
||||
+ return 0;
|
||||
+ }
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+ }
|
||||
+
|
||||
+late_initcall(sld_mitigate_sysctl_init);
|
||||
+#endif
|
||||
+
|
||||
static inline bool match_option(const char *arg, int arglen, const char *opt)
|
||||
list_for_each_entry(ev, &eb->unbound, bind_link) {
|
||||
err = eb_reserve_vma(eb, ev, pin_flags);
|
||||
if (err)
|
||||
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||
index 73d9eda1d6b7a6..c83d98e1dc5da0 100644
|
||||
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||
@@ -369,7 +369,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
|
||||
if (vma == ERR_PTR(-ENOSPC)) {
|
||||
ret = mutex_lock_interruptible(&ggtt->vm.mutex);
|
||||
if (!ret) {
|
||||
- ret = i915_gem_evict_vm(&ggtt->vm, &ww);
|
||||
+ ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
|
||||
mutex_unlock(&ggtt->vm.mutex);
|
||||
}
|
||||
if (ret)
|
||||
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||
index f025ee4fa52618..a4b4d9b7d26c7a 100644
|
||||
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||
@@ -416,6 +416,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||
* @vm: Address space to cleanse
|
||||
* @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
|
||||
* will be able to evict vma's locked by the ww as well.
|
||||
+ * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
|
||||
+ * in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
|
||||
+ * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
|
||||
+ * the vm->mutex, before trying again to acquire the contended lock. The caller
|
||||
+ * also owns a reference to the object.
|
||||
*
|
||||
* This function evicts all vmas from a vm.
|
||||
*
|
||||
@@ -425,7 +430,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||
* To clarify: This is for freeing up virtual address space, not for freeing
|
||||
* memory in e.g. the shrinker.
|
||||
*/
|
||||
-int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||
+int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
|
||||
+ struct drm_i915_gem_object **busy_bo)
|
||||
{
|
||||
int len = strlen(opt), ratelimit;
|
||||
@@ -1146,12 +1170,20 @@ static void split_lock_init(void)
|
||||
split_lock_verify_msr(sld_state != sld_off);
|
||||
int ret = 0;
|
||||
|
||||
@@ -457,15 +463,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||
* the resv is shared among multiple objects, we still
|
||||
* need the object ref.
|
||||
*/
|
||||
- if (dying_vma(vma) ||
|
||||
+ if (!i915_gem_object_get_rcu(vma->obj) ||
|
||||
(ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) {
|
||||
__i915_vma_pin(vma);
|
||||
list_add(&vma->evict_link, &locked_eviction_list);
|
||||
continue;
|
||||
}
|
||||
|
||||
-static void __split_lock_reenable(struct work_struct *work)
|
||||
+static void __split_lock_reenable_unlock(struct work_struct *work)
|
||||
{
|
||||
sld_update_msr(true);
|
||||
up(&buslock_sem);
|
||||
}
|
||||
|
||||
+static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
|
||||
+
|
||||
+static void __split_lock_reenable(struct work_struct *work)
|
||||
+{
|
||||
+ sld_update_msr(true);
|
||||
- if (!i915_gem_object_trylock(vma->obj, ww))
|
||||
+ if (!i915_gem_object_trylock(vma->obj, ww)) {
|
||||
+ if (busy_bo) {
|
||||
+ *busy_bo = vma->obj; /* holds ref */
|
||||
+ ret = -EBUSY;
|
||||
+ break;
|
||||
+ }
|
||||
+static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
|
||||
+
|
||||
/*
|
||||
* If a CPU goes offline with pending delayed work to re-enable split lock
|
||||
* detection then the delayed work will be executed on some other CPU. That
|
||||
@@ -1169,10 +1201,9 @@ static int splitlock_cpu_offline(unsigned int cpu)
|
||||
return 0;
|
||||
+ i915_gem_object_put(vma->obj);
|
||||
continue;
|
||||
+ }
|
||||
|
||||
__i915_vma_pin(vma);
|
||||
list_add(&vma->evict_link, &eviction_list);
|
||||
@@ -473,25 +486,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||
if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
|
||||
break;
|
||||
|
||||
- ret = 0;
|
||||
/* Unbind locked objects first, before unlocking the eviction_list */
|
||||
list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
|
||||
__i915_vma_unpin(vma);
|
||||
|
||||
- if (ret == 0)
|
||||
+ if (ret == 0) {
|
||||
ret = __i915_vma_unbind(vma);
|
||||
- if (ret != -EINTR) /* "Get me out of here!" */
|
||||
- ret = 0;
|
||||
+ if (ret != -EINTR) /* "Get me out of here!" */
|
||||
+ ret = 0;
|
||||
+ }
|
||||
+ if (!dying_vma(vma))
|
||||
+ i915_gem_object_put(vma->obj);
|
||||
}
|
||||
|
||||
-static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
|
||||
-
|
||||
static void split_lock_warn(unsigned long ip)
|
||||
{
|
||||
+ struct delayed_work *work;
|
||||
int cpu;
|
||||
|
||||
if (!current->reported_split_lock)
|
||||
@@ -1180,14 +1211,26 @@ static void split_lock_warn(unsigned long ip)
|
||||
current->comm, current->pid, ip);
|
||||
current->reported_split_lock = 1;
|
||||
|
||||
- /* misery factor #1, sleep 10ms before trying to execute split lock */
|
||||
- if (msleep_interruptible(10) > 0)
|
||||
- return;
|
||||
- /* Misery factor #2, only allow one buslocked disabled core at a time */
|
||||
- if (down_interruptible(&buslock_sem) == -EINTR)
|
||||
- return;
|
||||
+ if (sysctl_sld_mitigate) {
|
||||
+ /*
|
||||
+ * misery factor #1:
|
||||
+ * sleep 10ms before trying to execute split lock.
|
||||
+ */
|
||||
+ if (msleep_interruptible(10) > 0)
|
||||
+ return;
|
||||
+ /*
|
||||
+ * Misery factor #2:
|
||||
+ * only allow one buslocked disabled core at a time.
|
||||
+ */
|
||||
+ if (down_interruptible(&buslock_sem) == -EINTR)
|
||||
+ return;
|
||||
+ work = &sl_reenable_unlock;
|
||||
+ } else {
|
||||
+ work = &sl_reenable;
|
||||
list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
|
||||
__i915_vma_unpin(vma);
|
||||
- if (ret == 0)
|
||||
+ if (ret == 0) {
|
||||
ret = __i915_vma_unbind(vma);
|
||||
- if (ret != -EINTR) /* "Get me out of here!" */
|
||||
- ret = 0;
|
||||
+ if (ret != -EINTR) /* "Get me out of here!" */
|
||||
+ ret = 0;
|
||||
+ }
|
||||
+
|
||||
cpu = get_cpu();
|
||||
- schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
|
||||
+ schedule_delayed_work_on(cpu, work, 2);
|
||||
|
||||
/* Disable split lock detection on this CPU to make progress */
|
||||
sld_update_msr(false);
|
||||
i915_gem_object_unlock(vma->obj);
|
||||
+ i915_gem_object_put(vma->obj);
|
||||
}
|
||||
} while (ret == 0);
|
||||
|
||||
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||
index e593c530f9bd7a..bf0ee0e4fe6088 100644
|
||||
--- a/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||
+++ b/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||
@@ -11,6 +11,7 @@
|
||||
struct drm_mm_node;
|
||||
struct i915_address_space;
|
||||
struct i915_gem_ww_ctx;
|
||||
+struct drm_i915_gem_object;
|
||||
|
||||
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
|
||||
struct i915_gem_ww_ctx *ww,
|
||||
@@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||
struct drm_mm_node *node,
|
||||
unsigned int flags);
|
||||
int i915_gem_evict_vm(struct i915_address_space *vm,
|
||||
- struct i915_gem_ww_ctx *ww);
|
||||
+ struct i915_gem_ww_ctx *ww,
|
||||
+ struct drm_i915_gem_object **busy_bo);
|
||||
|
||||
#endif /* __I915_GEM_EVICT_H__ */
|
||||
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
|
||||
index f17c09ead7d778..4d06875de14a14 100644
|
||||
--- a/drivers/gpu/drm/i915/i915_vma.c
|
||||
+++ b/drivers/gpu/drm/i915/i915_vma.c
|
||||
@@ -1569,7 +1569,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
||||
* locked objects when called from execbuf when pinning
|
||||
* is removed. This would probably regress badly.
|
||||
*/
|
||||
- i915_gem_evict_vm(vm, NULL);
|
||||
+ i915_gem_evict_vm(vm, NULL, NULL);
|
||||
mutex_unlock(&vm->mutex);
|
||||
}
|
||||
} while (1);
|
||||
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||
index 8c6517d29b8e0c..37068542aafe7f 100644
|
||||
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||
@@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
|
||||
|
||||
/* Everything is pinned, nothing should happen */
|
||||
mutex_lock(&ggtt->vm.mutex);
|
||||
- err = i915_gem_evict_vm(&ggtt->vm, NULL);
|
||||
+ err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL);
|
||||
mutex_unlock(&ggtt->vm.mutex);
|
||||
if (err) {
|
||||
pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
|
||||
@@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
|
||||
|
||||
for_i915_gem_ww(&ww, err, false) {
|
||||
mutex_lock(&ggtt->vm.mutex);
|
||||
- err = i915_gem_evict_vm(&ggtt->vm, &ww);
|
||||
+ err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
|
||||
mutex_unlock(&ggtt->vm.mutex);
|
||||
}
|
||||
|
||||
|
@@ -0,0 +1,151 @@
|
||||
From d50977b164e708bf523a35ef53315355528c3ca6 Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
||||
Date: Mon, 16 Sep 2019 04:53:20 +0200
|
||||
Subject: [PATCH] ZEN: Add sysctl and CONFIG to disallow unprivileged
|
||||
CLONE_NEWUSER
|
||||
|
||||
Our default behavior continues to match the vanilla kernel.
|
||||
---
|
||||
include/linux/user_namespace.h | 4 ++++
|
||||
init/Kconfig | 16 ++++++++++++++++
|
||||
kernel/fork.c | 14 ++++++++++++++
|
||||
kernel/sysctl.c | 12 ++++++++++++
|
||||
kernel/user_namespace.c | 7 +++++++
|
||||
5 files changed, 53 insertions(+)
|
||||
|
||||
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
|
||||
index 45f09bec02c485..87b20e2ee27445 100644
|
||||
--- a/include/linux/user_namespace.h
|
||||
+++ b/include/linux/user_namespace.h
|
||||
@@ -148,6 +148,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
|
||||
|
||||
#ifdef CONFIG_USER_NS
|
||||
|
||||
+extern int unprivileged_userns_clone;
|
||||
+
|
||||
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
||||
{
|
||||
if (ns)
|
||||
@@ -181,6 +183,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
|
||||
struct ns_common *ns_get_owner(struct ns_common *ns);
|
||||
#else
|
||||
|
||||
+#define unprivileged_userns_clone 0
|
||||
+
|
||||
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
||||
{
|
||||
return &init_user_ns;
|
||||
diff --git a/init/Kconfig b/init/Kconfig
|
||||
index 94125d3b6893c7..9f7139b536f638 100644
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1247,6 +1247,22 @@ config USER_NS
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
+config USER_NS_UNPRIVILEGED
|
||||
+ bool "Allow unprivileged users to create namespaces"
|
||||
+ default y
|
||||
+ depends on USER_NS
|
||||
+ help
|
||||
+ When disabled, unprivileged users will not be able to create
|
||||
+ new namespaces. Allowing users to create their own namespaces
|
||||
+ has been part of several recent local privilege escalation
|
||||
+ exploits, so if you need user namespaces but are
|
||||
+ paranoid^Wsecurity-conscious you want to disable this.
|
||||
+
|
||||
+ This setting can be overridden at runtime via the
|
||||
+ kernel.unprivileged_userns_clone sysctl.
|
||||
+
|
||||
+ If unsure, say Y.
|
||||
+
|
||||
config PID_NS
|
||||
bool "PID Namespaces"
|
||||
default y
|
||||
diff --git a/kernel/fork.c b/kernel/fork.c
|
||||
index 08969f5aa38d59..ff601cb7a1fae0 100644
|
||||
--- a/kernel/fork.c
|
||||
+++ b/kernel/fork.c
|
||||
@@ -98,6 +98,10 @@
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/bpf.h>
|
||||
|
||||
+#ifdef CONFIG_USER_NS
|
||||
+#include <linux/user_namespace.h>
|
||||
+#endif
|
||||
+
|
||||
#include <asm/pgalloc.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/mmu_context.h>
|
||||
@@ -2008,6 +2012,10 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
|
||||
+ if (!capable(CAP_SYS_ADMIN))
|
||||
+ return ERR_PTR(-EPERM);
|
||||
+
|
||||
/*
|
||||
* Thread groups must share signals as well, and detached threads
|
||||
* can only be started up within the thread group.
|
||||
@@ -3166,6 +3174,12 @@ int ksys_unshare(unsigned long unshare_flags)
|
||||
if (unshare_flags & CLONE_NEWNS)
|
||||
unshare_flags |= CLONE_FS;
|
||||
|
||||
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
|
||||
+ err = -EPERM;
|
||||
+ if (!capable(CAP_SYS_ADMIN))
|
||||
+ goto bad_unshare_out;
|
||||
+ }
|
||||
+
|
||||
err = check_unshare_flags(unshare_flags);
|
||||
if (err)
|
||||
goto bad_unshare_out;
|
||||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
|
||||
index c6d9dec11b749d..9a4514ad481b21 100644
|
||||
--- a/kernel/sysctl.c
|
||||
+++ b/kernel/sysctl.c
|
||||
@@ -81,6 +81,9 @@
|
||||
#ifdef CONFIG_RT_MUTEXES
|
||||
#include <linux/rtmutex.h>
|
||||
#endif
|
||||
+#ifdef CONFIG_USER_NS
|
||||
+#include <linux/user_namespace.h>
|
||||
+#endif
|
||||
|
||||
/* shared constants to be used in various sysctls */
|
||||
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
|
||||
@@ -1659,6 +1662,15 @@ static struct ctl_table kern_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
+#ifdef CONFIG_USER_NS
|
||||
+ {
|
||||
+ .procname = "unprivileged_userns_clone",
|
||||
+ .data = &unprivileged_userns_clone,
|
||||
+ .maxlen = sizeof(int),
|
||||
+ .mode = 0644,
|
||||
+ .proc_handler = proc_dointvec,
|
||||
+ },
|
||||
+#endif
|
||||
#ifdef CONFIG_PROC_SYSCTL
|
||||
{
|
||||
.procname = "tainted",
|
||||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
|
||||
index 54211dbd516c57..16ca0c1516298d 100644
|
||||
--- a/kernel/user_namespace.c
|
||||
+++ b/kernel/user_namespace.c
|
||||
@@ -22,6 +22,13 @@
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
+/* sysctl */
|
||||
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
|
||||
+int unprivileged_userns_clone = 1;
|
||||
+#else
|
||||
+int unprivileged_userns_clone;
|
||||
+#endif
|
||||
+
|
||||
static struct kmem_cache *user_ns_cachep __read_mostly;
|
||||
static DEFINE_MUTEX(userns_state_mutex);
|
||||
|
@@ -0,0 +1,244 @@
|
||||
From 5ae86c8436b83762bc6cf46bea1da6ace2d3f50e Mon Sep 17 00:00:00 2001
|
||||
From: Paul Gofman <pgofman@codeweavers.com>
|
||||
Date: Wed, 6 May 2020 14:37:44 +0300
|
||||
Subject: [PATCH 1/2] mm: Support soft dirty flag reset for VA range.
|
||||
|
||||
---
|
||||
fs/proc/task_mmu.c | 129 ++++++++++++++++++++++++++++++++++++---------
|
||||
1 file changed, 103 insertions(+), 26 deletions(-)
|
||||
|
||||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
|
||||
index 3cec6fbef725..7c7865028f10 100644
|
||||
--- a/fs/proc/task_mmu.c
|
||||
+++ b/fs/proc/task_mmu.c
|
||||
@@ -1032,6 +1032,8 @@ enum clear_refs_types {
|
||||
|
||||
struct clear_refs_private {
|
||||
enum clear_refs_types type;
|
||||
+ unsigned long start, end;
|
||||
+ bool clear_range;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_MEM_SOFT_DIRTY
|
||||
@@ -1125,6 +1127,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
|
||||
spinlock_t *ptl;
|
||||
struct page *page;
|
||||
|
||||
+ BUG_ON(addr < cp->start || end > cp->end);
|
||||
+
|
||||
ptl = pmd_trans_huge_lock(pmd, vma);
|
||||
if (ptl) {
|
||||
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
|
||||
@@ -1181,9 +1185,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
|
||||
struct clear_refs_private *cp = walk->private;
|
||||
struct vm_area_struct *vma = walk->vma;
|
||||
|
||||
- if (vma->vm_flags & VM_PFNMAP)
|
||||
+ if (!cp->clear_range && (vma->vm_flags & VM_PFNMAP))
|
||||
return 1;
|
||||
|
||||
+ BUG_ON(start < cp->start || end > cp->end);
|
||||
+
|
||||
/*
|
||||
* Writing 1 to /proc/pid/clear_refs affects all pages.
|
||||
* Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
|
||||
@@ -1206,10 +1212,12 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
struct task_struct *task;
|
||||
- char buffer[PROC_NUMBUF];
|
||||
+ char buffer[18];
|
||||
struct mm_struct *mm;
|
||||
struct vm_area_struct *vma;
|
||||
enum clear_refs_types type;
|
||||
+ unsigned long start, end;
|
||||
+ bool clear_range;
|
||||
int itype;
|
||||
int rv;
|
||||
|
||||
@@ -1218,12 +1226,34 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
|
||||
count = sizeof(buffer) - 1;
|
||||
if (copy_from_user(buffer, buf, count))
|
||||
return -EFAULT;
|
||||
- rv = kstrtoint(strstrip(buffer), 10, &itype);
|
||||
- if (rv < 0)
|
||||
- return rv;
|
||||
- type = (enum clear_refs_types)itype;
|
||||
- if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
|
||||
- return -EINVAL;
|
||||
+
|
||||
+ if (buffer[0] == '6')
|
||||
+ {
|
||||
+ static int once;
|
||||
+
|
||||
+ if (!once++)
|
||||
+ printk(KERN_DEBUG "task_mmu: Using POC clear refs range implementation.\n");
|
||||
+
|
||||
+ if (count != 17)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ type = CLEAR_REFS_SOFT_DIRTY;
|
||||
+ start = *(unsigned long *)(buffer + 1);
|
||||
+ end = *(unsigned long *)(buffer + 1 + 8);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ rv = kstrtoint(strstrip(buffer), 10, &itype);
|
||||
+ if (rv < 0)
|
||||
+ return rv;
|
||||
+ type = (enum clear_refs_types)itype;
|
||||
+
|
||||
+ if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ start = 0;
|
||||
+ end = -1UL;
|
||||
+ }
|
||||
|
||||
task = get_proc_task(file_inode(file));
|
||||
if (!task)
|
||||
@@ -1235,40 +1265,86 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
|
||||
.type = type,
|
||||
};
|
||||
|
||||
- if (mmap_write_lock_killable(mm)) {
|
||||
- count = -EINTR;
|
||||
- goto out_mm;
|
||||
+ if (start || end != -1UL)
|
||||
+ {
|
||||
+ start = min(start, -1) & PAGE_MASK;
|
||||
+ end = min(end, -1) & PAGE_MASK;
|
||||
+
|
||||
+ if (start >= end)
|
||||
+ {
|
||||
+ count = -EINVAL;
|
||||
+ goto out_mm;
|
||||
+ }
|
||||
+ clear_range = true;
|
||||
}
|
||||
+ else
|
||||
+ {
|
||||
+ clear_range = false;
|
||||
+ }
|
||||
+
|
||||
+ cp.start = start;
|
||||
+ cp.end = end;
|
||||
+ cp.clear_range = clear_range;
|
||||
+
|
||||
if (type == CLEAR_REFS_MM_HIWATER_RSS) {
|
||||
+ if (mmap_write_lock_killable(mm)) {
|
||||
+ count = -EINTR;
|
||||
+ goto out_mm;
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* Writing 5 to /proc/pid/clear_refs resets the peak
|
||||
* resident set size to this mm's current rss value.
|
||||
*/
|
||||
reset_mm_hiwater_rss(mm);
|
||||
- goto out_unlock;
|
||||
+ mmap_write_unlock(mm);
|
||||
+ goto out_mm;
|
||||
}
|
||||
|
||||
if (type == CLEAR_REFS_SOFT_DIRTY) {
|
||||
- mas_for_each(&mas, vma, ULONG_MAX) {
|
||||
- if (!(vma->vm_flags & VM_SOFTDIRTY))
|
||||
- continue;
|
||||
- vma->vm_flags &= ~VM_SOFTDIRTY;
|
||||
- vma_set_page_prot(vma);
|
||||
+ if (mmap_read_lock_killable(mm)) {
|
||||
+ count = -EINTR;
|
||||
+ goto out_mm;
|
||||
}
|
||||
-
|
||||
+ if (!clear_range)
|
||||
+ mas_for_each(&mas, vma, ULONG_MAX) {
|
||||
+ if (!(vma->vm_flags & VM_SOFTDIRTY))
|
||||
+ continue;
|
||||
+ mmap_read_unlock(mm);
|
||||
+ if (mmap_write_lock_killable(mm)) {
|
||||
+ count = -EINTR;
|
||||
+ goto out_mm;
|
||||
+ }
|
||||
+ mas_for_each(&mas, vma, ULONG_MAX) {
|
||||
+ vma->vm_flags &= ~VM_SOFTDIRTY;
|
||||
+ vma_set_page_prot(vma);
|
||||
+ }
|
||||
+ mmap_write_downgrade(mm);
|
||||
+ break;
|
||||
+ }
|
||||
inc_tlb_flush_pending(mm);
|
||||
mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
|
||||
- 0, NULL, mm, 0, -1UL);
|
||||
+ 0, NULL, mm, start, end);
|
||||
mmu_notifier_invalidate_range_start(&range);
|
||||
}
|
||||
- walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
|
||||
+ else
|
||||
+ {
|
||||
+ if (mmap_write_lock_killable(mm)) {
|
||||
+ count = -EINTR;
|
||||
+ goto out_mm;
|
||||
+ }
|
||||
+ }
|
||||
+ walk_page_range(mm, start, end == -1UL ? -1 : end, &clear_refs_walk_ops, &cp);
|
||||
if (type == CLEAR_REFS_SOFT_DIRTY) {
|
||||
mmu_notifier_invalidate_range_end(&range);
|
||||
flush_tlb_mm(mm);
|
||||
dec_tlb_flush_pending(mm);
|
||||
+ mmap_read_unlock(mm);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ mmap_write_unlock(mm);
|
||||
}
|
||||
-out_unlock:
|
||||
- mmap_write_unlock(mm);
|
||||
out_mm:
|
||||
mmput(mm);
|
||||
}
|
||||
@@ -1301,6 +1377,7 @@ struct pagemapread {
|
||||
#define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
|
||||
#define PM_SOFT_DIRTY BIT_ULL(55)
|
||||
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
|
||||
+#define PM_SOFT_DIRTY_PAGE BIT_ULL(57)
|
||||
#define PM_UFFD_WP BIT_ULL(57)
|
||||
#define PM_FILE BIT_ULL(61)
|
||||
#define PM_SWAP BIT_ULL(62)
|
||||
@@ -1373,13 +1450,13 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
|
||||
flags |= PM_PRESENT;
|
||||
page = vm_normal_page(vma, addr, pte);
|
||||
if (pte_soft_dirty(pte))
|
||||
- flags |= PM_SOFT_DIRTY;
|
||||
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
|
||||
if (pte_uffd_wp(pte))
|
||||
flags |= PM_UFFD_WP;
|
||||
} else if (is_swap_pte(pte)) {
|
||||
swp_entry_t entry;
|
||||
if (pte_swp_soft_dirty(pte))
|
||||
- flags |= PM_SOFT_DIRTY;
|
||||
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
|
||||
if (pte_swp_uffd_wp(pte))
|
||||
flags |= PM_UFFD_WP;
|
||||
entry = pte_to_swp_entry(pte);
|
||||
@@ -1500,7 +1500,7 @@
|
||||
|
||||
flags |= PM_PRESENT;
|
||||
if (pmd_soft_dirty(pmd))
|
||||
- flags |= PM_SOFT_DIRTY;
|
||||
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
|
||||
if (pmd_uffd_wp(pmd))
|
||||
flags |= PM_UFFD_WP;
|
||||
if (pm->show_pfn)
|
||||
@@ -1442,7 +1519,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
||||
}
|
||||
flags |= PM_SWAP;
|
||||
if (pmd_swp_soft_dirty(pmd))
|
||||
- flags |= PM_SOFT_DIRTY;
|
||||
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
|
||||
if (pmd_swp_uffd_wp(pmd))
|
||||
flags |= PM_UFFD_WP;
|
||||
VM_BUG_ON(!is_pmd_migration_entry(pmd));
|
||||
--
|
||||
2.30.2
|
360
linux-tkg-patches/6.2/0002-clear-patches.patch
Normal file
360
linux-tkg-patches/6.2/0002-clear-patches.patch
Normal file
@@ -0,0 +1,360 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||
Date: Mon, 14 Mar 2016 11:10:58 -0600
|
||||
Subject: [PATCH] pci pme wakeups
|
||||
|
||||
Reduce wakeups for PME checks, which are a workaround for miswired
|
||||
boards (sadly, too many of them) in laptops.
|
||||
---
|
||||
drivers/pci/pci.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
|
||||
index c9338f9..6974fbf 100644
|
||||
--- a/drivers/pci/pci.c
|
||||
+++ b/drivers/pci/pci.c
|
||||
@@ -62,7 +62,7 @@ struct pci_pme_device {
|
||||
struct pci_dev *dev;
|
||||
};
|
||||
|
||||
-#define PME_TIMEOUT 1000 /* How long between PME checks */
|
||||
+#define PME_TIMEOUT 4000 /* How long between PME checks */
|
||||
|
||||
static void pci_dev_d3_sleep(struct pci_dev *dev)
|
||||
{
|
||||
--
|
||||
https://clearlinux.org
|
||||
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||
Date: Sat, 19 Mar 2016 21:32:19 -0400
|
||||
Subject: [PATCH] intel_idle: tweak cpuidle cstates
|
||||
|
||||
Increase target_residency in cpuidle cstate
|
||||
|
||||
Tune intel_idle to be a bit less aggressive;
|
||||
Clear Linux is cleaner in hygiene (wakeups) than the average Linux,
|
||||
so we can afford changing these in a way that increases
|
||||
performance while keeping power efficiency
|
||||
---
|
||||
drivers/idle/intel_idle.c | 44 +++++++++++++++++++--------------------
|
||||
1 file changed, 22 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
|
||||
index f449584..c994d24 100644
|
||||
--- a/drivers/idle/intel_idle.c
|
||||
+++ b/drivers/idle/intel_idle.c
|
||||
@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x01",
|
||||
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
|
||||
.exit_latency = 10,
|
||||
- .target_residency = 20,
|
||||
+ .target_residency = 120,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x10",
|
||||
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 33,
|
||||
- .target_residency = 100,
|
||||
+ .target_residency = 900,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x20",
|
||||
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 133,
|
||||
- .target_residency = 400,
|
||||
+ .target_residency = 1000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x32",
|
||||
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 166,
|
||||
- .target_residency = 500,
|
||||
+ .target_residency = 1500,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x40",
|
||||
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 300,
|
||||
- .target_residency = 900,
|
||||
+ .target_residency = 2000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x50",
|
||||
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 600,
|
||||
- .target_residency = 1800,
|
||||
+ .target_residency = 5000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x60",
|
||||
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 2600,
|
||||
- .target_residency = 7700,
|
||||
+ .target_residency = 9000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x01",
|
||||
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
|
||||
.exit_latency = 10,
|
||||
- .target_residency = 20,
|
||||
+ .target_residency = 120,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x10",
|
||||
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 40,
|
||||
- .target_residency = 100,
|
||||
+ .target_residency = 1000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x20",
|
||||
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 133,
|
||||
- .target_residency = 400,
|
||||
+ .target_residency = 1000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x32",
|
||||
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 166,
|
||||
- .target_residency = 500,
|
||||
+ .target_residency = 2000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x40",
|
||||
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 300,
|
||||
- .target_residency = 900,
|
||||
+ .target_residency = 4000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x50",
|
||||
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 600,
|
||||
- .target_residency = 1800,
|
||||
+ .target_residency = 7000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x60",
|
||||
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 2600,
|
||||
- .target_residency = 7700,
|
||||
+ .target_residency = 9000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x01",
|
||||
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
|
||||
.exit_latency = 10,
|
||||
- .target_residency = 20,
|
||||
+ .target_residency = 120,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x10",
|
||||
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 70,
|
||||
- .target_residency = 100,
|
||||
+ .target_residency = 1000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x20",
|
||||
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 85,
|
||||
- .target_residency = 200,
|
||||
+ .target_residency = 600,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x33",
|
||||
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 124,
|
||||
- .target_residency = 800,
|
||||
+ .target_residency = 3000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x40",
|
||||
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 200,
|
||||
- .target_residency = 800,
|
||||
+ .target_residency = 3200,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -708,7 +708,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x50",
|
||||
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 480,
|
||||
- .target_residency = 5000,
|
||||
+ .target_residency = 9000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x60",
|
||||
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 890,
|
||||
- .target_residency = 5000,
|
||||
+ .target_residency = 9000,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
|
||||
.desc = "MWAIT 0x01",
|
||||
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
|
||||
.exit_latency = 10,
|
||||
- .target_residency = 20,
|
||||
+ .target_residency = 300,
|
||||
.enter = &intel_idle,
|
||||
.enter_s2idle = intel_idle_s2idle, },
|
||||
{
|
||||
--
|
||||
https://clearlinux.org
|
||||
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||
Date: Fri, 6 Jan 2017 15:34:09 +0000
|
||||
Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little
|
||||
bigger than default
|
||||
|
||||
---
|
||||
net/ipv4/tcp.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
|
||||
index 30c1142..4345075 100644
|
||||
--- a/net/ipv4/tcp.c
|
||||
+++ b/net/ipv4/tcp.c
|
||||
@@ -4201,8 +4201,8 @@ void __init tcp_init(void)
|
||||
tcp_init_mem();
|
||||
/* Set per-socket limits to no more than 1/128 the pressure threshold */
|
||||
limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
|
||||
- max_wshare = min(4UL*1024*1024, limit);
|
||||
- max_rshare = min(6UL*1024*1024, limit);
|
||||
+ max_wshare = min(16UL*1024*1024, limit);
|
||||
+ max_rshare = min(16UL*1024*1024, limit);
|
||||
|
||||
init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
|
||||
init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
|
||||
--
|
||||
https://clearlinux.org
|
||||
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||
Date: Sun, 18 Feb 2018 23:35:41 +0000
|
||||
Subject: [PATCH] locking: rwsem: spin faster
|
||||
|
||||
tweak rwsem owner spinning a bit
|
||||
---
|
||||
kernel/locking/rwsem.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
|
||||
index f11b9bd..1bbfcc1 100644
|
||||
--- a/kernel/locking/rwsem.c
|
||||
+++ b/kernel/locking/rwsem.c
|
||||
@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
|
||||
struct task_struct *new, *owner;
|
||||
unsigned long flags, new_flags;
|
||||
enum owner_state state;
|
||||
+ int i = 0;
|
||||
|
||||
owner = rwsem_owner_flags(sem, &flags);
|
||||
state = rwsem_owner_state(owner, flags, nonspinnable);
|
||||
@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
|
||||
break;
|
||||
}
|
||||
|
||||
- cpu_relax();
|
||||
+ if (i++ > 1000)
|
||||
+ cpu_relax();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
--
|
||||
https://clearlinux.org
|
||||
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||
Date: Thu, 2 Jun 2016 23:36:32 -0500
|
||||
Subject: [PATCH] initialize ata before graphics
|
||||
|
||||
ATA init is the long pole in the boot process, and it's asynchronous.
|
||||
move the graphics init after it so that ata and graphics initialize
|
||||
in parallel
|
||||
---
|
||||
drivers/Makefile | 15 ++++++++-------
|
||||
1 file changed, 8 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/drivers/Makefile b/drivers/Makefile
|
||||
index c0cd1b9..af1e2fb 100644
|
||||
--- a/drivers/Makefile
|
||||
+++ b/drivers/Makefile
|
||||
@@ -59,15 +59,8 @@ obj-y += char/
|
||||
# iommu/ comes before gpu as gpu are using iommu controllers
|
||||
obj-y += iommu/
|
||||
|
||||
-# gpu/ comes after char for AGP vs DRM startup and after iommu
|
||||
-obj-y += gpu/
|
||||
-
|
||||
obj-$(CONFIG_CONNECTOR) += connector/
|
||||
|
||||
-# i810fb and intelfb depend on char/agp/
|
||||
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
|
||||
-obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
|
||||
-
|
||||
obj-$(CONFIG_PARPORT) += parport/
|
||||
obj-$(CONFIG_NVM) += lightnvm/
|
||||
obj-y += base/ block/ misc/ mfd/ nfc/
|
||||
@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/
|
||||
obj-y += scsi/
|
||||
obj-y += nvme/
|
||||
obj-$(CONFIG_ATA) += ata/
|
||||
+
|
||||
+# gpu/ comes after char for AGP vs DRM startup and after iommu
|
||||
+obj-y += gpu/
|
||||
+
|
||||
+# i810fb and intelfb depend on char/agp/
|
||||
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
|
||||
+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
|
||||
+
|
||||
obj-$(CONFIG_TARGET_CORE) += target/
|
||||
obj-$(CONFIG_MTD) += mtd/
|
||||
obj-$(CONFIG_SPI) += spi/
|
||||
--
|
||||
https://clearlinux.org
|
||||
|
@@ -0,0 +1,363 @@
|
||||
From 9c85113cf4019e7b277a44e72bda8b78347aa72f Mon Sep 17 00:00:00 2001
|
||||
From: Paul Gofman <pgofman@codeweavers.com>
|
||||
Date: Thu, 7 May 2020 14:05:31 +0300
|
||||
Subject: [PATCH 2/2] mm: Support soft dirty flag read with reset.
|
||||
|
||||
---
|
||||
fs/proc/base.c | 3 +
|
||||
fs/proc/internal.h | 1 +
|
||||
fs/proc/task_mmu.c | 144 +++++++++++++++++++++++++++++++++++++++------
|
||||
3 files changed, 130 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/fs/proc/base.c b/fs/proc/base.c
|
||||
index b3422cda2a91..8199ae2411ca 100644
|
||||
--- a/fs/proc/base.c
|
||||
+++ b/fs/proc/base.c
|
||||
@@ -3202,6 +3202,9 @@ static const struct pid_entry tgid_base_stuff[] = {
|
||||
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
|
||||
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
|
||||
REG("pagemap", S_IRUSR, proc_pagemap_operations),
|
||||
+#ifdef CONFIG_MEM_SOFT_DIRTY
|
||||
+ REG("pagemap_reset", S_IRUSR, proc_pagemap_reset_operations),
|
||||
+#endif
|
||||
#endif
|
||||
#ifdef CONFIG_SECURITY
|
||||
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
|
||||
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
|
||||
index f60b379dcdc7..36a901cf0e7f 100644
|
||||
--- a/fs/proc/internal.h
|
||||
+++ b/fs/proc/internal.h
|
||||
@@ -303,6 +303,7 @@ extern const struct file_operations proc_pid_smaps_operations;
|
||||
extern const struct file_operations proc_pid_smaps_rollup_operations;
|
||||
extern const struct file_operations proc_clear_refs_operations;
|
||||
extern const struct file_operations proc_pagemap_operations;
|
||||
+extern const struct file_operations proc_pagemap_reset_operations;
|
||||
|
||||
extern unsigned long task_vsize(struct mm_struct *);
|
||||
extern unsigned long task_statm(struct mm_struct *,
|
||||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
|
||||
index 7c7865028f10..a21694967915 100644
|
||||
--- a/fs/proc/task_mmu.c
|
||||
+++ b/fs/proc/task_mmu.c
|
||||
@@ -1056,8 +1056,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
|
||||
return page_maybe_dma_pinned(page);
|
||||
}
|
||||
|
||||
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
|
||||
- unsigned long addr, pte_t *pte)
|
||||
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
|
||||
+ unsigned long addr, pte_t *pte)
|
||||
{
|
||||
/*
|
||||
* The soft-dirty tracker uses #PF-s to catch writes
|
||||
@@ -1066,37 +1066,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
|
||||
* of how soft-dirty works.
|
||||
*/
|
||||
pte_t ptent = *pte;
|
||||
+ bool ret = false;
|
||||
|
||||
if (pte_present(ptent)) {
|
||||
pte_t old_pte;
|
||||
|
||||
if (pte_is_pinned(vma, addr, ptent))
|
||||
- return;
|
||||
+ return ret;
|
||||
old_pte = ptep_modify_prot_start(vma, addr, pte);
|
||||
+ ret = pte_soft_dirty(old_pte);
|
||||
ptent = pte_wrprotect(old_pte);
|
||||
ptent = pte_clear_soft_dirty(ptent);
|
||||
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
|
||||
} else if (is_swap_pte(ptent)) {
|
||||
+ ret = pte_swp_soft_dirty(ptent);
|
||||
ptent = pte_swp_clear_soft_dirty(ptent);
|
||||
set_pte_at(vma->vm_mm, addr, pte, ptent);
|
||||
}
|
||||
+ return ret;
|
||||
}
|
||||
#else
|
||||
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
|
||||
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *pte)
|
||||
{
|
||||
+ return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
|
||||
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
|
||||
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
|
||||
unsigned long addr, pmd_t *pmdp)
|
||||
{
|
||||
pmd_t old, pmd = *pmdp;
|
||||
+ bool ret = false;
|
||||
|
||||
if (pmd_present(pmd)) {
|
||||
/* See comment in change_huge_pmd() */
|
||||
old = pmdp_invalidate(vma, addr, pmdp);
|
||||
+
|
||||
+ ret = pmd_soft_dirty(old);
|
||||
+
|
||||
if (pmd_dirty(old))
|
||||
pmd = pmd_mkdirty(pmd);
|
||||
if (pmd_young(old))
|
||||
@@ -1107,14 +1116,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
|
||||
|
||||
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
|
||||
} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
|
||||
+ ret = pmd_swp_soft_dirty(pmd);
|
||||
pmd = pmd_swp_clear_soft_dirty(pmd);
|
||||
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
|
||||
}
|
||||
+ return ret;
|
||||
}
|
||||
#else
|
||||
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
|
||||
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
|
||||
unsigned long addr, pmd_t *pmdp)
|
||||
{
|
||||
+ return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1367,6 +1379,7 @@ struct pagemapread {
|
||||
int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
|
||||
pagemap_entry_t *buffer;
|
||||
bool show_pfn;
|
||||
+ bool reset;
|
||||
};
|
||||
|
||||
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
|
||||
@@ -1398,6 +1411,14 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm)
|
||||
+{
|
||||
+ ((unsigned long *)pm->buffer)[pm->pos++] = addr;
|
||||
+ if (pm->pos >= pm->len)
|
||||
+ return PM_END_OF_BUFFER;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int pagemap_pte_hole(unsigned long start, unsigned long end,
|
||||
__always_unused int depth, struct mm_walk *walk)
|
||||
{
|
||||
@@ -1405,6 +1426,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
|
||||
unsigned long addr = start;
|
||||
int err = 0;
|
||||
|
||||
+ if (pm->reset)
|
||||
+ goto out;
|
||||
+
|
||||
while (addr < end) {
|
||||
struct vm_area_struct *vma = find_vma(walk->mm, addr);
|
||||
pagemap_entry_t pme = make_pme(0, 0);
|
||||
@@ -1439,8 +1463,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
|
||||
}
|
||||
|
||||
static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
|
||||
- struct vm_area_struct *vma, unsigned long addr, pte_t pte)
|
||||
+ struct vm_area_struct *vma, unsigned long addr, pte_t *pte_addr)
|
||||
{
|
||||
+ pte_t pte = *pte_addr;
|
||||
u64 frame = 0, flags = 0;
|
||||
struct page *page = NULL;
|
||||
|
||||
@@ -1493,6 +1518,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
||||
pmd_t pmd = *pmdp;
|
||||
struct page *page = NULL;
|
||||
|
||||
+ if (pm->reset)
|
||||
+ {
|
||||
+ if (clear_soft_dirty_pmd(vma, addr, pmdp))
|
||||
+ {
|
||||
+ for (; addr != end; addr += PAGE_SIZE)
|
||||
+ {
|
||||
+ err = add_addr_to_pagemap(addr, pm);
|
||||
+ if (err)
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ goto trans_huge_done;
|
||||
+ }
|
||||
+
|
||||
if (vma->vm_flags & VM_SOFTDIRTY)
|
||||
flags |= PM_SOFT_DIRTY;
|
||||
|
||||
@@ -1541,6 +1580,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
||||
frame += (1 << MAX_SWAPFILES_SHIFT);
|
||||
}
|
||||
}
|
||||
+trans_huge_done:
|
||||
spin_unlock(ptl);
|
||||
return err;
|
||||
}
|
||||
@@ -1555,10 +1595,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
||||
*/
|
||||
orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
|
||||
for (; addr < end; pte++, addr += PAGE_SIZE) {
|
||||
- pagemap_entry_t pme;
|
||||
+ if (pm->reset)
|
||||
+ {
|
||||
+ if (clear_soft_dirty(vma, addr, pte))
|
||||
+ err = add_addr_to_pagemap(addr, pm);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ pagemap_entry_t pme;
|
||||
|
||||
- pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
|
||||
- err = add_to_pagemap(addr, &pme, pm);
|
||||
+ pme = pte_to_pagemap_entry(pm, vma, addr, pte);
|
||||
+ err = add_to_pagemap(addr, &pme, pm);
|
||||
+ }
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
@@ -1650,8 +1698,8 @@ static const struct mm_walk_ops pagemap_ops = {
|
||||
* determine which areas of memory are actually mapped and llseek to
|
||||
* skip over unmapped regions.
|
||||
*/
|
||||
-static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||
- size_t count, loff_t *ppos)
|
||||
+static ssize_t do_pagemap_read(struct file *file, char __user *buf,
|
||||
+ size_t count, loff_t *ppos, bool reset)
|
||||
{
|
||||
struct mm_struct *mm = file->private_data;
|
||||
struct pagemapread pm;
|
||||
@@ -1660,6 +1708,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||
unsigned long start_vaddr;
|
||||
unsigned long end_vaddr;
|
||||
int ret = 0, copied = 0;
|
||||
+ struct mmu_notifier_range range;
|
||||
+ size_t buffer_len;
|
||||
|
||||
if (!mm || !mmget_not_zero(mm))
|
||||
goto out;
|
||||
@@ -1675,19 +1725,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||
|
||||
/* do not disclose physical addresses: attack vector */
|
||||
pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
|
||||
+ pm.reset = reset;
|
||||
|
||||
- pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
|
||||
- pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
|
||||
+ buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES);
|
||||
+
|
||||
+ pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL);
|
||||
ret = -ENOMEM;
|
||||
if (!pm.buffer)
|
||||
goto out_mm;
|
||||
|
||||
src = *ppos;
|
||||
svpfn = src / PM_ENTRY_BYTES;
|
||||
- end_vaddr = mm->task_size;
|
||||
+
|
||||
+ start_vaddr = svpfn << PAGE_SHIFT;
|
||||
+
|
||||
+ if (reset)
|
||||
+ {
|
||||
+ if (count < sizeof(end_vaddr))
|
||||
+ {
|
||||
+ ret = -EINVAL;
|
||||
+ goto out_mm;
|
||||
+ }
|
||||
+ if (copy_from_user(&end_vaddr, buf, sizeof(end_vaddr)))
|
||||
+ return -EFAULT;
|
||||
+ end_vaddr = min(end_vaddr, mm->task_size);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ end_vaddr = mm->task_size;
|
||||
+ start_vaddr = end_vaddr;
|
||||
+ }
|
||||
|
||||
/* watch out for wraparound */
|
||||
- start_vaddr = end_vaddr;
|
||||
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
|
||||
start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
|
||||
|
||||
@@ -1707,18 +1776,35 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||
unsigned long end;
|
||||
|
||||
pm.pos = 0;
|
||||
- end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
|
||||
+ pm.len = min(buffer_len, count / PM_ENTRY_BYTES);
|
||||
+
|
||||
+ end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT));
|
||||
/* overflow ? */
|
||||
if (end < start_vaddr || end > end_vaddr)
|
||||
end = end_vaddr;
|
||||
+
|
||||
ret = mmap_read_lock_killable(mm);
|
||||
if (ret)
|
||||
goto out_free;
|
||||
+
|
||||
+ if (reset)
|
||||
+ {
|
||||
+ inc_tlb_flush_pending(mm);
|
||||
+ mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
|
||||
+ 0, NULL, mm, start_vaddr, end);
|
||||
+ mmu_notifier_invalidate_range_start(&range);
|
||||
+ }
|
||||
ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
|
||||
+ if (reset)
|
||||
+ {
|
||||
+ mmu_notifier_invalidate_range_end(&range);
|
||||
+ flush_tlb_mm(mm);
|
||||
+ dec_tlb_flush_pending(mm);
|
||||
+ }
|
||||
mmap_read_unlock(mm);
|
||||
- start_vaddr = end;
|
||||
|
||||
len = min(count, PM_ENTRY_BYTES * pm.pos);
|
||||
+ BUG_ON(ret && ret != PM_END_OF_BUFFER);
|
||||
if (copy_to_user(buf, pm.buffer, len)) {
|
||||
ret = -EFAULT;
|
||||
goto out_free;
|
||||
@@ -1726,6 +1812,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||
copied += len;
|
||||
buf += len;
|
||||
count -= len;
|
||||
+
|
||||
+ start_vaddr = reset && pm.pos == pm.len ? ((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end;
|
||||
}
|
||||
*ppos += copied;
|
||||
if (!ret || ret == PM_END_OF_BUFFER)
|
||||
@@ -1739,6 +1827,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static ssize_t pagemap_read(struct file *file, char __user *buf,
|
||||
+ size_t count, loff_t *ppos)
|
||||
+{
|
||||
+ return do_pagemap_read(file, buf, count, ppos, false);
|
||||
+}
|
||||
+
|
||||
+static ssize_t pagemap_reset_read(struct file *file, char __user *buf,
|
||||
+ size_t count, loff_t *ppos)
|
||||
+{
|
||||
+ return do_pagemap_read(file, buf, count, ppos, true);
|
||||
+}
|
||||
+
|
||||
static int pagemap_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct mm_struct *mm;
|
||||
@@ -1765,6 +1865,14 @@ const struct file_operations proc_pagemap_operations = {
|
||||
.open = pagemap_open,
|
||||
.release = pagemap_release,
|
||||
};
|
||||
+
|
||||
+const struct file_operations proc_pagemap_reset_operations = {
|
||||
+ .llseek = mem_lseek, /* borrow this */
|
||||
+ .read = pagemap_reset_read,
|
||||
+ .open = pagemap_open,
|
||||
+ .release = pagemap_release,
|
||||
+};
|
||||
+
|
||||
#endif /* CONFIG_PROC_PAGE_MONITOR */
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
--
|
||||
2.30.2
|
||||
|
822
linux-tkg-patches/6.2/0003-glitched-base.patch
Normal file
822
linux-tkg-patches/6.2/0003-glitched-base.patch
Normal file
@@ -0,0 +1,822 @@
|
||||
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
|
||||
From: Tk-Glitch <ti3nou@gmail.com>
|
||||
Date: Wed, 4 Jul 2018 04:30:08 +0200
|
||||
Subject: [PATCH 01/17] glitched
|
||||
|
||||
---
|
||||
init/Makefile | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/init/Makefile b/init/Makefile
|
||||
index baf3ab8d9d49..854e32e6aec7 100755
|
||||
--- a/init/Makefile
|
||||
+++ b/init/Makefile
|
||||
@@ -19,7 +19,7 @@ else
|
||||
|
||||
# Maximum length of UTS_VERSION is 64 chars
|
||||
filechk_uts_version = \
|
||||
- utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "$(build-timestamp)" | cut -b -64); \
|
||||
+ utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "TKG" "$(build-timestamp)" | cut -b -64); \
|
||||
echo '$(pound)'define UTS_VERSION \""$${utsver}"\"
|
||||
|
||||
#
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <admfrade@gmail.com>
|
||||
Date: Mon, 29 Jan 2018 16:59:22 +0000
|
||||
Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which
|
||||
VFS caches are reclaimed
|
||||
|
||||
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
|
||||
---
|
||||
fs/dcache.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/fs/dcache.c b/fs/dcache.c
|
||||
index 361ea7ab30ea..0c5cf69b241a 100644
|
||||
--- a/fs/dcache.c
|
||||
+++ b/fs/dcache.c
|
||||
@@ -71,7 +71,7 @@
|
||||
* If no ancestor relationship:
|
||||
* arbitrary, since it's serialized on rename_lock
|
||||
*/
|
||||
-int sysctl_vfs_cache_pressure __read_mostly = 100;
|
||||
+int sysctl_vfs_cache_pressure __read_mostly = 50;
|
||||
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
|
||||
|
||||
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
|
||||
index f788cd61df21..2bfbb4213707 100644
|
||||
--- a/kernel/sched/rt.c
|
||||
+++ b/kernel/sched/rt.c
|
||||
@@ -15,9 +15,9 @@ __read_mostly int scheduler_running;
|
||||
|
||||
/*
|
||||
* part of the period that we allow rt tasks to run in us.
|
||||
- * default: 0.95s
|
||||
+ * XanMod default: 0.98s
|
||||
*/
|
||||
-int sysctl_sched_rt_runtime = 950000;
|
||||
+int sysctl_sched_rt_runtime = 980000;
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <admfrade@gmail.com>
|
||||
Date: Mon, 29 Jan 2018 17:41:29 +0000
|
||||
Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo
|
||||
|
||||
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
|
||||
---
|
||||
scripts/setlocalversion | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
|
||||
index 20f2efd57b11..0552d8b9f582 100755
|
||||
--- a/scripts/setlocalversion
|
||||
+++ b/scripts/setlocalversion
|
||||
@@ -54,7 +54,7 @@ scm_version()
|
||||
# If only the short version is requested, don't bother
|
||||
# running further git commands
|
||||
if $short; then
|
||||
- echo "+"
|
||||
+ # echo "+"
|
||||
return
|
||||
fi
|
||||
# If we are past a tagged commit (like
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
||||
Date: Fri, 26 Oct 2018 11:22:33 +0100
|
||||
Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3
|
||||
inlining
|
||||
|
||||
---
|
||||
drivers/infiniband/core/addr.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
|
||||
index 3a98439bba83..6efc4f907f58 100644
|
||||
--- a/drivers/infiniband/core/addr.c
|
||||
+++ b/drivers/infiniband/core/addr.c
|
||||
@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
|
||||
union {
|
||||
struct sockaddr_in _sockaddr_in;
|
||||
struct sockaddr_in6 _sockaddr_in6;
|
||||
+ struct sockaddr_ib _sockaddr_ib;
|
||||
} sgid_addr, dgid_addr;
|
||||
int ret;
|
||||
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001
|
||||
From: Etienne Juvigny <Ti3noU@gmail.com>
|
||||
Date: Mon, 3 Sep 2018 17:36:25 +0200
|
||||
Subject: [PATCH 07/17] Zenify & stuff
|
||||
|
||||
---
|
||||
init/Kconfig | 32 ++++++++++++++++++++++++++++++++
|
||||
kernel/sched/fair.c | 25 +++++++++++++++++++++++++
|
||||
mm/page-writeback.c | 8 ++++++++
|
||||
3 files changed, 65 insertions(+)
|
||||
|
||||
diff --git a/init/Kconfig b/init/Kconfig
|
||||
index 3ae8678e1145..da708eed0f1e 100644
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK
|
||||
|
||||
menu "General setup"
|
||||
|
||||
+config ZENIFY
|
||||
+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience"
|
||||
+ default y
|
||||
+ help
|
||||
+ Tunes the kernel for responsiveness at the cost of throughput and power usage.
|
||||
+
|
||||
+ --- Virtual Memory Subsystem ---------------------------
|
||||
+
|
||||
+ Mem dirty before bg writeback..: 10 % -> 20 %
|
||||
+ Mem dirty before sync writeback: 20 % -> 50 %
|
||||
+
|
||||
+ --- Block Layer ----------------------------------------
|
||||
+
|
||||
+ Queue depth...............: 128 -> 512
|
||||
+ Default MQ scheduler......: mq-deadline -> bfq
|
||||
+
|
||||
+ --- CFS CPU Scheduler ----------------------------------
|
||||
+
|
||||
+ Scheduling latency.............: 6 -> 3 ms
|
||||
+ Minimal granularity............: 0.75 -> 0.3 ms
|
||||
+ Wakeup granularity.............: 1 -> 0.5 ms
|
||||
+ CPU migration cost.............: 0.5 -> 0.25 ms
|
||||
+ Bandwidth slice size...........: 5 -> 3 ms
|
||||
+ Ondemand fine upscaling limit..: 95 % -> 85 %
|
||||
+
|
||||
+ --- MuQSS CPU Scheduler --------------------------------
|
||||
+
|
||||
+ Scheduling interval............: 6 -> 3 ms
|
||||
+ ISO task max realtime use......: 70 % -> 25 %
|
||||
+ Ondemand coarse upscaling limit: 80 % -> 45 %
|
||||
+ Ondemand fine upscaling limit..: 95 % -> 45 %
|
||||
+
|
||||
config BROKEN
|
||||
bool
|
||||
|
||||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
||||
index 6b3b59cc51d6..2a0072192c3d 100644
|
||||
--- a/kernel/sched/fair.c
|
||||
+++ b/kernel/sched/fair.c
|
||||
@@ -37,8 +37,13 @@
|
||||
*
|
||||
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
|
||||
*/
|
||||
+#ifdef CONFIG_ZENIFY
|
||||
+unsigned int sysctl_sched_latency = 3000000ULL;
|
||||
+static unsigned int normalized_sysctl_sched_latency = 3000000ULL;
|
||||
+#else
|
||||
unsigned int sysctl_sched_latency = 6000000ULL;
|
||||
static unsigned int normalized_sysctl_sched_latency = 6000000ULL;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* The initial- and re-scaling of tunables is configurable
|
||||
@@ -58,21 +63,34 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
|
||||
*
|
||||
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
||||
*/
|
||||
+#ifdef CONFIG_ZENIFY
|
||||
+unsigned int sysctl_sched_min_granularity = 300000ULL;
|
||||
+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL;
|
||||
+#else
|
||||
unsigned int sysctl_sched_min_granularity = 750000ULL;
|
||||
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* Minimal preemption granularity for CPU-bound SCHED_IDLE tasks.
|
||||
* Applies only when SCHED_IDLE tasks compete with normal tasks.
|
||||
*
|
||||
* (default: 0.75 msec)
|
||||
*/
|
||||
+#ifdef CONFIG_ZENIFY
|
||||
+unsigned int sysctl_sched_idle_min_granularity = 300000ULL;
|
||||
+#else
|
||||
unsigned int sysctl_sched_idle_min_granularity = 750000ULL;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
|
||||
*/
|
||||
+#ifdef CONFIG_ZENIFY
|
||||
+static unsigned int sched_nr_latency = 10;
|
||||
+#else
|
||||
static unsigned int sched_nr_latency = 8;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* After fork, child runs first. If set to 0 (default) then
|
||||
@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu)
|
||||
*
|
||||
* (default: 5 msec, units: microseconds)
|
||||
*/
|
||||
+#ifdef CONFIG_ZENIFY
|
||||
+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
|
||||
+#else
|
||||
static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
|
||||
#endif
|
||||
+#endif
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
static struct ctl_table sched_fair_sysctls[] = {
|
||||
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
|
||||
index 28b3e7a67565..01a1aef2b9b1 100644
|
||||
--- a/mm/page-writeback.c
|
||||
+++ b/mm/page-writeback.c
|
||||
@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
|
||||
/*
|
||||
* Start background writeback (via writeback threads) at this percentage
|
||||
*/
|
||||
+#ifdef CONFIG_ZENIFY
|
||||
+static int dirty_background_ratio = 20;
|
||||
+#else
|
||||
static int dirty_background_ratio = 10;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* dirty_background_bytes starts at 0 (disabled) so that it is a function of
|
||||
@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable;
|
||||
/*
|
||||
* The generator of dirty data starts writeback at this percentage
|
||||
*/
|
||||
+#ifdef CONFIG_ZENIFY
|
||||
+static int vm_dirty_ratio = 50;
|
||||
+#else
|
||||
static int vm_dirty_ratio = 20;
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <damentz@liquorix.net>
|
||||
Date: Sun, 16 Jan 2011 18:57:32 -0600
|
||||
Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control
|
||||
|
||||
4.4: In my tests YeAH dramatically slowed down transfers over a WLAN,
|
||||
reducing throughput from ~65Mbps (CUBIC) to ~7Mbps (YeAH) over 10
|
||||
seconds (netperf TCP_STREAM) including long stalls.
|
||||
|
||||
Be careful when choosing this. ~heftig
|
||||
---
|
||||
net/ipv4/Kconfig | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
|
||||
index e64e59b536d3..bfb55ef7ebbe 100644
|
||||
--- a/net/ipv4/Kconfig
|
||||
+++ b/net/ipv4/Kconfig
|
||||
@@ -691,6 +691,9 @@ choice
|
||||
config DEFAULT_VEGAS
|
||||
bool "Vegas" if TCP_CONG_VEGAS=y
|
||||
|
||||
+ config DEFAULT_YEAH
|
||||
+ bool "YeAH" if TCP_CONG_YEAH=y
|
||||
+
|
||||
config DEFAULT_VENO
|
||||
bool "Veno" if TCP_CONG_VENO=y
|
||||
|
||||
@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG
|
||||
default "htcp" if DEFAULT_HTCP
|
||||
default "hybla" if DEFAULT_HYBLA
|
||||
default "vegas" if DEFAULT_VEGAS
|
||||
+ default "yeah" if DEFAULT_YEAH
|
||||
default "westwood" if DEFAULT_WESTWOOD
|
||||
default "veno" if DEFAULT_VENO
|
||||
default "reno" if DEFAULT_RENO
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <steven@liquorix.net>
|
||||
Date: Wed, 28 Nov 2018 19:01:27 -0600
|
||||
Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag
|
||||
strategy
|
||||
|
||||
For some reason, the default strategy to respond to THP fault fallbacks
|
||||
is still just madvise, meaning stall if the program wants transparent
|
||||
hugepages, but don't trigger a background reclaim / compaction if THP
|
||||
begins to fail allocations. This creates a snowball effect where we
|
||||
still use the THP code paths, but we almost always fail once a system
|
||||
has been active and busy for a while.
|
||||
|
||||
The option "defer" was created for interactive systems where THP can
|
||||
still improve performance. If we have to fallback to a regular page due
|
||||
to an allocation failure or anything else, we will trigger a background
|
||||
reclaim and compaction so future THP attempts succeed and previous
|
||||
attempts eventually have their smaller pages combined without stalling
|
||||
running applications.
|
||||
|
||||
We still want madvise to stall applications that explicitly want THP,
|
||||
so defer+madvise _does_ make a ton of sense. Make it the default for
|
||||
interactive systems, especially if the kernel maintainer left
|
||||
transparent hugepages on "always".
|
||||
|
||||
Reasoning and details in the original patch: https://lwn.net/Articles/711248/
|
||||
---
|
||||
mm/huge_memory.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
|
||||
index 74300e337c3c..9277f22c10a7 100644
|
||||
--- a/mm/huge_memory.c
|
||||
+++ b/mm/huge_memory.c
|
||||
@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly =
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
|
||||
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
|
||||
#endif
|
||||
+#ifdef CONFIG_ZENIFY
|
||||
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
|
||||
+#else
|
||||
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
|
||||
+#endif
|
||||
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
|
||||
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
|
||||
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From 2b65a1329cb220b43c19c4d0de5833fae9e2b22d Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <admfrade@gmail.com>
|
||||
Date: Wed, 24 Oct 2018 16:58:52 -0300
|
||||
Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default
|
||||
|
||||
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
|
||||
---
|
||||
net/sched/Kconfig | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
|
||||
index 84badf00647e..6a922bca9f39 100644
|
||||
--- a/net/sched/Kconfig
|
||||
+++ b/net/sched/Kconfig
|
||||
@@ -471,6 +471,9 @@ choice
|
||||
config DEFAULT_SFQ
|
||||
bool "Stochastic Fair Queue" if NET_SCH_SFQ
|
||||
|
||||
+ config DEFAULT_CAKE
|
||||
+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE
|
||||
+
|
||||
config DEFAULT_PFIFO_FAST
|
||||
bool "Priority FIFO Fast"
|
||||
endchoice
|
||||
@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH
|
||||
default "fq" if DEFAULT_FQ
|
||||
default "fq_codel" if DEFAULT_FQ_CODEL
|
||||
default "sfq" if DEFAULT_SFQ
|
||||
+ default "cake" if DEFAULT_CAKE
|
||||
default "pfifo_fast"
|
||||
endif
|
||||
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
|
||||
From: Tk-Glitch <ti3nou@gmail.com>
|
||||
Date: Fri, 19 Apr 2019 12:33:38 +0200
|
||||
Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default
|
||||
|
||||
The value is still pretty low, and AMD64-ABI and ELF extended numbering
|
||||
supports that, so we should be fine on modern x86 systems.
|
||||
|
||||
This fixes crashes in some applications using more than 65535 vmas (also
|
||||
affects some windows games running in wine, such as Star Citizen).
|
||||
---
|
||||
include/linux/mm.h | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/include/linux/mm.h b/include/linux/mm.h
|
||||
index bc05c3588aa3..b0cefe94920d 100644
|
||||
--- a/include/linux/mm.h
|
||||
+++ b/include/linux/mm.h
|
||||
@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
|
||||
* not a hard limit any more. Although some userspace tools can be surprised by
|
||||
* that.
|
||||
*/
|
||||
-#define MAPCOUNT_ELF_CORE_MARGIN (5)
|
||||
-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
|
||||
+#define DEFAULT_MAX_MAP_COUNT (262144)
|
||||
|
||||
extern int sysctl_max_map_count;
|
||||
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001
|
||||
From: Tk-Glitch <ti3nou@gmail.com>
|
||||
Date: Mon, 27 Jul 2020 00:19:18 +0200
|
||||
Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT
|
||||
|
||||
Some games such as Detroit: Become Human tend to be very crash prone with
|
||||
lower values.
|
||||
---
|
||||
include/linux/mm.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/linux/mm.h b/include/linux/mm.h
|
||||
index b0cefe94920d..890165099b07 100644
|
||||
--- a/include/linux/mm.h
|
||||
+++ b/include/linux/mm.h
|
||||
@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
|
||||
* not a hard limit any more. Although some userspace tools can be surprised by
|
||||
* that.
|
||||
*/
|
||||
-#define DEFAULT_MAX_MAP_COUNT (262144)
|
||||
+#define DEFAULT_MAX_MAP_COUNT (16777216)
|
||||
|
||||
extern int sysctl_max_map_count;
|
||||
|
||||
--
|
||||
2.28.0
|
||||
|
||||
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <admfrade@gmail.com>
|
||||
Date: Mon, 25 Nov 2019 15:13:06 -0300
|
||||
Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq
|
||||
|
||||
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
|
||||
---
|
||||
block/elevator.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/block/elevator.c b/block/elevator.c
|
||||
index 4eab3d70e880..79669aa39d79 100644
|
||||
--- a/block/elevator.c
|
||||
+++ b/block/elevator.c
|
||||
@@ -623,19 +623,19 @@ static inline bool elv_support_iosched(struct request_queue *q)
|
||||
}
|
||||
|
||||
/*
|
||||
- * For single queue devices, default to using mq-deadline. If we have multiple
|
||||
- * queues or mq-deadline is not available, default to "none".
|
||||
+ * For single queue devices, default to using bfq. If we have multiple
|
||||
+ * queues or bfq is not available, default to "none".
|
||||
*/
|
||||
static struct elevator_type *elevator_get_default(struct request_queue *q)
|
||||
{
|
||||
if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
|
||||
return NULL;
|
||||
|
||||
if (q->nr_hw_queues != 1 &&
|
||||
!blk_mq_is_shared_tags(q->tag_set->flags))
|
||||
return NULL;
|
||||
|
||||
- return elevator_find_get(q, "mq-deadline");
|
||||
+ return elevator_find_get(q, "bfq");
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
2.28.0
|
||||
|
||||
From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001
|
||||
From: Alexandre Frade <kernel@xanmod.org>
|
||||
Date: Mon, 3 Aug 2020 17:05:04 +0000
|
||||
Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file
|
||||
read-ahead pages size
|
||||
|
||||
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
|
||||
---
|
||||
include/linux/pagemap.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
|
||||
index cf2468da68e9..007dea784451 100644
|
||||
--- a/include/linux/pagemap.h
|
||||
+++ b/include/linux/pagemap.h
|
||||
@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
|
||||
void delete_from_page_cache_batch(struct address_space *mapping,
|
||||
struct pagevec *pvec);
|
||||
|
||||
-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
|
||||
+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE)
|
||||
|
||||
void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
|
||||
struct file *, pgoff_t index, unsigned long req_count);
|
||||
--
|
||||
2.28.0
|
||||
|
||||
|
||||
From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <steven@liquorix.net>
|
||||
Date: Wed, 15 Jan 2020 20:43:56 -0600
|
||||
Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter
|
||||
|
||||
If intel-pstate is compiled into the kernel, it will preempt the loading
|
||||
of acpi-cpufreq so you can take advantage of hardware p-states without
|
||||
any friction.
|
||||
|
||||
However, intel-pstate is not completely superior to cpufreq's ondemand
|
||||
for one reason. There's no concept of an up_threshold property.
|
||||
|
||||
In ondemand, up_threshold essentially reduces the maximum utilization to
|
||||
compare against, allowing you to hit max frequencies and turbo boost
|
||||
from a much lower core utilization.
|
||||
|
||||
With intel-pstate, you have the concept of minimum and maximum
|
||||
performance, but no tunable that lets you define, maximum frequency
|
||||
means 50% core utilization.  For just this oversight, there are reasons
|
||||
you may want ondemand.
|
||||
|
||||
Let's support setting "enable" in kernel boot parameters.  This lets
|
||||
kernel maintainers include "intel_pstate=disable" statically in the
|
||||
static boot parameters, but let users of the kernel override this
|
||||
selection.
|
||||
---
|
||||
Documentation/admin-guide/kernel-parameters.txt | 3 +++
|
||||
drivers/cpufreq/intel_pstate.c | 2 ++
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
|
||||
index fb95fad81c79..3e92fee81e33 100644
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -1857,6 +1857,9 @@
|
||||
disable
|
||||
Do not enable intel_pstate as the default
|
||||
scaling driver for the supported processors
|
||||
+ enable
|
||||
+ Enable intel_pstate in-case "disable" was passed
|
||||
+ previously in the kernel boot parameters
|
||||
passive
|
||||
Use intel_pstate as a scaling driver, but configure it
|
||||
to work with generic cpufreq governors (instead of
|
||||
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
|
||||
index 36a469150ff9..aee891c9b78a 100644
|
||||
--- a/drivers/cpufreq/intel_pstate.c
|
||||
+++ b/drivers/cpufreq/intel_pstate.c
|
||||
@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str)
|
||||
if (!strcmp(str, "no_hwp"))
|
||||
no_hwp = 1;
|
||||
|
||||
+ if (!strcmp(str, "enable"))
|
||||
+ no_load = 0;
|
||||
if (!strcmp(str, "force"))
|
||||
force_load = 1;
|
||||
if (!strcmp(str, "hwp_only"))
|
||||
--
|
||||
2.28.0
|
||||
|
||||
From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001
|
||||
From: Kenny Levinsen <kl@kl.wtf>
|
||||
Date: Sun, 27 Dec 2020 14:43:13 +0000
|
||||
Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client
|
||||
|
||||
Significant time was spent on synchronize_rcu in evdev_detach_client
|
||||
when applications closed evdev devices. Switching VT away from a
|
||||
graphical environment commonly leads to mass input device closures,
|
||||
which could lead to noticeable delays on systems with many input devices.
|
||||
|
||||
Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
|
||||
client struct till after the RCU grace period instead of blocking the
|
||||
calling application.
|
||||
|
||||
While this does not solve all slow evdev fd closures, it takes care of a
|
||||
good portion of them, including this simple test:
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int idx, fd;
|
||||
const char *path = "/dev/input/event0";
|
||||
for (idx = 0; idx < 1000; idx++) {
|
||||
if ((fd = open(path, O_RDWR)) == -1) {
|
||||
return -1;
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Time to completion of above test when run locally:
|
||||
|
||||
Before: 0m27.111s
|
||||
After: 0m0.018s
|
||||
|
||||
Signed-off-by: Kenny Levinsen <kl@kl.wtf>
|
||||
---
|
||||
drivers/input/evdev.c | 19 +++++++++++--------
|
||||
1 file changed, 11 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
|
||||
index 95f90699d2b17b..2b10fe29d2c8d9 100644
|
||||
--- a/drivers/input/evdev.c
|
||||
+++ b/drivers/input/evdev.c
|
||||
@@ -46,6 +46,7 @@ struct evdev_client {
|
||||
struct fasync_struct *fasync;
|
||||
struct evdev *evdev;
|
||||
struct list_head node;
|
||||
+ struct rcu_head rcu;
|
||||
enum input_clock_type clk_type;
|
||||
bool revoked;
|
||||
unsigned long *evmasks[EV_CNT];
|
||||
@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
|
||||
spin_unlock(&evdev->client_lock);
|
||||
}
|
||||
|
||||
+static void evdev_reclaim_client(struct rcu_head *rp)
|
||||
+{
|
||||
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
|
||||
+ unsigned int i;
|
||||
+ for (i = 0; i < EV_CNT; ++i)
|
||||
+ bitmap_free(client->evmasks[i]);
|
||||
+ kvfree(client);
|
||||
+}
|
||||
+
|
||||
static void evdev_detach_client(struct evdev *evdev,
|
||||
struct evdev_client *client)
|
||||
{
|
||||
spin_lock(&evdev->client_lock);
|
||||
list_del_rcu(&client->node);
|
||||
spin_unlock(&evdev->client_lock);
|
||||
- synchronize_rcu();
|
||||
+ call_rcu(&client->rcu, evdev_reclaim_client);
|
||||
}
|
||||
|
||||
static int evdev_open_device(struct evdev *evdev)
|
||||
@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct evdev_client *client = file->private_data;
|
||||
struct evdev *evdev = client->evdev;
|
||||
- unsigned int i;
|
||||
|
||||
mutex_lock(&evdev->mutex);
|
||||
|
||||
@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
|
||||
|
||||
evdev_detach_client(evdev, client);
|
||||
|
||||
- for (i = 0; i < EV_CNT; ++i)
|
||||
- bitmap_free(client->evmasks[i]);
|
||||
-
|
||||
- kvfree(client);
|
||||
-
|
||||
evdev_close_device(evdev);
|
||||
|
||||
return 0;
|
||||
@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
|
||||
|
||||
err_free_client:
|
||||
evdev_detach_client(evdev, client);
|
||||
- kvfree(client);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
|
||||
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Date: Wed, 20 Oct 2021 20:50:11 -0700
|
||||
Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
|
||||
scheduling delays
|
||||
|
||||
The page allocator processes free pages in groups of pageblocks, where
|
||||
the size of a pageblock is typically quite large (1024 pages without
|
||||
hugetlbpage support). Pageblocks are processed atomically with the zone
|
||||
lock held, which can cause severe scheduling delays on both the CPU
|
||||
going through the pageblock and any other CPUs waiting to acquire the
|
||||
zone lock. A frequent offender is move_freepages_block(), which is used
|
||||
by rmqueue() for page allocation.
|
||||
|
||||
As it turns out, there's no requirement for pageblocks to be so large,
|
||||
so the pageblock order can simply be reduced to ease the scheduling
|
||||
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
|
||||
reasonable setting to ensure non-costly page allocation requests can
|
||||
still be serviced without always needing to free up more than one
|
||||
pageblock's worth of pages at a time.
|
||||
|
||||
This has a noticeable effect on overall system latency when memory
|
||||
pressure is elevated. The various mm functions which operate on
|
||||
pageblocks no longer appear in the preemptoff tracer, where previously
|
||||
they would spend up to 100 ms on a mobile arm64 CPU processing a
|
||||
pageblock with preemption disabled and the zone lock held.
|
||||
|
||||
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
---
|
||||
include/linux/pageblock-flags.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
|
||||
index 5f1ae07d724b88..97cda629c9e909 100644
|
||||
--- a/include/linux/pageblock-flags.h
|
||||
+++ b/include/linux/pageblock-flags.h
|
||||
@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;
|
||||
#else /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
|
||||
-#define pageblock_order (MAX_ORDER-1)
|
||||
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER
|
||||
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
|
||||
From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
|
||||
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Date: Wed, 20 Oct 2021 20:50:32 -0700
|
||||
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()
|
||||
|
||||
There is noticeable scheduling latency and heavy zone lock contention
|
||||
stemming from rmqueue_bulk's single hold of the zone lock while doing
|
||||
its work, as seen with the preemptoff tracer. There's no actual need for
|
||||
rmqueue_bulk() to hold the zone lock the entire time; it only does so
|
||||
for supposed efficiency. As such, we can relax the zone lock and even
|
||||
reschedule when IRQs are enabled in order to keep the scheduling delays
|
||||
and zone lock contention at bay. Forward progress is still guaranteed,
|
||||
as the zone lock can only be relaxed after page removal.
|
||||
|
||||
With this change, rmqueue_bulk() no longer appears as a serious offender
|
||||
in the preemptoff tracer, and system latency is noticeably improved.
|
||||
|
||||
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
---
|
||||
mm/page_alloc.c | 23 ++++++++++++++++++-----
|
||||
1 file changed, 18 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index a0b0397e29ee4c..87a983a356530c 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -3118,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
|
||||
}
|
||||
|
||||
/*
|
||||
- * Obtain a specified number of elements from the buddy allocator, all under
|
||||
- * a single hold of the lock, for efficiency. Add them to the supplied list.
|
||||
- * Returns the number of new pages which were placed at *list.
|
||||
+ * Obtain a specified number of elements from the buddy allocator, and relax the
|
||||
+ * zone lock when needed. Add them to the supplied list. Returns the number of
|
||||
+ * new pages which were placed at *list.
|
||||
*/
|
||||
static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||
unsigned long count, struct list_head *list,
|
||||
int migratetype, unsigned int alloc_flags)
|
||||
{
|
||||
unsigned long flags;
|
||||
- int i, allocated = 0;
|
||||
+ const bool can_resched = !preempt_count() && !irqs_disabled();
|
||||
+ int i, allocated = 0, last_mod = 0;
|
||||
|
||||
/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
|
||||
spin_lock(&zone->lock);
|
||||
@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||
if (unlikely(page == NULL))
|
||||
break;
|
||||
|
||||
+ /* Reschedule and ease the contention on the lock if needed */
|
||||
+ if (i + 1 < count && ((can_resched && need_resched()) ||
|
||||
+ spin_needbreak(&zone->lock))) {
|
||||
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
|
||||
+ -((i + 1 - last_mod) << order));
|
||||
+ last_mod = i + 1;
|
||||
+ spin_unlock(&zone->lock);
|
||||
+ if (can_resched)
|
||||
+ cond_resched();
|
||||
+ spin_lock(&zone->lock);
|
||||
+ }
|
||||
+
|
||||
if (unlikely(check_pcp_refill(page, order)))
|
||||
continue;
|
||||
|
||||
@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||
* on i. Do not confuse with 'allocated' which is the number of
|
||||
* pages added to the pcp list.
|
||||
*/
|
||||
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
|
||||
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
|
||||
spin_unlock(&zone->lock);
|
||||
return allocated;
|
||||
}
|
22
linux-tkg-patches/6.2/0003-glitched-cfs-additions.patch
Normal file
22
linux-tkg-patches/6.2/0003-glitched-cfs-additions.patch
Normal file
@@ -0,0 +1,22 @@
|
||||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
|
||||
index 6b3b59cc51d6..2a0072192c3d 100644
|
||||
--- a/kernel/sched/fair.c
|
||||
+++ b/kernel/sched/fair.c
|
||||
@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
|
||||
*
|
||||
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
||||
*/
|
||||
+#ifdef CONFIG_ZENIFY
|
||||
+unsigned int sysctl_sched_wakeup_granularity = 500000UL;
|
||||
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL;
|
||||
+
|
||||
+const_debug unsigned int sysctl_sched_migration_cost = 50000UL;
|
||||
+#else
|
||||
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
|
||||
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
|
||||
|
||||
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
|
||||
+#endif
|
||||
|
||||
int sched_thermal_decay_shift;
|
||||
static int __init setup_sched_thermal_decay_shift(char *str)
|
90
linux-tkg-patches/6.2/0003-glitched-cfs.patch
Normal file
90
linux-tkg-patches/6.2/0003-glitched-cfs.patch
Normal file
@@ -0,0 +1,90 @@
|
||||
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
|
||||
index 2a202a846757..1d9c7ed79b11 100644
|
||||
--- a/kernel/Kconfig.hz
|
||||
+++ b/kernel/Kconfig.hz
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
choice
|
||||
prompt "Timer frequency"
|
||||
- default HZ_250
|
||||
+ default HZ_500
|
||||
help
|
||||
Allows the configuration of the timer frequency. It is customary
|
||||
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
|
||||
@@ -39,6 +39,13 @@ choice
|
||||
on SMP and NUMA systems and exactly dividing by both PAL and
|
||||
NTSC frame rates for video and multimedia work.
|
||||
|
||||
+ config HZ_500
|
||||
+ bool "500 HZ"
|
||||
+ help
|
||||
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
|
||||
+ on desktops with great smoothness without increasing CPU power
|
||||
+ consumption and sacrificing the battery life on laptops.
|
||||
+
|
||||
config HZ_1000
|
||||
bool "1000 HZ"
|
||||
help
|
||||
@@ -52,6 +59,7 @@ config HZ
|
||||
default 100 if HZ_100
|
||||
default 250 if HZ_250
|
||||
default 300 if HZ_300
|
||||
+ default 500 if HZ_500
|
||||
default 1000 if HZ_1000
|
||||
|
||||
config SCHED_HRTICK
|
||||
|
||||
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
|
||||
index 2a202a846757..1d9c7ed79b11 100644
|
||||
--- a/kernel/Kconfig.hz
|
||||
+++ b/kernel/Kconfig.hz
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
choice
|
||||
prompt "Timer frequency"
|
||||
- default HZ_500
|
||||
+ default HZ_750
|
||||
help
|
||||
Allows the configuration of the timer frequency. It is customary
|
||||
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
|
||||
@@ -46,6 +46,13 @@ choice
|
||||
on desktops with great smoothness without increasing CPU power
|
||||
consumption and sacrificing the battery life on laptops.
|
||||
|
||||
+ config HZ_750
|
||||
+ bool "750 HZ"
|
||||
+ help
|
||||
+ 750 Hz is a good timer frequency for desktops. Provides fast
|
||||
+ interactivity with great smoothness without sacrificing too
|
||||
+ much throughput.
|
||||
+
|
||||
config HZ_1000
|
||||
bool "1000 HZ"
|
||||
help
|
||||
@@ -60,6 +67,7 @@ config HZ
|
||||
default 250 if HZ_250
|
||||
default 300 if HZ_300
|
||||
default 500 if HZ_500
|
||||
+ default 750 if HZ_750
|
||||
default 1000 if HZ_1000
|
||||
|
||||
config SCHED_HRTICK
|
||||
|
||||
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
|
||||
index 6b423eebfd5d..61e3271675d6 100644
|
||||
--- a/drivers/cpufreq/cpufreq_ondemand.c
|
||||
+++ b/drivers/cpufreq/cpufreq_ondemand.c
|
||||
@@ -21,10 +21,10 @@
|
||||
#include "cpufreq_ondemand.h"
|
||||
|
||||
/* On-demand governor macros */
|
||||
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
|
||||
-#define DEF_SAMPLING_DOWN_FACTOR (1)
|
||||
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
|
||||
+#define DEF_SAMPLING_DOWN_FACTOR (5)
|
||||
#define MAX_SAMPLING_DOWN_FACTOR (100000)
|
||||
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
|
||||
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
|
||||
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
|
||||
#define MIN_FREQUENCY_UP_THRESHOLD (1)
|
||||
#define MAX_FREQUENCY_UP_THRESHOLD (100)
|
193
linux-tkg-patches/6.2/0006-add-acs-overrides_iommu.patch
Normal file
193
linux-tkg-patches/6.2/0006-add-acs-overrides_iommu.patch
Normal file
@@ -0,0 +1,193 @@
|
||||
From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001
|
||||
From: Mark Weiman <mark.weiman@markzz.com>
|
||||
Date: Sun, 12 Aug 2018 11:36:21 -0400
|
||||
Subject: [PATCH] pci: Enable overrides for missing ACS capabilities
|
||||
|
||||
This is an updated version of Alex Williamson's patch from:
|
||||
https://lkml.org/lkml/2013/5/30/513
|
||||
|
||||
Original commit message follows:
|
||||
|
||||
PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that
|
||||
allows us to control whether transactions are allowed to be redirected
|
||||
in various subnodes of a PCIe topology. For instance, if two
|
||||
endpoints are below a root port or downstream switch port, the
|
||||
downstream port may optionally redirect transactions between the
|
||||
devices, bypassing upstream devices. The same can happen internally
|
||||
on multifunction devices. The transaction may never be visible to the
|
||||
upstream devices.
|
||||
|
||||
One upstream device that we particularly care about is the IOMMU. If
|
||||
a redirection occurs in the topology below the IOMMU, then the IOMMU
|
||||
cannot provide isolation between devices. This is why the PCIe spec
|
||||
encourages topologies to include ACS support. Without it, we have to
|
||||
assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation.
|
||||
|
||||
Unfortunately, far too many topologies do not support ACS to make this
|
||||
a steadfast requirement. Even the latest chipsets from Intel are only
|
||||
sporadically supporting ACS. We have trouble getting interconnect
|
||||
vendors to include the PCIe spec required PCIe capability, let alone
|
||||
suggested features.
|
||||
|
||||
Therefore, we need to add some flexibility. The pcie_acs_override=
|
||||
boot option lets users opt-in specific devices or sets of devices to
|
||||
assume ACS support. The "downstream" option assumes full ACS support
|
||||
on root ports and downstream switch ports. The "multifunction"
|
||||
option assumes the subset of ACS features available on multifunction
|
||||
endpoints and upstream switch ports are supported. The "id:nnnn:nnnn"
|
||||
option enables ACS support on devices matching the provided vendor
|
||||
and device IDs, allowing more strategic ACS overrides. These options
|
||||
may be combined in any order. A maximum of 16 id specific overrides
|
||||
are available. It's suggested to use the most limited set of options
|
||||
necessary to avoid completely disabling ACS across the topology.
|
||||
Note to hardware vendors, we have facilities to permanently quirk
|
||||
specific devices which enforce isolation but do not provide an ACS
|
||||
capability. Please contact me to have your devices added and save
|
||||
your customers the hassle of this boot option.
|
||||
|
||||
Signed-off-by: Mark Weiman <mark.weiman@markzz.com>
|
||||
---
|
||||
.../admin-guide/kernel-parameters.txt | 9 ++
|
||||
drivers/pci/quirks.c | 101 ++++++++++++++++++
|
||||
2 files changed, 110 insertions(+)
|
||||
|
||||
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
|
||||
index aefd358a5ca3..173b3596fd9e 100644
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -3190,6 +3190,15 @@
|
||||
nomsi [MSI] If the PCI_MSI kernel config parameter is
|
||||
enabled, this kernel boot option can be used to
|
||||
disable the use of MSI interrupts system-wide.
|
||||
+ pcie_acs_override =
|
||||
+ [PCIE] Override missing PCIe ACS support for:
|
||||
+ downstream
|
||||
+ All downstream ports - full ACS capabilities
|
||||
+ multifunction
|
||||
+ All multifunction devices - multifunction ACS subset
|
||||
+ id:nnnn:nnnn
|
||||
+ Specific device - full ACS capabilities
|
||||
+ Specified as vid:did (vendor/device ID) in hex
|
||||
noioapicquirk [APIC] Disable all boot interrupt quirks.
|
||||
Safety option to keep boot IRQs enabled. This
|
||||
should never be necessary.
|
||||
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
|
||||
index 4700d24e5d55..8f7a3d7fd9c1 100644
|
||||
--- a/drivers/pci/quirks.c
|
||||
+++ b/drivers/pci/quirks.c
|
||||
@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
|
||||
dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
|
||||
}
|
||||
|
||||
+static bool acs_on_downstream;
|
||||
+static bool acs_on_multifunction;
|
||||
+
|
||||
+#define NUM_ACS_IDS 16
|
||||
+struct acs_on_id {
|
||||
+ unsigned short vendor;
|
||||
+ unsigned short device;
|
||||
+};
|
||||
+static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
|
||||
+static u8 max_acs_id;
|
||||
+
|
||||
+static __init int pcie_acs_override_setup(char *p)
|
||||
+{
|
||||
+ if (!p)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ while (*p) {
|
||||
+ if (!strncmp(p, "downstream", 10))
|
||||
+ acs_on_downstream = true;
|
||||
+ if (!strncmp(p, "multifunction", 13))
|
||||
+ acs_on_multifunction = true;
|
||||
+ if (!strncmp(p, "id:", 3)) {
|
||||
+ char opt[5];
|
||||
+ int ret;
|
||||
+ long val;
|
||||
+
|
||||
+ if (max_acs_id >= NUM_ACS_IDS - 1) {
|
||||
+ pr_warn("Out of PCIe ACS override slots (%d)\n",
|
||||
+ NUM_ACS_IDS);
|
||||
+ goto next;
|
||||
+ }
|
||||
+
|
||||
+ p += 3;
|
||||
+ snprintf(opt, 5, "%s", p);
|
||||
+ ret = kstrtol(opt, 16, &val);
|
||||
+ if (ret) {
|
||||
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
|
||||
+ goto next;
|
||||
+ }
|
||||
+ acs_on_ids[max_acs_id].vendor = val;
|
||||
+
|
||||
+ p += strcspn(p, ":");
|
||||
+ if (*p != ':') {
|
||||
+ pr_warn("PCIe ACS invalid ID\n");
|
||||
+ goto next;
|
||||
+ }
|
||||
+
|
||||
+ p++;
|
||||
+ snprintf(opt, 5, "%s", p);
|
||||
+ ret = kstrtol(opt, 16, &val);
|
||||
+ if (ret) {
|
||||
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
|
||||
+ goto next;
|
||||
+ }
|
||||
+ acs_on_ids[max_acs_id].device = val;
|
||||
+ max_acs_id++;
|
||||
+ }
|
||||
+next:
|
||||
+ p += strcspn(p, ",");
|
||||
+ if (*p == ',')
|
||||
+ p++;
|
||||
+ }
|
||||
+
|
||||
+ if (acs_on_downstream || acs_on_multifunction || max_acs_id)
|
||||
+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n");
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+early_param("pcie_acs_override", pcie_acs_override_setup);
|
||||
+
|
||||
+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ /* Never override ACS for legacy devices or devices with ACS caps */
|
||||
+ if (!pci_is_pcie(dev) ||
|
||||
+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS))
|
||||
+ return -ENOTTY;
|
||||
+
|
||||
+ for (i = 0; i < max_acs_id; i++)
|
||||
+ if (acs_on_ids[i].vendor == dev->vendor &&
|
||||
+ acs_on_ids[i].device == dev->device)
|
||||
+ return 1;
|
||||
+
|
||||
+ switch (pci_pcie_type(dev)) {
|
||||
+ case PCI_EXP_TYPE_DOWNSTREAM:
|
||||
+ case PCI_EXP_TYPE_ROOT_PORT:
|
||||
+ if (acs_on_downstream)
|
||||
+ return 1;
|
||||
+ break;
|
||||
+ case PCI_EXP_TYPE_ENDPOINT:
|
||||
+ case PCI_EXP_TYPE_UPSTREAM:
|
||||
+ case PCI_EXP_TYPE_LEG_END:
|
||||
+ case PCI_EXP_TYPE_RC_END:
|
||||
+ if (acs_on_multifunction && dev->multifunction)
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ return -ENOTTY;
|
||||
+}
|
||||
/*
|
||||
* Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
|
||||
* The device will throw a Link Down error on AER-capable systems and
|
||||
@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled {
|
||||
{ PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
|
||||
/* Zhaoxin Root/Downstream Ports */
|
||||
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
|
||||
+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
|
||||
{ 0 }
|
||||
};
|
||||
|
||||
|
166
linux-tkg-patches/6.2/0007-v6.2-fsync1_via_futex_waitv.patch
Normal file
166
linux-tkg-patches/6.2/0007-v6.2-fsync1_via_futex_waitv.patch
Normal file
@@ -0,0 +1,166 @@
|
||||
From b70e738f08403950aa3053c36b98c6b0eeb0eb90 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
|
||||
Date: Mon, 25 Oct 2021 09:49:42 -0300
|
||||
Subject: [PATCH] futex: Add entry point for FUTEX_WAIT_MULTIPLE (opcode 31)
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add an option to wait on multiple futexes using the old interface, that
|
||||
uses opcode 31 through futex() syscall. Do that by just translating the
|
||||
old interface to use the new code. This allows old and stable versions
|
||||
of Proton to still use fsync in new kernel releases.
|
||||
|
||||
Signed-off-by: André Almeida <andrealmeid@collabora.com>
|
||||
---
|
||||
include/uapi/linux/futex.h | 13 +++++++
|
||||
kernel/futex/syscalls.c | 75 +++++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 87 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
|
||||
index 71a5df8d2689..d375ab21cbf8 100644
|
||||
--- a/include/uapi/linux/futex.h
|
||||
+++ b/include/uapi/linux/futex.h
|
||||
@@ -22,6 +22,7 @@
|
||||
#define FUTEX_WAIT_REQUEUE_PI 11
|
||||
#define FUTEX_CMP_REQUEUE_PI 12
|
||||
#define FUTEX_LOCK_PI2 13
|
||||
+#define FUTEX_WAIT_MULTIPLE 31
|
||||
|
||||
#define FUTEX_PRIVATE_FLAG 128
|
||||
#define FUTEX_CLOCK_REALTIME 256
|
||||
@@ -68,6 +69,18 @@ struct futex_waitv {
|
||||
__u32 __reserved;
|
||||
};
|
||||
|
||||
+/**
|
||||
+ * struct futex_wait_block - Block of futexes to be waited for
|
||||
+ * @uaddr: User address of the futex
|
||||
+ * @val: Futex value expected by userspace
|
||||
+ * @bitset: Bitset for the optional bitmasked wakeup
|
||||
+ */
|
||||
+struct futex_wait_block {
|
||||
+ __u32 __user *uaddr;
|
||||
+ __u32 val;
|
||||
+ __u32 bitset;
|
||||
+};
|
||||
+
|
||||
/*
|
||||
* Support for robust futexes: the kernel cleans up held futexes at
|
||||
* thread exit time.
|
||||
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
|
||||
index 6f91a07a6a83..2f4d4c04ede2 100644
|
||||
--- a/kernel/futex/syscalls.c
|
||||
+++ b/kernel/futex/syscalls.c
|
||||
@@ -158,6 +158,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
|
||||
case FUTEX_LOCK_PI2:
|
||||
case FUTEX_WAIT_BITSET:
|
||||
case FUTEX_WAIT_REQUEUE_PI:
|
||||
+ case FUTEX_WAIT_MULTIPLE:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@@ -170,13 +171,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
|
||||
return -EINVAL;
|
||||
|
||||
*t = timespec64_to_ktime(*ts);
|
||||
- if (cmd == FUTEX_WAIT)
|
||||
+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
|
||||
*t = ktime_add_safe(ktime_get(), *t);
|
||||
else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
|
||||
*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * futex_read_wait_block - Read an array of futex_wait_block from userspace
|
||||
+ * @uaddr: Userspace address of the block
|
||||
+ * @count: Number of blocks to be read
|
||||
+ *
|
||||
+ * This function allocates a zeroed array of futex_vector (zeroing
|
||||
+ * initializes the fields) and then, for each futex_wait_block element from
|
||||
+ * userspace, fills a futex_vector element with proper values.
|
||||
+ */
|
||||
+inline struct futex_vector *futex_read_wait_block(u32 __user *uaddr, u32 count)
|
||||
+{
|
||||
+ unsigned int i;
|
||||
+ struct futex_vector *futexv;
|
||||
+ struct futex_wait_block fwb;
|
||||
+ struct futex_wait_block __user *entry =
|
||||
+ (struct futex_wait_block __user *)uaddr;
|
||||
+
|
||||
+ if (!count || count > FUTEX_WAITV_MAX)
|
||||
+ return ERR_PTR(-EINVAL);
|
||||
+
|
||||
+ futexv = kcalloc(count, sizeof(*futexv), GFP_KERNEL);
|
||||
+ if (!futexv)
|
||||
+ return ERR_PTR(-ENOMEM);
|
||||
+
|
||||
+ for (i = 0; i < count; i++) {
|
||||
+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) {
|
||||
+ kfree(futexv);
|
||||
+ return ERR_PTR(-EFAULT);
|
||||
+ }
|
||||
+
|
||||
+ futexv[i].w.flags = FUTEX_32;
|
||||
+ futexv[i].w.val = fwb.val;
|
||||
+ futexv[i].w.uaddr = (uintptr_t) (fwb.uaddr);
|
||||
+ futexv[i].q = futex_q_init;
|
||||
+ }
|
||||
+
|
||||
+ return futexv;
|
||||
+}
|
||||
+
|
||||
+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
|
||||
+ struct hrtimer_sleeper *to);
|
||||
+
|
||||
+int futex_opcode_31(ktime_t *abs_time, u32 __user *uaddr, int count)
|
||||
+{
|
||||
+ int ret;
|
||||
+ struct futex_vector *vs;
|
||||
+ struct hrtimer_sleeper *to = NULL, timeout;
|
||||
+
|
||||
+ to = futex_setup_timer(abs_time, &timeout, 0, 0);
|
||||
+
|
||||
+ vs = futex_read_wait_block(uaddr, count);
|
||||
+
|
||||
+ if (IS_ERR(vs))
|
||||
+ return PTR_ERR(vs);
|
||||
+
|
||||
+ ret = futex_wait_multiple(vs, count, abs_time ? to : NULL);
|
||||
+ kfree(vs);
|
||||
+
|
||||
+ if (to) {
|
||||
+ hrtimer_cancel(&to->timer);
|
||||
+ destroy_hrtimer_on_stack(&to->timer);
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
|
||||
const struct __kernel_timespec __user *, utime,
|
||||
u32 __user *, uaddr2, u32, val3)
|
||||
@@ -196,6 +263,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
|
||||
tp = &t;
|
||||
}
|
||||
|
||||
+ if (cmd == FUTEX_WAIT_MULTIPLE)
|
||||
+ return futex_opcode_31(tp, uaddr, val);
|
||||
+
|
||||
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
|
||||
}
|
||||
|
||||
@@ -392,6 +462,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
|
||||
tp = &t;
|
||||
}
|
||||
|
||||
+ if (cmd == FUTEX_WAIT_MULTIPLE)
|
||||
+ return futex_opcode_31(tp, uaddr, val);
|
||||
+
|
||||
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
|
||||
}
|
||||
#endif /* CONFIG_COMPAT_32BIT_TIME */
|
||||
--
|
||||
2.33.1
|
||||
|
5105
linux-tkg-patches/6.2/0007-v6.2-winesync.patch
Normal file
5105
linux-tkg-patches/6.2/0007-v6.2-winesync.patch
Normal file
File diff suppressed because it is too large
Load Diff
90
linux-tkg-patches/6.2/0009-glitched-bmq.patch
Normal file
90
linux-tkg-patches/6.2/0009-glitched-bmq.patch
Normal file
@@ -0,0 +1,90 @@
|
||||
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
|
||||
From: Tk-Glitch <ti3nou@gmail.com>
|
||||
Date: Wed, 4 Jul 2018 04:30:08 +0200
|
||||
Subject: glitched - BMQ
|
||||
|
||||
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
|
||||
index 2a202a846757..1d9c7ed79b11 100644
|
||||
--- a/kernel/Kconfig.hz
|
||||
+++ b/kernel/Kconfig.hz
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
choice
|
||||
prompt "Timer frequency"
|
||||
- default HZ_250
|
||||
+ default HZ_500
|
||||
help
|
||||
Allows the configuration of the timer frequency. It is customary
|
||||
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
|
||||
@@ -39,6 +39,13 @@ choice
|
||||
on SMP and NUMA systems and exactly dividing by both PAL and
|
||||
NTSC frame rates for video and multimedia work.
|
||||
|
||||
+ config HZ_500
|
||||
+ bool "500 HZ"
|
||||
+ help
|
||||
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
|
||||
+ on desktops with great smoothness without increasing CPU power
|
||||
+ consumption and sacrificing the battery life on laptops.
|
||||
+
|
||||
config HZ_1000
|
||||
bool "1000 HZ"
|
||||
help
|
||||
@@ -52,6 +59,7 @@ config HZ
|
||||
default 100 if HZ_100
|
||||
default 250 if HZ_250
|
||||
default 300 if HZ_300
|
||||
+ default 500 if HZ_500
|
||||
default 1000 if HZ_1000
|
||||
|
||||
config SCHED_HRTICK
|
||||
|
||||
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
|
||||
index 2a202a846757..1d9c7ed79b11 100644
|
||||
--- a/kernel/Kconfig.hz
|
||||
+++ b/kernel/Kconfig.hz
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
choice
|
||||
prompt "Timer frequency"
|
||||
- default HZ_500
|
||||
+ default HZ_750
|
||||
help
|
||||
Allows the configuration of the timer frequency. It is customary
|
||||
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
|
||||
@@ -46,6 +46,13 @@ choice
|
||||
on desktops with great smoothness without increasing CPU power
|
||||
consumption and sacrificing the battery life on laptops.
|
||||
|
||||
+ config HZ_750
|
||||
+ bool "750 HZ"
|
||||
+ help
|
||||
+ 750 Hz is a good timer frequency for desktops. Provides fast
|
||||
+ interactivity with great smoothness without sacrificing too
|
||||
+ much throughput.
|
||||
+
|
||||
config HZ_1000
|
||||
bool "1000 HZ"
|
||||
help
|
||||
@@ -60,6 +67,7 @@ config HZ
|
||||
default 250 if HZ_250
|
||||
default 300 if HZ_300
|
||||
default 500 if HZ_500
|
||||
+ default 750 if HZ_750
|
||||
default 1000 if HZ_1000
|
||||
|
||||
config SCHED_HRTICK
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 9270a4370d54..30d01e647417 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -169,7 +169,7 @@
|
||||
/*
|
||||
* From 0 .. 200. Higher means more swappy.
|
||||
*/
|
||||
-int vm_swappiness = 60;
|
||||
+int vm_swappiness = 20;
|
||||
|
||||
static void set_task_reclaim_state(struct task_struct *task,
|
||||
struct reclaim_state *rs)
|
18
linux-tkg-patches/6.2/0009-glitched-ondemand-bmq.patch
Normal file
18
linux-tkg-patches/6.2/0009-glitched-ondemand-bmq.patch
Normal file
@@ -0,0 +1,18 @@
|
||||
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
|
||||
index 6b423eebfd5d..61e3271675d6 100644
|
||||
--- a/drivers/cpufreq/cpufreq_ondemand.c
|
||||
+++ b/drivers/cpufreq/cpufreq_ondemand.c
|
||||
@@ -21,10 +21,10 @@
|
||||
#include "cpufreq_ondemand.h"
|
||||
|
||||
/* On-demand governor macros */
|
||||
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
|
||||
-#define DEF_SAMPLING_DOWN_FACTOR (1)
|
||||
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
|
||||
+#define DEF_SAMPLING_DOWN_FACTOR (5)
|
||||
#define MAX_SAMPLING_DOWN_FACTOR (100000)
|
||||
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
|
||||
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
|
||||
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
|
||||
#define MIN_FREQUENCY_UP_THRESHOLD (1)
|
||||
#define MAX_FREQUENCY_UP_THRESHOLD (100)
|
316
linux-tkg-patches/6.2/0012-misc-additions.patch
Normal file
316
linux-tkg-patches/6.2/0012-misc-additions.patch
Normal file
@@ -0,0 +1,316 @@
|
||||
From e5e77ad2223f662e1615266d8ef39a8db7e65a70 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Felix=20H=C3=A4dicke?= <felixhaedicke@web.de>
|
||||
Date: Thu, 19 Nov 2020 09:22:32 +0100
|
||||
Subject: HID: quirks: Add Apple Magic Trackpad 2 to hid_have_special_driver
|
||||
list
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The Apple Magic Trackpad 2 is handled by the magicmouse driver. And
|
||||
there were severe stability issues when both drivers (hid-generic and
|
||||
hid-magicmouse) were loaded for this device.
|
||||
|
||||
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=210241
|
||||
|
||||
Signed-off-by: Felix Hädicke <felixhaedicke@web.de>
|
||||
---
|
||||
drivers/hid/hid-quirks.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
|
||||
index bf7ecab5d9e5..142e9dae2837 100644
|
||||
--- a/drivers/hid/hid-quirks.c
|
||||
+++ b/drivers/hid/hid-quirks.c
|
||||
@@ -478,6 +478,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
|
||||
#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
|
||||
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
|
||||
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
|
||||
+ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
|
||||
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
|
||||
#endif
|
||||
#if IS_ENABLED(CONFIG_HID_MAYFLASH)
|
||||
{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
|
||||
--
|
||||
cgit v1.2.3-1-gf6bb5
|
||||
|
||||
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
|
||||
From: Tk-Glitch <ti3nou@gmail.com>
|
||||
Date: Wed, 3 Feb 2021 11:20:12 +0200
|
||||
Subject: Revert "cpufreq: Avoid configuring old governors as default with intel_pstate"
|
||||
|
||||
This is an undesirable behavior for us since our aggressive ondemand performs
|
||||
better than schedutil for gaming when using intel_pstate in passive mode.
|
||||
Also it interferes with the option to select the desired default governor we have.
|
||||
|
||||
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
|
||||
index 2c7171e0b0010..85de313ddec29 100644
|
||||
--- a/drivers/cpufreq/Kconfig
|
||||
+++ b/drivers/cpufreq/Kconfig
|
||||
@@ -71,7 +71,6 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE
|
||||
|
||||
config CPU_FREQ_DEFAULT_GOV_ONDEMAND
|
||||
bool "ondemand"
|
||||
- depends on !(X86_INTEL_PSTATE && SMP)
|
||||
select CPU_FREQ_GOV_ONDEMAND
|
||||
select CPU_FREQ_GOV_PERFORMANCE
|
||||
help
|
||||
@@ -83,7 +84,6 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND
|
||||
|
||||
config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
|
||||
bool "conservative"
|
||||
- depends on !(X86_INTEL_PSTATE && SMP)
|
||||
select CPU_FREQ_GOV_CONSERVATIVE
|
||||
select CPU_FREQ_GOV_PERFORMANCE
|
||||
help
|
||||
|
||||
From 0c079d3f88df5f8286cd5c91b54bdac7c819be85 Mon Sep 17 00:00:00 2001
|
||||
From: Matthew Auld <matthew.auld@intel.com>
|
||||
Date: Tue, 6 Dec 2022 16:11:41 +0000
|
||||
Subject: [PATCH] drm/i915: improve the catch-all evict to handle lock
|
||||
contention
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The catch-all evict can fail due to object lock contention, since it
|
||||
only goes as far as trylocking the object, due to us already holding the
|
||||
vm->mutex. Doing a full object lock here can deadlock, since the
|
||||
vm->mutex is always our inner lock. Add another execbuf pass which drops
|
||||
the vm->mutex and then tries to grab the object with the full lock,
|
||||
before then retrying the eviction. This should be good enough for now to
|
||||
fix the immediate regression with userspace seeing -ENOSPC from execbuf
|
||||
due to contended object locks during GTT eviction.
|
||||
|
||||
Testcase: igt@gem_ppgtt@shrink-vs-evict-*
|
||||
Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.")
|
||||
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627
|
||||
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570
|
||||
References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558
|
||||
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
|
||||
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
|
||||
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
|
||||
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
|
||||
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
|
||||
Cc: Mani Milani <mani@chromium.org>
|
||||
Cc: <stable@vger.kernel.org> # v5.18+
|
||||
|
||||
Revision 1 of https://patchwork.freedesktop.org/series/111686/
|
||||
---
|
||||
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 25 +++++++++++--
|
||||
drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +-
|
||||
drivers/gpu/drm/i915/i915_gem_evict.c | 37 ++++++++++++++-----
|
||||
drivers/gpu/drm/i915/i915_gem_evict.h | 4 +-
|
||||
drivers/gpu/drm/i915/i915_vma.c | 2 +-
|
||||
.../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-
|
||||
6 files changed, 56 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||
index 845023c14eb36f..094e92ed28db4f 100644
|
||||
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
|
||||
@@ -741,25 +741,44 @@ static int eb_reserve(struct i915_execbuffer *eb)
|
||||
*
|
||||
* Defragmenting is skipped if all objects are pinned at a fixed location.
|
||||
*/
|
||||
- for (pass = 0; pass <= 2; pass++) {
|
||||
+ for (pass = 0; pass <= 3; pass++) {
|
||||
int pin_flags = PIN_USER | PIN_VALIDATE;
|
||||
|
||||
if (pass == 0)
|
||||
pin_flags |= PIN_NONBLOCK;
|
||||
|
||||
if (pass >= 1)
|
||||
- unpinned = eb_unbind(eb, pass == 2);
|
||||
+ unpinned = eb_unbind(eb, pass >= 2);
|
||||
|
||||
if (pass == 2) {
|
||||
err = mutex_lock_interruptible(&eb->context->vm->mutex);
|
||||
if (!err) {
|
||||
- err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
|
||||
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
|
||||
mutex_unlock(&eb->context->vm->mutex);
|
||||
}
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
+ if (pass == 3) {
|
||||
+retry:
|
||||
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
|
||||
+ if (!err) {
|
||||
+ struct drm_i915_gem_object *busy_bo = NULL;
|
||||
+
|
||||
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
|
||||
+ mutex_unlock(&eb->context->vm->mutex);
|
||||
+ if (err && busy_bo) {
|
||||
+ err = i915_gem_object_lock(busy_bo, &eb->ww);
|
||||
+ i915_gem_object_put(busy_bo);
|
||||
+ if (!err)
|
||||
+ goto retry;
|
||||
+ }
|
||||
+ }
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+ }
|
||||
+
|
||||
list_for_each_entry(ev, &eb->unbound, bind_link) {
|
||||
err = eb_reserve_vma(eb, ev, pin_flags);
|
||||
if (err)
|
||||
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||
index 73d9eda1d6b7a6..c83d98e1dc5da0 100644
|
||||
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
|
||||
@@ -369,7 +369,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
|
||||
if (vma == ERR_PTR(-ENOSPC)) {
|
||||
ret = mutex_lock_interruptible(&ggtt->vm.mutex);
|
||||
if (!ret) {
|
||||
- ret = i915_gem_evict_vm(&ggtt->vm, &ww);
|
||||
+ ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
|
||||
mutex_unlock(&ggtt->vm.mutex);
|
||||
}
|
||||
if (ret)
|
||||
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||
index f025ee4fa52618..a4b4d9b7d26c7a 100644
|
||||
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
|
||||
@@ -416,6 +416,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||
* @vm: Address space to cleanse
|
||||
* @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
|
||||
* will be able to evict vma's locked by the ww as well.
|
||||
+ * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
|
||||
+ * in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
|
||||
+ * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
|
||||
+ * the vm->mutex, before trying again to acquire the contended lock. The caller
|
||||
+ * also owns a reference to the object.
|
||||
*
|
||||
* This function evicts all vmas from a vm.
|
||||
*
|
||||
@@ -425,7 +430,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||
* To clarify: This is for freeing up virtual address space, not for freeing
|
||||
* memory in e.g. the shrinker.
|
||||
*/
|
||||
-int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||
+int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
|
||||
+ struct drm_i915_gem_object **busy_bo)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
@@ -457,15 +463,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||
* the resv is shared among multiple objects, we still
|
||||
* need the object ref.
|
||||
*/
|
||||
- if (dying_vma(vma) ||
|
||||
+ if (!i915_gem_object_get_rcu(vma->obj) ||
|
||||
(ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) {
|
||||
__i915_vma_pin(vma);
|
||||
list_add(&vma->evict_link, &locked_eviction_list);
|
||||
continue;
|
||||
}
|
||||
|
||||
- if (!i915_gem_object_trylock(vma->obj, ww))
|
||||
+ if (!i915_gem_object_trylock(vma->obj, ww)) {
|
||||
+ if (busy_bo) {
|
||||
+ *busy_bo = vma->obj; /* holds ref */
|
||||
+ ret = -EBUSY;
|
||||
+ break;
|
||||
+ }
|
||||
+ i915_gem_object_put(vma->obj);
|
||||
continue;
|
||||
+ }
|
||||
|
||||
__i915_vma_pin(vma);
|
||||
list_add(&vma->evict_link, &eviction_list);
|
||||
@@ -473,25 +486,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
|
||||
if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
|
||||
break;
|
||||
|
||||
- ret = 0;
|
||||
/* Unbind locked objects first, before unlocking the eviction_list */
|
||||
list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
|
||||
__i915_vma_unpin(vma);
|
||||
|
||||
- if (ret == 0)
|
||||
+ if (ret == 0) {
|
||||
ret = __i915_vma_unbind(vma);
|
||||
- if (ret != -EINTR) /* "Get me out of here!" */
|
||||
- ret = 0;
|
||||
+ if (ret != -EINTR) /* "Get me out of here!" */
|
||||
+ ret = 0;
|
||||
+ }
|
||||
+ if (!dying_vma(vma))
|
||||
+ i915_gem_object_put(vma->obj);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
|
||||
__i915_vma_unpin(vma);
|
||||
- if (ret == 0)
|
||||
+ if (ret == 0) {
|
||||
ret = __i915_vma_unbind(vma);
|
||||
- if (ret != -EINTR) /* "Get me out of here!" */
|
||||
- ret = 0;
|
||||
+ if (ret != -EINTR) /* "Get me out of here!" */
|
||||
+ ret = 0;
|
||||
+ }
|
||||
|
||||
i915_gem_object_unlock(vma->obj);
|
||||
+ i915_gem_object_put(vma->obj);
|
||||
}
|
||||
} while (ret == 0);
|
||||
|
||||
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||
index e593c530f9bd7a..bf0ee0e4fe6088 100644
|
||||
--- a/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||
+++ b/drivers/gpu/drm/i915/i915_gem_evict.h
|
||||
@@ -11,6 +11,7 @@
|
||||
struct drm_mm_node;
|
||||
struct i915_address_space;
|
||||
struct i915_gem_ww_ctx;
|
||||
+struct drm_i915_gem_object;
|
||||
|
||||
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
|
||||
struct i915_gem_ww_ctx *ww,
|
||||
@@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
|
||||
struct drm_mm_node *node,
|
||||
unsigned int flags);
|
||||
int i915_gem_evict_vm(struct i915_address_space *vm,
|
||||
- struct i915_gem_ww_ctx *ww);
|
||||
+ struct i915_gem_ww_ctx *ww,
|
||||
+ struct drm_i915_gem_object **busy_bo);
|
||||
|
||||
#endif /* __I915_GEM_EVICT_H__ */
|
||||
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
|
||||
index f17c09ead7d778..4d06875de14a14 100644
|
||||
--- a/drivers/gpu/drm/i915/i915_vma.c
|
||||
+++ b/drivers/gpu/drm/i915/i915_vma.c
|
||||
@@ -1569,7 +1569,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
||||
* locked objects when called from execbuf when pinning
|
||||
* is removed. This would probably regress badly.
|
||||
*/
|
||||
- i915_gem_evict_vm(vm, NULL);
|
||||
+ i915_gem_evict_vm(vm, NULL, NULL);
|
||||
mutex_unlock(&vm->mutex);
|
||||
}
|
||||
} while (1);
|
||||
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||
index 8c6517d29b8e0c..37068542aafe7f 100644
|
||||
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
|
||||
@@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
|
||||
|
||||
/* Everything is pinned, nothing should happen */
|
||||
mutex_lock(&ggtt->vm.mutex);
|
||||
- err = i915_gem_evict_vm(&ggtt->vm, NULL);
|
||||
+ err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL);
|
||||
mutex_unlock(&ggtt->vm.mutex);
|
||||
if (err) {
|
||||
pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
|
||||
@@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
|
||||
|
||||
for_i915_gem_ww(&ww, err, false) {
|
||||
mutex_lock(&ggtt->vm.mutex);
|
||||
- err = i915_gem_evict_vm(&ggtt->vm, &ww);
|
||||
+ err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
|
||||
mutex_unlock(&ggtt->vm.mutex);
|
||||
}
|
||||
|
27
linux-tkg-patches/6.2/0013-fedora-rpm.patch
Normal file
27
linux-tkg-patches/6.2/0013-fedora-rpm.patch
Normal file
@@ -0,0 +1,27 @@
|
||||
# Remove the obsoletes line in kernel-headers
|
||||
# Add provides for kernel-devel so there's no conflict
|
||||
|
||||
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
|
||||
index 7c477ca7d..1158f5559 100755
|
||||
--- a/scripts/package/mkspec
|
||||
+++ b/scripts/package/mkspec
|
||||
@@ -25,0 +26 @@ fi
|
||||
+PROVIDES_DRM=""
|
||||
@@ -27 +28 @@ if grep -q CONFIG_DRM=y .config; then
|
||||
- PROVIDES=kernel-drm
|
||||
+ PROVIDES_DRM="Provides: kernel-drm = %{version}"
|
||||
@@ -30 +30,0 @@ fi
|
||||
-PROVIDES="$PROVIDES kernel-$KERNELRELEASE"
|
||||
@@ -51 +51,3 @@ $S Source: kernel-$__KERNELRELEASE.tar.gz
|
||||
- Provides: $PROVIDES
|
||||
+ $PROVIDES_DRM
|
||||
+ Provides: kernel = %{version}
|
||||
+ Provides: kernel-uname-r = %{version}
|
||||
+ Provides: installonlypkg(kernel) = %{version}
|
||||
@@ -61 +63 @@ $S Source: kernel-$__KERNELRELEASE.tar.gz
|
||||
- Obsoletes: kernel-headers
|
||||
+ Provides: installonlypkg(kernel) = %{version}
|
||||
@@ -72,0 +75,3 @@ $S$M Group: System Environment/Kernel
|
||||
+$S$M Provides: kernel-devel = %{version}
|
||||
+$S$M Provides: kernel-devel-uname-r = %{version}
|
||||
+$S$M Provides: installonlypkg(kernel) = %{version}
|
46
linux-tkg-patches/6.2/0013-optimize_harder_O3.patch
Normal file
46
linux-tkg-patches/6.2/0013-optimize_harder_O3.patch
Normal file
@@ -0,0 +1,46 @@
|
||||
diff --git a/Makefile b/Makefile
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -442,7 +442,7 @@ endif
|
||||
HOSTPKG_CONFIG = pkg-config
|
||||
|
||||
KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
|
||||
- -O2 -fomit-frame-pointer -std=gnu11 \
|
||||
+ -O3 -fomit-frame-pointer -std=gnu11 \
|
||||
-Wdeclaration-after-statement
|
||||
KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS)
|
||||
KBUILD_USERLDFLAGS := $(USERLDFLAGS)
|
||||
@@ -474,7 +474,7 @@ endif
|
||||
-Wclippy::dbg_macro
|
||||
|
||||
KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
|
||||
-KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
|
||||
+KBUILD_HOSTCXXFLAGS := -Wall -O3 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
|
||||
KBUILD_HOSTRUSTFLAGS := $(rust_common_flags) -O -Cstrip=debuginfo \
|
||||
-Zallow-features= $(HOSTRUSTFLAGS)
|
||||
KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS)
|
||||
@@ -757,7 +757,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
|
||||
|
||||
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
|
||||
-KBUILD_CFLAGS += -O2
|
||||
+KBUILD_CFLAGS += -O3
|
||||
KBUILD_RUSTFLAGS += -Copt-level=2
|
||||
else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
|
||||
KBUILD_CFLAGS += -Os
|
||||
diff --git a/init/Kconfig b/init/Kconfig
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1401,10 +1401,10 @@ choice
|
||||
default CC_OPTIMIZE_FOR_PERFORMANCE
|
||||
|
||||
config CC_OPTIMIZE_FOR_PERFORMANCE
|
||||
- bool "Optimize for performance (-O2)"
|
||||
+ bool "Optimize for performance (-O3)"
|
||||
help
|
||||
This is the default optimization level for the kernel, building
|
||||
- with the "-O2" compiler flag for best performance and most
|
||||
+ with the "-O3" compiler flag for best performance and most
|
||||
helpful compile-time warnings.
|
||||
|
||||
config CC_OPTIMIZE_FOR_SIZE
|
Reference in New Issue
Block a user