Compare commits

...

16 Commits

Author SHA1 Message Date
Sravan Balaji
0870103c33 PDS Kernel Configuration 2022-11-03 19:06:39 -04:00
Vasiliy Stelmachenok
16c81fb013 Clarify the description of the tickless parameter (#636) 2022-11-02 13:49:58 +01:00
Tk-Glitch
1882fa3bd9 linux 6.1 RC: Allow BORE cpusched 2022-10-31 18:50:45 +01:00
Tk-Glitch
c4ceba207d linux 5.15.y: Allow BORE cpusched
Fixes https://github.com/Frogging-Family/linux-tkg/issues/623
2022-10-31 18:48:17 +01:00
Tk-Glitch
a380d532bc linux 5.10.y: Update Project C to r5 - https://gitlab.com/alfredchen/linux-prjc/-/tree/linux-5.10.y-prjc-lts 2022-10-26 20:42:55 +02:00
Tk-Glitch
b82abcf5f6 Move 5.19 to EOL 2022-10-26 20:03:23 +02:00
Tk-Glitch
1084ad5ba1 linux 6.0.y: misc-additions: Remove Revert "ALSA: hda: Fix page fault in snd_hda_codec_shutdown()" as it was merged with v6.0.4 2022-10-26 19:53:02 +02:00
Tk-Glitch
1e4b8fb627 linux 6.1 RC: Remove fixes merged in RC2 2022-10-24 15:53:50 +02:00
Tk-Glitch
02132367d6 Add support for zen4 and meteorlake compiler optimizations 2022-10-23 15:51:47 +02:00
Tk-Glitch
1c4fece830 linux 6.0.y: Add fixes and update defconfig to 6.0.3.arch2
ed63a1d111/trunk/config

fb23dad87a
785699dbc7
6df3912f64
41d72c2322
2022-10-23 15:48:24 +02:00
Tk-Glitch
786140014b Make sure WERROR is disabled by default
RCs and custom patches will throw warnings, and while the Arch defconfig we ship disables it, people using their own might face compilation failures.

Fixes https://github.com/Frogging-Family/linux-tkg/issues/628
2022-10-18 13:37:22 +02:00
Tk-Glitch
c162bca30d linux 6.1 RC: Fixups for 0001-mm-Support-soft-dirty-flag-reset-for-VA-range patch 2022-10-17 16:07:45 +02:00
Tk-Glitch
da7b57fa92 Initial kernel 6.1 RC support 2022-10-17 14:07:20 +02:00
Tk-Glitch
9e06bbfb46 linux 6.0.y: misc-additions: Add 0004-drm-sched-add-DRM_SCHED_FENCE_DONT_PIPELINE-flag and 0005-drm-amdgpu-use-DRM_SCHED_FENCE_DONT_PIPELINE-for-VM-, pending fixes for https://gitlab.freedesktop.org/drm/amd/-/issues/2113 from Christian König 2022-10-15 15:22:52 +02:00
Tk-Glitch
d60046d9fb linux 6.0.y: Disable pds and bmq CPU schedulers as Torvic9's rebase is reportedly buggy. We'll enable back when Alfred pushes his own. 2022-10-15 15:06:16 +02:00
Tk-Glitch
2a07409439 linux 6.0.y: misc-additions: Add mm: vmscan: fix extreme overreclaim and swap floods and Bluetooth: fix deadlock for RFCOMM sk state change cherry picks from Arch
Wifi issues were fixed with 6.0.2, thankfully
2022-10-15 15:05:21 +02:00
22 changed files with 20234 additions and 361 deletions

View File

@@ -56,7 +56,7 @@ else
 fi
 pkgname=("${pkgbase}" "${pkgbase}-headers")
 pkgver="${_basekernel}"."${_sub}"
-pkgrel=270
+pkgrel=271
 pkgdesc='Linux-tkg'
 arch=('x86_64') # no i686 in here
 url="https://www.kernel.org/"

View File

@@ -3,7 +3,7 @@
 # Linux distribution you are using, options are "Arch", "Void", "Ubuntu", "Debian", "Fedora", "Suse", "Gentoo", "Generic".
 # It is automatically set to "Arch" when using PKGBUILD.
 # If left empty, the script will prompt
-_distro=""
+_distro="Arch"
 # Kernel Version - Options are "5.4", and from "5.7" to "5.19"
 # you can also set a specific kernel version, e.g. "6.0-rc4" or "5.10.51",
@@ -32,7 +32,7 @@ CUSTOM_GCC_PATH=""
 CUSTOM_LLVM_PATH=""
 # Set to true to bypass makepkg.conf and use all available threads for compilation. False will respect your makepkg.conf options.
-_force_all_threads="true"
+_force_all_threads="false"
 # Set to true to prevent ccache from being used and set CONFIG_GCC_PLUGINS=y (which needs to be disabled for ccache to work properly)
 _noccache="false"
@@ -46,10 +46,10 @@ _modprobeddb="false"
 _modprobeddb_db_path=~/.config/modprobed.db
 # Set to "1" to call make menuconfig, "2" to call make nconfig, "3" to call make xconfig, before building the kernel. Set to false to disable and skip the prompt.
-_menunconfig=""
+_menunconfig="false"
 # Set to true to generate a kernel config fragment from your changes in menuconfig/nconfig. Set to false to disable and skip the prompt.
-_diffconfig=""
+_diffconfig="false"
 # Set to the file name where the generated config fragment should be written to. Only used if _diffconfig is active.
 _diffconfig_name=""
@@ -90,11 +90,11 @@ _STRIP="true"
 # LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME
 # CPU scheduler - Options are "upds" (TkG's Undead PDS), "pds", "bmq", "muqss", "cacule" or "cfs" (kernel's default)
-_cpusched=""
+_cpusched="pds"
 # Compiler to use - Options are "gcc" or "llvm".
 # For advanced users.
-_compiler=""
+_compiler="gcc"
 # Force the use of the LLVM Integrated Assembler whether using LLVM, LTO or not.
 # Set to "1" to enable.
@@ -124,7 +124,7 @@ _preempt_rt_force=""
 # For BMQ: 0: No yield.
 # 1: Deboost and requeue task. (Default)
 # 2: Set rq skip task.
-_sched_yield_type=""
+_sched_yield_type="0"
 # Round Robin interval is the longest duration two tasks with the same nice level will be delayed for. When CPU time is requested by a task, it receives a time slice equal
 # to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low value can help offset the disadvantages of rescheduling a process that has yielded.
@@ -132,7 +132,7 @@ _sched_yield_type=""
 # PDS default: 4ms"
 # BMQ default: 2ms"
 # Set to "1" for 2ms, "2" for 4ms, "3" for 6ms, "4" for 8ms, or "default" to keep the chosen scheduler defaults.
-_rr_interval=""
+_rr_interval="2"
 # Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false"
 _ftracedisable="false"
@@ -144,11 +144,13 @@ _numadisable="false"
 _misc_adds="true"
 # Set to "0" for periodic ticks, "1" to use CattaRappa mode (enabling full tickless) and "2" for tickless idle only.
-# Full tickless can give higher performances in various cases but, depending on hardware, lower consistency. Just tickless idle can perform better on some platforms (mostly AMD based).
-_tickless=""
+# Full tickless can give higher performances in case you use isolation of CPUs for tasks
+# and it works only when using the nohz_full kernel parameter, otherwise behaves like idle.
+# Just tickless idle perform better for most platforms.
+_tickless="2"
 # Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false"
-_acs_override=""
+_acs_override="false"
 # Set to "true" to add Bcache filesystem support. You'll have to install bcachefs-tools-git from AUR for utilities - https://bcachefs.org/ - If in doubt, set to "false"
 # This can be buggy and isn't recommended on a production machine, also enabling this option will not allow you to enable MGLRU.
@@ -177,13 +179,13 @@ _futex_waitv="false"
 _winesync="false"
 # Set to "true" to enable Binder and Ashmem, the kernel modules required to use the android emulator Anbox. ! This doesn't apply to 5.4.y !
-_anbox=""
+_anbox="false"
 # A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true"
 _zenify="true"
 # compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "1"
-_compileroptlevel="1"
+_compileroptlevel="2"
 # CPU compiler optimizations - Defaults to prompt at kernel config if left empty
 # AMD CPUs : "k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver" "steamroller" "excavator" "zen" "zen2" "zen3" (zen3 opt support depends on GCC11)
@@ -197,7 +199,7 @@ _compileroptlevel="1"
 # - "generic_v2" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v2
 # - "generic_v3" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v3
 # - "generic_v4" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v4
-_processor_opt=""
+_processor_opt="skylake"
 # MuQSS only - Make IRQ threading compulsory (FORCE_IRQ_THREADING) - Default is "false"
 _irq_threading="false"
@@ -213,7 +215,7 @@ _cacule_rdb_interval="19"
 _tt_high_hz="false"
 # MuQSS and PDS only - SMT (Hyperthreading) aware nice priority and policy support (SMT_NICE) - Kernel default is "true" - You can disable this on non-SMT/HT CPUs for lower overhead
-_smt_nice=""
+_smt_nice="true"
 # Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false"
 _random_trust_cpu="false"
@@ -223,7 +225,7 @@ _random_trust_cpu="false"
 _runqueue_sharing=""
 # Timer frequency - "100" "250" "300" "500" "750" "1000" ("2000" is available for cacule cpusched only) - More options available in kernel config prompt when left empty depending on selected cpusched with the default option pointed with a ">" (2000 for cacule, 100 for muqss and 1000 for other cpu schedulers)
-_timer_freq=""
+_timer_freq="500"
 # Default CPU governor - "performance", "ondemand", "schedutil" or leave empty for default (schedutil)
 _default_cpu_gov="ondemand"
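Note on the new _tickless guidance: CattaRappa mode ("1") only pays off when the isolated CPUs are actually handed to the nohz_full machinery at boot. A minimal sketch of matching kernel parameters, assuming cores 2-7 are the ones being isolated (the CPU list is illustrative, adjust to your topology):

# /etc/default/grub (illustrative values)
GRUB_CMDLINE_LINUX_DEFAULT="quiet nohz_full=2-7 isolcpus=2-7 rcu_nocbs=2-7"

Regenerate the boot config afterwards (on Arch: grub-mkconfig -o /boot/grub/grub.cfg). Without nohz_full, a full-tickless build behaves like the tickless-idle one, as the updated comment states.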

View File

@@ -1,6 +1,6 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux/x86 6.0.0-arch1 Kernel Configuration
+# Linux/x86 6.0.3-arch2 Kernel Configuration
 #
 CONFIG_CC_VERSION_TEXT="gcc (GCC) 12.2.0"
 CONFIG_CC_IS_GCC=y
@@ -7596,6 +7596,7 @@ CONFIG_HID_SMARTJOYPLUS=m
 CONFIG_SMARTJOYPLUS_FF=y
 CONFIG_HID_TIVO=m
 CONFIG_HID_TOPSEED=m
+CONFIG_HID_TOPRE=m
 CONFIG_HID_THINGM=m
 CONFIG_HID_THRUSTMASTER=m
 CONFIG_THRUSTMASTER_FF=y
@@ -10374,6 +10375,7 @@ CONFIG_LSM="landlock,lockdown,yama,integrity,bpf"
 # Memory initialization
 #
 CONFIG_CC_HAS_AUTO_VAR_INIT_PATTERN=y
+CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_BARE=y
 CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO=y
 # CONFIG_INIT_STACK_NONE is not set
 # CONFIG_INIT_STACK_ALL_PATTERN is not set
@@ -10849,6 +10851,7 @@ CONFIG_DEBUG_KERNEL=y
 # Compile-time checks and compiler options
 #
 CONFIG_DEBUG_INFO=y
+CONFIG_AS_HAS_NON_CONST_LEB128=y
 # CONFIG_DEBUG_INFO_NONE is not set
 # CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set
 # CONFIG_DEBUG_INFO_DWARF4 is not set
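To verify the newly added symbols on a running build, the shipped config can be queried directly, assuming CONFIG_IKCONFIG_PROC is enabled (otherwise check /boot/config-$(uname -r)):

zgrep -E 'CONFIG_HID_TOPRE|CONFIG_AS_HAS_NON_CONST_LEB128' /proc/config.gz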

View File

@@ -0,0 +1,14 @@
[Trigger]
Type = File
Operation = Install
Operation = Upgrade
Operation = Remove
Target = usr/lib/modules/*/
Target = !usr/lib/modules/*/?*
[Action]
Description = Cleaning up...
When = PostTransaction
Exec = /usr/share/libalpm/scripts/cleanup
NeedsTargets

linux-tkg-config/6.1/cleanup (new executable file, 10 lines)
View File

@@ -0,0 +1,10 @@
#!/bin/bash
for _f in /usr/lib/modules/*tkg*; do
if [[ ! -e ${_f}/vmlinuz ]]; then
rm -rf "$_f"
fi
done
# vim:set ft=sh sw=2 et:
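Taken together, the alpm hook above fires on any install, upgrade or removal that touches usr/lib/modules/, and this cleanup script then prunes module directories whose kernel no longer ships a vmlinuz, that is, leftovers from removed -tkg builds.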

File diff suppressed because it is too large

View File

@@ -1,10 +1,10 @@
 #!/bin/bash
 # List of kernels that are maintained upstream
-_current_kernels=("6.0" "5.19" "5.15" "5.10" "5.4")
+_current_kernels=("6.1" "6.0" "5.15" "5.10" "5.4")
 # List of kernels that are no longer maintained upstream
-_eol_kernels=("5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")
+_eol_kernels=("5.19" "5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")
 typeset -Ag _kernel_git_remotes
 _kernel_git_remotes=(
@@ -287,7 +287,7 @@ _set_cpu_scheduler() {
   elif [ "$_kver" = "514" ]; then
     _avail_cpu_scheds=("pds" "bmq" "cacule" "cfs")
   elif [ "$_kver" = "515" ]; then
-    _avail_cpu_scheds=("pds" "bmq" "cacule" "tt" "cfs")
+    _avail_cpu_scheds=("pds" "bmq" "cacule" "tt" "bore" "cfs")
   elif [ "$_kver" = "516" ]; then
     _avail_cpu_scheds=("pds" "bmq" "cacule" "cfs")
   elif [ "$_kver" = "517" ]; then
@@ -297,7 +297,9 @@ _set_cpu_scheduler() {
   elif [ "$_kver" = "519" ]; then
     _avail_cpu_scheds=("cfs" "pds" "bmq" "cacule" "tt" "bore")
   elif [ "$_kver" = "600" ]; then
-    _avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
+    _avail_cpu_scheds=("cfs" "tt" "bore")
+  elif [ "$_kver" = "601" ]; then
+    _avail_cpu_scheds=("cfs" "bore")
   else
     _avail_cpu_scheds=("cfs")
   fi
@@ -705,7 +707,7 @@ _tkg_srcprep() {
   elif [ "$_kver" = "509" ]; then
     rev=3
   elif [ "$_kver" = "510" ]; then
-    rev=3
+    rev=5
   elif [ "$_kver" = "511" ]; then
     rev=3
   elif [ "$_kver" = "512" ]; then
@@ -868,7 +870,7 @@ _tkg_srcprep() {
   if [ -z $_debug ]; then
     # Set some -tkg defaults
-    _disable "DYNAMIC_FAULT" "DEFAULT_FQ_CODEL"
+    _disable "DYNAMIC_FAULT" "DEFAULT_FQ_CODEL" "WERROR"
     _enable "DEFAULT_CAKE"
     if [ "$_kver" = "504" ]; then
       _module "TP_SMAPI"
@@ -941,10 +943,10 @@ _tkg_srcprep() {
   # cpu opt
   _cpu_marchs=("native_amd" "native_intel" "generic_cpu" "generic_cpu2" "generic_cpu3" "generic_cpu4")
   _cpu_marchs+=("k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver")
-  _cpu_marchs+=("steamroller" "excavator" "zen" "zen2" "zen3" "mpsc" "atom" "core2" "nehalem" "westmere")
+  _cpu_marchs+=("steamroller" "excavator" "zen" "zen2" "zen3" "zen4" "mpsc" "atom" "core2" "nehalem" "westmere")
   _cpu_marchs+=("bonnell" "silvermont" "sandybridge" "ivybridge" "haswell" "broadwell" "skylake")
   _cpu_marchs+=("skylakex" "cannonlake" "icelake" "goldmont" "goldmontplus" "cascadelake")
-  _cpu_marchs+=("cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake")
+  _cpu_marchs+=("cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake" "meteorlake")
   typeset -A _generic_march_map
   _generic_march_map=(
@@ -1328,7 +1330,7 @@ _tkg_srcprep() {
   plain "Use CattaRappa mode (Tickless/Dynticks) ?"
   _tickless_array_text=(
     "No, use periodic ticks."
-    "Yes, full tickless baby!\n Can give higher performances in many cases but lower consistency on some hardware."
+    "Yes, full tickless baby!\n Full tickless can give higher performances in case you use isolation of CPUs for task, in other cases it behaves as Idle."
     "Just tickless idle plz.\n Just tickless idle can perform better with some platforms (mostly AMD) or CPU schedulers (mostly MuQSS)."
   )
   _default_index="2"
@@ -1599,8 +1601,10 @@ CONFIG_DEBUG_INFO_BTF_MODULES=y\r
   fi
   if [ -n "$_NR_CPUS_value" ]; then
     scripts/config --set-val "NR_CPUS" "$_NR_CPUS_value"
+    _enable "FORCE_NR_CPUS"
   else
     scripts/config --set-val "NR_CPUS" "128"
+    _disable "FORCE_NR_CPUS"
   fi
 fi
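For reference, the _enable and _disable calls used above are the prepare script's thin wrappers around the kernel's scripts/config tool; a rough sketch of their assumed shape (the real helpers live elsewhere in this same script):

_enable() { for _opt in "$@"; do scripts/config --enable "$_opt"; done; }
_disable() { for _opt in "$@"; do scripts/config --disable "$_opt"; done; }

Disabling "WERROR" here is what implements the "Make sure WERROR is disabled by default" commit for user-supplied configs that don't already turn it off.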

View File

@@ -64,3 +64,588 @@ index 2c7171e0b0010..85de313ddec29 100644
 select CPU_FREQ_GOV_PERFORMANCE
 help
From 2535fbde890f14c78b750139fcf87d1143850626 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 2 Aug 2022 12:28:11 -0400
Subject: [PATCH] mm: vmscan: fix extreme overreclaim and swap floods
During proactive reclaim, we sometimes observe severe overreclaim, with
several thousand times more pages reclaimed than requested.
This trace was obtained from shrink_lruvec() during such an instance:
prio:0 anon_cost:1141521 file_cost:7767
nr_reclaimed:4387406 nr_to_reclaim:1047 (or_factor:4190)
nr=[7161123 345 578 1111]
While the reclaimer requested 4M, vmscan reclaimed close to 16G, most of it
by swapping. These requests take over a minute, during which the write()
to memory.reclaim is unkillably stuck inside the kernel.
Digging into the source, this is caused by the proportional reclaim
bailout logic. This code tries to resolve a fundamental conflict: to
reclaim roughly what was requested, while also aging all LRUs fairly and
in accordance to their size, swappiness, refault rates etc. The way it
attempts fairness is that once the reclaim goal has been reached, it stops
scanning the LRUs with the smaller remaining scan targets, and adjusts the
remainder of the bigger LRUs according to how much of the smaller LRUs was
scanned. It then finishes scanning that remainder regardless of the
reclaim goal.
This works fine if priority levels are low and the LRU lists are
comparable in size. However, in this instance, the cgroup that is
targeted by proactive reclaim has almost no files left - they've already
been squeezed out by proactive reclaim earlier - and the remaining anon
pages are hot. Anon rotations cause the priority level to drop to 0,
which results in reclaim targeting all of anon (a lot) and all of file
(almost nothing). By the time reclaim decides to bail, it has scanned
most or all of the file target, and therefore must also scan most or all of
the enormous anon target. This target is thousands of times larger than
the reclaim goal, thus causing the overreclaim.
The bailout code hasn't changed in years, why is this failing now? The
most likely explanations are two other recent changes in anon reclaim:
1. Before the series starting with commit 5df741963d52 ("mm: fix LRU
balancing effect of new transparent huge pages"), the VM was
overall relatively reluctant to swap at all, even if swap was
configured. This means the LRU balancing code didn't come into play
as often as it does now, and mostly in high pressure situations
where pronounced swap activity wouldn't be as surprising.
2. For historic reasons, shrink_lruvec() loops on the scan targets of
all LRU lists except the active anon one, meaning it would bail if
the only remaining pages to scan were active anon - even if there
were a lot of them.
Before the series starting with commit ccc5dc67340c ("mm/vmscan:
make active/inactive ratio as 1:1 for anon lru"), most anon pages
would live on the active LRU; the inactive one would contain only a
handful of preselected reclaim candidates. After the series, anon
gets aged similarly to file, and the inactive list is the default
for new anon pages as well, making it often the much bigger list.
As a result, the VM is now more likely to actually finish large
anon targets than before.
Change the code such that only one SWAP_CLUSTER_MAX-sized nudge toward the
larger LRU lists is made before bailing out on a met reclaim goal.
This fixes the extreme overreclaim problem.
Fairness is more subtle and harder to evaluate. No obvious misbehavior
was observed on the test workload, in any case. Conceptually, fairness
should primarily be a cumulative effect from regular, lower priority
scans. Once the VM is in trouble and needs to escalate scan targets to
make forward progress, fairness needs to take a backseat. This is also
acknowledged by the myriad exceptions in get_scan_count(). This patch
makes fairness decrease gradually, as it keeps fairness work static over
increasing priority levels with growing scan targets. This should make
more sense - although we may have to re-visit the exact values.
Link: https://lkml.kernel.org/r/20220802162811.39216-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/vmscan.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 382dbe97329f33..266eb8cfe93a67 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2955,8 +2955,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
enum lru_list lru;
unsigned long nr_reclaimed = 0;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+ bool proportional_reclaim;
struct blk_plug plug;
- bool scan_adjusted;
get_scan_count(lruvec, sc, nr);
@@ -2974,8 +2974,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
* abort proportional reclaim if either the file or anon lru has already
* dropped to zero at the first pass.
*/
- scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
- sc->priority == DEF_PRIORITY);
+ proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+ sc->priority == DEF_PRIORITY);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2995,7 +2995,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
cond_resched();
- if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
+ if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
continue;
/*
@@ -3046,8 +3046,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
-
- scan_adjusted = true;
}
blk_finish_plug(&plug);
sc->nr_reclaimed += nr_reclaimed;
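For context, the proactive reclaim requests discussed in this commit message come from userspace writes to the cgroup v2 memory.reclaim file; a minimal illustration (cgroup path and size are illustrative):

echo "4M" > /sys/fs/cgroup/workload/memory.reclaim

With the fix applied, such a request reclaims roughly the amount asked for instead of swapping out gigabytes and blocking the writer for over a minute.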
From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Mon, 4 Oct 2021 14:07:34 -0400
Subject: [PATCH] Bluetooth: fix deadlock for RFCOMM sk state change
Syzbot reports the following task hang [1]:
INFO: task syz-executor255:8499 blocked for more than 143 seconds.
Not tainted 5.14.0-rc7-syzkaller #0
Call Trace:
context_switch kernel/sched/core.c:4681 [inline]
__schedule+0x93a/0x26f0 kernel/sched/core.c:5938
schedule+0xd3/0x270 kernel/sched/core.c:6017
__lock_sock+0x13d/0x260 net/core/sock.c:2644
lock_sock_nested+0xf6/0x120 net/core/sock.c:3185
lock_sock include/net/sock.h:1612 [inline]
rfcomm_sk_state_change+0xb4/0x390 net/bluetooth/rfcomm/sock.c:73
__rfcomm_dlc_close+0x1b6/0x8a0 net/bluetooth/rfcomm/core.c:489
rfcomm_dlc_close+0x1ea/0x240 net/bluetooth/rfcomm/core.c:520
__rfcomm_sock_close+0xac/0x260 net/bluetooth/rfcomm/sock.c:220
rfcomm_sock_shutdown+0xe9/0x210 net/bluetooth/rfcomm/sock.c:931
rfcomm_sock_release+0x5f/0x140 net/bluetooth/rfcomm/sock.c:951
__sock_release+0xcd/0x280 net/socket.c:649
sock_close+0x18/0x20 net/socket.c:1314
__fput+0x288/0x920 fs/file_table.c:280
task_work_run+0xdd/0x1a0 kernel/task_work.c:164
exit_task_work include/linux/task_work.h:32 [inline]
do_exit+0xbd4/0x2a60 kernel/exit.c:825
do_group_exit+0x125/0x310 kernel/exit.c:922
get_signal+0x47f/0x2160 kernel/signal.c:2808
arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:865
handle_signal_work kernel/entry/common.c:148 [inline]
exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:209
__syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:302
do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
entry_SYSCALL_64_after_hwframe+0x44/0xae
Showing all locks held in the system:
1 lock held by khungtaskd/1653:
#0: ffffffff8b97c280 (rcu_read_lock){....}-{1:2}, at:
debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6446
1 lock held by krfcommd/4781:
#0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
rfcomm_process_sessions net/bluetooth/rfcomm/core.c:1979 [inline]
#0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
rfcomm_run+0x2ed/0x4a20 net/bluetooth/rfcomm/core.c:2086
2 locks held by in:imklog/8206:
#0: ffff8880182ce5f0 (&f->f_pos_lock){+.+.}-{3:3}, at:
__fdget_pos+0xe9/0x100 fs/file.c:974
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
raw_spin_rq_lock_nested kernel/sched/core.c:460 [inline]
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock
kernel/sched/sched.h:1307 [inline]
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: rq_lock
kernel/sched/sched.h:1610 [inline]
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
__schedule+0x233/0x26f0 kernel/sched/core.c:5852
4 locks held by syz-executor255/8499:
#0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
inode_lock include/linux/fs.h:774 [inline]
#0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
__sock_release+0x86/0x280 net/socket.c:648
#1:
ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
at: lock_sock include/net/sock.h:1612 [inline]
#1:
ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
at: rfcomm_sock_shutdown+0x54/0x210 net/bluetooth/rfcomm/sock.c:928
#2: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
rfcomm_dlc_close+0x34/0x240 net/bluetooth/rfcomm/core.c:507
#3: ffff888141bd6d28 (&d->lock){+.+.}-{3:3}, at:
__rfcomm_dlc_close+0x162/0x8a0 net/bluetooth/rfcomm/core.c:487
==================================================================
The task hangs because of a deadlock that occurs when lock_sock() is
called in rfcomm_sk_state_change(). One such call stack is:
rfcomm_sock_shutdown():
lock_sock();
__rfcomm_sock_close():
rfcomm_dlc_close():
__rfcomm_dlc_close():
rfcomm_dlc_lock();
rfcomm_sk_state_change():
lock_sock();
lock_sock() has to be called when the sk state is changed because the
lock is not always held when rfcomm_sk_state_change() is
called. However, besides the recursive deadlock, there is also an
issue of a lock hierarchy inversion between rfcomm_dlc_lock() and
lock_sock() if the socket is locked in rfcomm_sk_state_change().
To avoid these issues, we can instead schedule the sk state change in
the global workqueue. This is already the implicit assumption about
how sk state changes happen. For example, in rfcomm_sock_shutdown(),
the call to __rfcomm_sock_close() is followed by
bt_sock_wait_state().
Additionally, the call to rfcomm_sock_kill() inside
rfcomm_sk_state_change() should be removed. The socket shouldn't be
killed here because only rfcomm_sock_release() calls sock_orphan(),
which it already follows up with a call to rfcomm_sock_kill().
Fixes: b7ce436a5d79 ("Bluetooth: switch to lock_sock in RFCOMM")
Link: https://syzkaller.appspot.com/bug?extid=7d51f807c81b190a127d [1]
Reported-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
Tested-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
---
include/net/bluetooth/rfcomm.h | 3 +++
net/bluetooth/rfcomm/core.c | 2 ++
net/bluetooth/rfcomm/sock.c | 34 ++++++++++++++++++++++------------
3 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 99d26879b02a53..a92799fc5e74d0 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -171,6 +171,7 @@ struct rfcomm_dlc {
struct rfcomm_session *session;
struct sk_buff_head tx_queue;
struct timer_list timer;
+ struct work_struct state_change_work;
struct mutex lock;
unsigned long state;
@@ -186,6 +187,7 @@ struct rfcomm_dlc {
u8 sec_level;
u8 role_switch;
u32 defer_setup;
+ int err;
uint mtu;
uint cfc;
@@ -310,6 +312,7 @@ struct rfcomm_pinfo {
u8 role_switch;
};
+void __rfcomm_sk_state_change(struct work_struct *work);
int rfcomm_init_sockets(void);
void rfcomm_cleanup_sockets(void);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 7324764384b677..c6494e85cd68b2 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -289,6 +289,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
d->flags = 0;
d->mscex = 0;
d->sec_level = BT_SECURITY_LOW;
+ d->err = 0;
d->mtu = RFCOMM_DEFAULT_MTU;
d->v24_sig = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
@@ -306,6 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
timer_setup(&d->timer, rfcomm_dlc_timeout, 0);
skb_queue_head_init(&d->tx_queue);
+ INIT_WORK(&d->state_change_work, __rfcomm_sk_state_change);
mutex_init(&d->lock);
refcount_set(&d->refcnt, 1);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4bf4ea6cbb5eee..4850dafbaa05fb 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -61,19 +61,22 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb)
rfcomm_dlc_throttle(d);
}
-static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
+void __rfcomm_sk_state_change(struct work_struct *work)
{
+ struct rfcomm_dlc *d = container_of(work, struct rfcomm_dlc,
+ state_change_work);
struct sock *sk = d->owner, *parent;
if (!sk)
return;
- BT_DBG("dlc %p state %ld err %d", d, d->state, err);
-
lock_sock(sk);
+ rfcomm_dlc_lock(d);
- if (err)
- sk->sk_err = err;
+ BT_DBG("dlc %p state %ld err %d", d, d->state, d->err);
+
+ if (d->err)
+ sk->sk_err = d->err;
sk->sk_state = d->state;
@@ -91,15 +94,22 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
sk->sk_state_change(sk);
}
+ rfcomm_dlc_unlock(d);
release_sock(sk);
+ sock_put(sk);
+}
- if (parent && sock_flag(sk, SOCK_ZAPPED)) {
- /* We have to drop DLC lock here, otherwise
- * rfcomm_sock_destruct() will dead lock. */
- rfcomm_dlc_unlock(d);
- rfcomm_sock_kill(sk);
- rfcomm_dlc_lock(d);
- }
+static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
+{
+ struct sock *sk = d->owner;
+
+ if (!sk)
+ return;
+
+ d->err = err;
+ sock_hold(sk);
+ if (!schedule_work(&d->state_change_work))
+ sock_put(sk);
}
/* ---- Socket functions ---- */
From 50e6a66675f6c9835d4f1d4f8c947d1699ce8e24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 7 Oct 2022 09:51:13 +0200
Subject: [PATCH 4/5] drm/sched: add DRM_SCHED_FENCE_DONT_PIPELINE flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Setting this flag on a scheduler fence prevents pipelining of jobs
depending on this fence. In other words we always insert a full CPU
round trip before dependen jobs are pushed to the pipeline.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/scheduler/sched_entity.c | 3 ++-
include/drm/gpu_scheduler.h | 9 +++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 6b25b2f4f5a3..6137537aaea4 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -385,7 +385,8 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
}
s_fence = to_drm_sched_fence(fence);
- if (s_fence && s_fence->sched == sched) {
+ if (s_fence && s_fence->sched == sched &&
+ !test_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &fence->flags)) {
/*
* Fence is from the same scheduler, only need to wait for
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index addb135eeea6..289a33e80639 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -32,6 +32,15 @@
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
+/**
+ * DRM_SCHED_FENCE_DONT_PIPELINE - Prefent dependency pipelining
+ *
+ * Setting this flag on a scheduler fence prevents pipelining of jobs depending
+ * on this fence. In other words we always insert a full CPU round trip before
+ * dependen jobs are pushed to the hw queue.
+ */
+#define DRM_SCHED_FENCE_DONT_PIPELINE DMA_FENCE_FLAG_USER_BITS
+
struct drm_gem_object;
struct drm_gpu_scheduler;
--
2.25.1
From e15e1601fba660124acd7ad41b6f61d46a1c4835 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 7 Oct 2022 10:59:58 +0200
Subject: [PATCH 5/5] drm/amdgpu: use DRM_SCHED_FENCE_DONT_PIPELINE for VM
updates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Make sure that we always have a CPU round trip to let the submission
code correctly decide if a TLB flush is necessary or not.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 1fd3cbca20a2..c7bf189d50de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -115,8 +115,15 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
amdgpu_bo_fence(p->vm->root.bo, f, true);
}
- if (fence && !p->immediate)
+ if (fence && !p->immediate) {
+ /*
+ * Most hw generations now have a separate queue for page table
+ * updates, but when the queue is shared with userspace we need
+ * the extra CPU round trip to correctly flush the TLB.
+ */
+ set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags);
swap(*fence, f);
+ }
dma_fence_put(f);
return 0;
--
2.25.1
From fb23dad87a0bfb6fdfde3dc1d18104da631d050a Mon Sep 17 00:00:00 2001
From: Sjoerd Simons <sjoerd@collabora.com>
Date: Sat, 8 Oct 2022 21:57:51 +0200
Subject: [PATCH] soundwire: intel: Initialize clock stop timeout
The bus->clk_stop_timeout member is only initialized to a non-zero value
during the codec driver probe. This can lead to corner cases where this
value remains pegged at zero when the bus suspends, which results in an
endless loop in sdw_bus_wait_for_clk_prep_deprep().
Corner cases include configurations with no codecs described in the
firmware, or delays in probing codec drivers.
Initializing the default timeout to the smallest non-zero value avoid this
problem and allows for the existing logic to be preserved: the
bus->clk_stop_timeout is set as the maximum required by all codecs
connected on the bus.
Signed-off-by: Sjoerd Simons <sjoerd@collabora.com>
---
drivers/soundwire/intel.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index af6c1a93372d90..002bc26b525e87 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -1307,6 +1307,7 @@ static int intel_link_probe(struct auxiliary_device *auxdev,
cdns->msg_count = 0;
bus->link_id = auxdev->id;
+ bus->clk_stop_timeout = 1;
sdw_cdns_probe(cdns);
From 785699dbc7041b99e0027bff27ffe17eba202e96 Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Tue, 4 Oct 2022 07:33:39 -0700
Subject: [PATCH] drm/amdgpu: Fix VRAM BO swap issue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
DRM buddy manager allocates the contiguous memory requests in
a single block or multiple blocks. So for the ttm move operation
(incase of low vram memory) we should consider all the blocks to
compute the total memory size which compared with the struct
ttm_resource num_pages in order to verify that the blocks are
contiguous for the eviction process.
v2: Added a Fixes tag
v3: Rewrite the code to save a bit of calculations and
variables (Christian)
Fixes: c9cad937c0c5 ("drm/amdgpu: add drm buddy support to amdgpu")
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 134575a3893c53..794062ab57fca4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -424,8 +424,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
static bool amdgpu_mem_visible(struct amdgpu_device *adev,
struct ttm_resource *mem)
{
- uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT;
+ u64 mem_size = (u64)mem->num_pages << PAGE_SHIFT;
struct amdgpu_res_cursor cursor;
+ u64 end;
if (mem->mem_type == TTM_PL_SYSTEM ||
mem->mem_type == TTM_PL_TT)
@@ -434,12 +435,18 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
return false;
amdgpu_res_first(mem, 0, mem_size, &cursor);
+ end = cursor.start + cursor.size;
+ while (cursor.remaining) {
+ amdgpu_res_next(&cursor, cursor.size);
- /* ttm_resource_ioremap only supports contiguous memory */
- if (cursor.size != mem_size)
- return false;
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (end != cursor.start)
+ return false;
+
+ end = cursor.start + cursor.size;
+ }
- return cursor.start + cursor.size <= adev->gmc.visible_vram_size;
+ return end <= adev->gmc.visible_vram_size;
}
/*
From 6df3912f64cea68409b08d282ffbccf0af7f8d8e Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Mon, 17 Oct 2022 13:15:21 -0700
Subject: [PATCH] drm/amdgpu: Fix for BO move issue
If there are no blocks to compare then exit
the loop.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 794062ab57fca4..9e6c23266a1a0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -439,6 +439,9 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
while (cursor.remaining) {
amdgpu_res_next(&cursor, cursor.size);
+ if (!cursor.remaining)
+ break;
+
/* ttm_resource_ioremap only supports contiguous memory */
if (end != cursor.start)
return false;

View File

@@ -0,0 +1,156 @@
From 5ec2dd3a095442ec1a21d86042a4994f2ba24e63 Mon Sep 17 00:00:00 2001
Message-Id: <5ec2dd3a095442ec1a21d86042a4994f2ba24e63.1512651251.git.jan.steffens@gmail.com>
From: Serge Hallyn <serge.hallyn@canonical.com>
Date: Fri, 31 May 2013 19:12:12 +0100
Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
[bwh: Remove unneeded binary sysctl bits]
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
kernel/fork.c | 15 +++++++++++++++
kernel/sysctl.c | 12 ++++++++++++
kernel/user_namespace.c | 3 +++
3 files changed, 30 insertions(+)
diff --git a/kernel/fork.c b/kernel/fork.c
index 07cc743698d3668e..4011d68a8ff9305c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -102,6 +102,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/task.h>
+#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+#else
+#define unprivileged_userns_clone 0
+#endif
/*
* Minimum number of threads to boot the kernel
@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process(
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto bad_unshare_out;
+ }
+
err = check_unshare_flags(unshare_flags);
if (err)
goto bad_unshare_out;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b86520ed3fb60fbf..f7dab3760839f1a1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -105,6 +105,9 @@ extern int core_uses_pid;
#if defined(CONFIG_SYSCTL)
+#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+#endif
/* Constants used for minimum and maximum */
#ifdef CONFIG_PERF_EVENTS
@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
+#ifdef CONFIG_USER_NS
+ {
+ .procname = "unprivileged_userns_clone",
+ .data = &unprivileged_userns_clone,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index c490f1e4313b998a..dd03bd39d7bf194d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -24,6 +24,9 @@
#include <linux/projid.h>
#include <linux/fs_struct.h>
+/* sysctl */
+int unprivileged_userns_clone;
+
static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);
--
2.15.1
From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Thu, 7 Dec 2017 13:50:48 +0100
Subject: ZEN: Add CONFIG for unprivileged_userns_clone
This way our default behavior continues to match the vanilla kernel.
---
init/Kconfig | 16 ++++++++++++++++
kernel/user_namespace.c | 4 ++++
2 files changed, 20 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 4592bf7997c0..f3df02990aff 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1004,6 +1004,22 @@ config USER_NS
If unsure, say N.
+config USER_NS_UNPRIVILEGED
+ bool "Allow unprivileged users to create namespaces"
+ default y
+ depends on USER_NS
+ help
+ When disabled, unprivileged users will not be able to create
+ new namespaces. Allowing users to create their own namespaces
+ has been part of several recent local privilege escalation
+ exploits, so if you need user namespaces but are
+ paranoid^Wsecurity-conscious you want to disable this.
+
+ This setting can be overridden at runtime via the
+ kernel.unprivileged_userns_clone sysctl.
+
+ If unsure, say Y.
+
config PID_NS
bool "PID Namespaces"
default y
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6b9dbc257e34..107b17f0d528 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -27,7 +27,11 @@
#include <linux/sort.h>
/* sysctl */
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
+int unprivileged_userns_clone = 1;
+#else
int unprivileged_userns_clone;
+#endif
static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);
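With both patches applied, the policy is a runtime toggle; for example:

# query the current setting
sysctl kernel.unprivileged_userns_clone
# allow unprivileged user namespaces until reboot (persist via /etc/sysctl.d/)
sysctl -w kernel.unprivileged_userns_clone=1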

View File

@@ -0,0 +1,244 @@
From 5ae86c8436b83762bc6cf46bea1da6ace2d3f50e Mon Sep 17 00:00:00 2001
From: Paul Gofman <pgofman@codeweavers.com>
Date: Wed, 6 May 2020 14:37:44 +0300
Subject: [PATCH 1/2] mm: Support soft dirty flag reset for VA range.
---
fs/proc/task_mmu.c | 129 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 103 insertions(+), 26 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3cec6fbef725..7c7865028f10 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1032,6 +1032,8 @@ enum clear_refs_types {
struct clear_refs_private {
enum clear_refs_types type;
+ unsigned long start, end;
+ bool clear_range;
};
#ifdef CONFIG_MEM_SOFT_DIRTY
@@ -1125,6 +1127,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
struct page *page;
+ BUG_ON(addr < cp->start || end > cp->end);
+
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
@@ -1181,9 +1185,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
struct clear_refs_private *cp = walk->private;
struct vm_area_struct *vma = walk->vma;
- if (vma->vm_flags & VM_PFNMAP)
+ if (!cp->clear_range && (vma->vm_flags & VM_PFNMAP))
return 1;
+ BUG_ON(start < cp->start || end > cp->end);
+
/*
* Writing 1 to /proc/pid/clear_refs affects all pages.
* Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
@@ -1206,10 +1212,12 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct task_struct *task;
- char buffer[PROC_NUMBUF];
+ char buffer[18];
struct mm_struct *mm;
struct vm_area_struct *vma;
enum clear_refs_types type;
+ unsigned long start, end;
+ bool clear_range;
int itype;
int rv;
@@ -1218,12 +1226,34 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- rv = kstrtoint(strstrip(buffer), 10, &itype);
- if (rv < 0)
- return rv;
- type = (enum clear_refs_types)itype;
- if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
- return -EINVAL;
+
+ if (buffer[0] == '6')
+ {
+ static int once;
+
+ if (!once++)
+ printk(KERN_DEBUG "task_mmu: Using POC clear refs range implementation.\n");
+
+ if (count != 17)
+ return -EINVAL;
+
+ type = CLEAR_REFS_SOFT_DIRTY;
+ start = *(unsigned long *)(buffer + 1);
+ end = *(unsigned long *)(buffer + 1 + 8);
+ }
+ else
+ {
+ rv = kstrtoint(strstrip(buffer), 10, &itype);
+ if (rv < 0)
+ return rv;
+ type = (enum clear_refs_types)itype;
+
+ if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
+ return -EINVAL;
+
+ start = 0;
+ end = -1UL;
+ }
task = get_proc_task(file_inode(file));
if (!task)
@@ -1235,40 +1265,86 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
.type = type,
};
- if (mmap_write_lock_killable(mm)) {
- count = -EINTR;
- goto out_mm;
+ if (start || end != -1UL)
+ {
+ start = min(start, -1) & PAGE_MASK;
+ end = min(end, -1) & PAGE_MASK;
+
+ if (start >= end)
+ {
+ count = -EINVAL;
+ goto out_mm;
+ }
+ clear_range = true;
}
+ else
+ {
+ clear_range = false;
+ }
+
+ cp.start = start;
+ cp.end = end;
+ cp.clear_range = clear_range;
+
if (type == CLEAR_REFS_MM_HIWATER_RSS) {
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+
/*
* Writing 5 to /proc/pid/clear_refs resets the peak
* resident set size to this mm's current rss value.
*/
reset_mm_hiwater_rss(mm);
- goto out_unlock;
+ mmap_write_unlock(mm);
+ goto out_mm;
}
if (type == CLEAR_REFS_SOFT_DIRTY) {
- mas_for_each(&mas, vma, ULONG_MAX) {
- if (!(vma->vm_flags & VM_SOFTDIRTY))
- continue;
- vma->vm_flags &= ~VM_SOFTDIRTY;
- vma_set_page_prot(vma);
+ if (mmap_read_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
}
-
+ if (!clear_range)
+ mas_for_each(&mas, vma, ULONG_MAX) {
+ if (!(vma->vm_flags & VM_SOFTDIRTY))
+ continue;
+ mmap_read_unlock(mm);
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+ mas_for_each(&mas, vma, ULONG_MAX) {
+ vma->vm_flags &= ~VM_SOFTDIRTY;
+ vma_set_page_prot(vma);
+ }
+ mmap_write_downgrade(mm);
+ break;
+ }
inc_tlb_flush_pending(mm);
mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
- 0, NULL, mm, 0, -1UL);
+ 0, NULL, mm, start, end);
mmu_notifier_invalidate_range_start(&range);
}
- walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
+ else
+ {
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+ }
+ walk_page_range(mm, start, end == -1UL ? -1 : end, &clear_refs_walk_ops, &cp);
if (type == CLEAR_REFS_SOFT_DIRTY) {
mmu_notifier_invalidate_range_end(&range);
flush_tlb_mm(mm);
dec_tlb_flush_pending(mm);
+ mmap_read_unlock(mm);
+ }
+ else
+ {
+ mmap_write_unlock(mm);
}
-out_unlock:
- mmap_write_unlock(mm);
out_mm:
mmput(mm);
}
@@ -1301,6 +1377,7 @@ struct pagemapread {
#define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
+#define PM_SOFT_DIRTY_PAGE BIT_ULL(57)
#define PM_UFFD_WP BIT_ULL(57)
#define PM_FILE BIT_ULL(61)
#define PM_SWAP BIT_ULL(62)
@@ -1373,13 +1450,13 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_PRESENT;
page = vm_normal_page(vma, addr, pte);
if (pte_soft_dirty(pte))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pte_uffd_wp(pte))
flags |= PM_UFFD_WP;
} else if (is_swap_pte(pte)) {
swp_entry_t entry;
if (pte_swp_soft_dirty(pte))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pte_swp_uffd_wp(pte))
flags |= PM_UFFD_WP;
entry = pte_to_swp_entry(pte);
@@ -1500,7 +1500,7 @@
flags |= PM_PRESENT;
if (pmd_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pmd_uffd_wp(pmd))
flags |= PM_UFFD_WP;
if (pm->show_pfn)
@@ -1442,7 +1519,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
}
flags |= PM_SWAP;
if (pmd_swp_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pmd_swp_uffd_wp(pmd))
flags |= PM_UFFD_WP;
VM_BUG_ON(!is_pmd_migration_entry(pmd));
--
2.30.2
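In short, this patch extends the /proc/<pid>/clear_refs interface: a write whose first byte is '6' must be exactly 17 bytes long, carrying the start and end of the address range as two native unsigned longs packed after the leading character, while plain numeric writes keep the old whole-address-space behaviour. Pagemap entries additionally report per-page soft-dirty state via the new PM_SOFT_DIRTY_PAGE bit (bit 57), which the Wine/Proton write-watch work this proof-of-concept comes from relies on.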

View File

@@ -0,0 +1,360 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 14 Mar 2016 11:10:58 -0600
Subject: [PATCH] pci pme wakeups
Reduce wakeups for PME checks, which are a workaround for miswired
boards (sadly, too many of them) in laptops.
---
drivers/pci/pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c9338f9..6974fbf 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -62,7 +62,7 @@ struct pci_pme_device {
struct pci_dev *dev;
};
-#define PME_TIMEOUT 1000 /* How long between PME checks */
+#define PME_TIMEOUT 4000 /* How long between PME checks */
static void pci_dev_d3_sleep(struct pci_dev *dev)
{
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 19 Mar 2016 21:32:19 -0400
Subject: [PATCH] intel_idle: tweak cpuidle cstates
Increase target_residency in cpuidle cstate
Tune intel_idle to be a bit less agressive;
Clear linux is cleaner in hygiene (wakupes) than the average linux,
so we can afford changing these in a way that increases
performance while keeping power efficiency
---
drivers/idle/intel_idle.c | 44 +++++++++++++++++++--------------------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index f449584..c994d24 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 33,
- .target_residency = 100,
+ .target_residency = 900,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
- .target_residency = 400,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
- .target_residency = 500,
+ .target_residency = 1500,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
- .target_residency = 900,
+ .target_residency = 2000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
- .target_residency = 1800,
+ .target_residency = 5000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
- .target_residency = 7700,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 40,
- .target_residency = 100,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
- .target_residency = 400,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
- .target_residency = 500,
+ .target_residency = 2000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
- .target_residency = 900,
+ .target_residency = 4000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
- .target_residency = 1800,
+ .target_residency = 7000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
- .target_residency = 7700,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 70,
- .target_residency = 100,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 85,
- .target_residency = 200,
+ .target_residency = 600,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x33",
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 124,
- .target_residency = 800,
+ .target_residency = 3000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200,
- .target_residency = 800,
+ .target_residency = 3200,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -708,7 +708,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 480,
- .target_residency = 5000,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 890,
- .target_residency = 5000,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 300,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
--
https://clearlinux.org
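To see what this patch is tuning on a live system, the cpuidle sysfs interface exposes each C-state's target residency. A minimal check, assuming cpu0 exists and intel_idle is the active driver:

    # print each C-state's name and target residency (microseconds)
    for s in /sys/devices/system/cpu/cpu0/cpuidle/state*; do
        printf '%s: %s us\n' "$(cat "$s/name")" "$(cat "$s/residency")"
    done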
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 6 Jan 2017 15:34:09 +0000
Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little
bigger than default
---
net/ipv4/tcp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 30c1142..4345075 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4201,8 +4201,8 @@ void __init tcp_init(void)
tcp_init_mem();
/* Set per-socket limits to no more than 1/128 the pressure threshold */
limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
- max_wshare = min(4UL*1024*1024, limit);
- max_rshare = min(6UL*1024*1024, limit);
+ max_wshare = min(16UL*1024*1024, limit);
+ max_rshare = min(16UL*1024*1024, limit);
init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
--
https://clearlinux.org
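The raised max_wshare/max_rshare caps feed into the autotuned per-socket buffer limits, which can be inspected with sysctl; the third field of each triple is the maximum (exact values vary with available memory):

    sysctl net.ipv4.tcp_wmem net.ipv4.tcp_rmem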
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 18 Feb 2018 23:35:41 +0000
Subject: [PATCH] locking: rwsem: spin faster
tweak rwsem owner spinning a bit
---
kernel/locking/rwsem.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index f11b9bd..1bbfcc1 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
struct task_struct *new, *owner;
unsigned long flags, new_flags;
enum owner_state state;
+ int i = 0;
owner = rwsem_owner_flags(sem, &flags);
state = rwsem_owner_state(owner, flags, nonspinnable);
@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
break;
}
- cpu_relax();
+ if (i++ > 1000)
+ cpu_relax();
}
rcu_read_unlock();
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 2 Jun 2016 23:36:32 -0500
Subject: [PATCH] initialize ata before graphics
ATA init is the long pole in the boot process, and it's asynchronous.
Move the graphics init after it so that ATA and graphics initialize
in parallel.
---
drivers/Makefile | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/Makefile b/drivers/Makefile
index c0cd1b9..af1e2fb 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -59,15 +59,8 @@ obj-y += char/
# iommu/ comes before gpu as gpu are using iommu controllers
obj-y += iommu/
-# gpu/ comes after char for AGP vs DRM startup and after iommu
-obj-y += gpu/
-
obj-$(CONFIG_CONNECTOR) += connector/
-# i810fb and intelfb depend on char/agp/
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
-obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
-
obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/
obj-y += scsi/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
+
+# gpu/ comes after char for AGP vs DRM startup and after iommu
+obj-y += gpu/
+
+# i810fb and intelfb depend on char/agp/
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
+
obj-$(CONFIG_TARGET_CORE) += target/
obj-$(CONFIG_MTD) += mtd/
obj-$(CONFIG_SPI) += spi/
--
https://clearlinux.org
@@ -0,0 +1,363 @@
From 9c85113cf4019e7b277a44e72bda8b78347aa72f Mon Sep 17 00:00:00 2001
From: Paul Gofman <pgofman@codeweavers.com>
Date: Thu, 7 May 2020 14:05:31 +0300
Subject: [PATCH 2/2] mm: Support soft dirty flag read with reset.
---
fs/proc/base.c | 3 +
fs/proc/internal.h | 1 +
fs/proc/task_mmu.c | 144 +++++++++++++++++++++++++++++++++++++++------
3 files changed, 130 insertions(+), 18 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b3422cda2a91..8199ae2411ca 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3202,6 +3202,9 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ REG("pagemap_reset", S_IRUSR, proc_pagemap_reset_operations),
+#endif
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f60b379dcdc7..36a901cf0e7f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -303,6 +303,7 @@ extern const struct file_operations proc_pid_smaps_operations;
extern const struct file_operations proc_pid_smaps_rollup_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
+extern const struct file_operations proc_pagemap_reset_operations;
extern unsigned long task_vsize(struct mm_struct *);
extern unsigned long task_statm(struct mm_struct *,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7c7865028f10..a21694967915 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1056,8 +1056,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
return page_maybe_dma_pinned(page);
}
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
- unsigned long addr, pte_t *pte)
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *pte)
{
/*
* The soft-dirty tracker uses #PF-s to catch writes
@@ -1066,37 +1066,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
* of how soft-dirty works.
*/
pte_t ptent = *pte;
+ bool ret = false;
if (pte_present(ptent)) {
pte_t old_pte;
if (pte_is_pinned(vma, addr, ptent))
- return;
+ return ret;
old_pte = ptep_modify_prot_start(vma, addr, pte);
+ ret = pte_soft_dirty(old_pte);
ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (is_swap_pte(ptent)) {
+ ret = pte_swp_soft_dirty(ptent);
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
+ return ret;
}
#else
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
+ return false;
}
#endif
#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t old, pmd = *pmdp;
+ bool ret = false;
if (pmd_present(pmd)) {
/* See comment in change_huge_pmd() */
old = pmdp_invalidate(vma, addr, pmdp);
+
+ ret = pmd_soft_dirty(old);
+
if (pmd_dirty(old))
pmd = pmd_mkdirty(pmd);
if (pmd_young(old))
@@ -1107,14 +1116,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ ret = pmd_swp_soft_dirty(pmd);
pmd = pmd_swp_clear_soft_dirty(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
+ return ret;
}
#else
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
+ return false;
}
#endif
@@ -1367,6 +1379,7 @@ struct pagemapread {
int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
pagemap_entry_t *buffer;
bool show_pfn;
+ bool reset;
};
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
@@ -1398,6 +1411,14 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
return 0;
}
+static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm)
+{
+ ((unsigned long *)pm->buffer)[pm->pos++] = addr;
+ if (pm->pos >= pm->len)
+ return PM_END_OF_BUFFER;
+ return 0;
+}
+
static int pagemap_pte_hole(unsigned long start, unsigned long end,
__always_unused int depth, struct mm_walk *walk)
{
@@ -1405,6 +1426,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
unsigned long addr = start;
int err = 0;
+ if (pm->reset)
+ goto out;
+
while (addr < end) {
struct vm_area_struct *vma = find_vma(walk->mm, addr);
pagemap_entry_t pme = make_pme(0, 0);
@@ -1439,8 +1463,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
}
static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
- struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+ struct vm_area_struct *vma, unsigned long addr, pte_t *pte_addr)
{
+ pte_t pte = *pte_addr;
u64 frame = 0, flags = 0;
struct page *page = NULL;
@@ -1493,6 +1518,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
pmd_t pmd = *pmdp;
struct page *page = NULL;
+ if (pm->reset)
+ {
+ if (clear_soft_dirty_pmd(vma, addr, pmdp))
+ {
+ for (; addr != end; addr += PAGE_SIZE)
+ {
+ err = add_addr_to_pagemap(addr, pm);
+ if (err)
+ break;
+ }
+ }
+ goto trans_huge_done;
+ }
+
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1541,6 +1580,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
frame += (1 << MAX_SWAPFILES_SHIFT);
}
}
+trans_huge_done:
spin_unlock(ptl);
return err;
}
@@ -1555,10 +1595,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
*/
orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
for (; addr < end; pte++, addr += PAGE_SIZE) {
- pagemap_entry_t pme;
+ if (pm->reset)
+ {
+ if (clear_soft_dirty(vma, addr, pte))
+ err = add_addr_to_pagemap(addr, pm);
+ }
+ else
+ {
+ pagemap_entry_t pme;
- pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
- err = add_to_pagemap(addr, &pme, pm);
+ pme = pte_to_pagemap_entry(pm, vma, addr, pte);
+ err = add_to_pagemap(addr, &pme, pm);
+ }
if (err)
break;
}
@@ -1650,8 +1698,8 @@ static const struct mm_walk_ops pagemap_ops = {
* determine which areas of memory are actually mapped and llseek to
* skip over unmapped regions.
*/
-static ssize_t pagemap_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t do_pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos, bool reset)
{
struct mm_struct *mm = file->private_data;
struct pagemapread pm;
@@ -1660,6 +1708,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long start_vaddr;
unsigned long end_vaddr;
int ret = 0, copied = 0;
+ struct mmu_notifier_range range;
+ size_t buffer_len;
if (!mm || !mmget_not_zero(mm))
goto out;
@@ -1675,19 +1725,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* do not disclose physical addresses: attack vector */
pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
+ pm.reset = reset;
- pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
- pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
+ buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES);
+
+ pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL);
ret = -ENOMEM;
if (!pm.buffer)
goto out_mm;
src = *ppos;
svpfn = src / PM_ENTRY_BYTES;
- end_vaddr = mm->task_size;
+
+ start_vaddr = svpfn << PAGE_SHIFT;
+
+ if (reset)
+ {
+ if (count < sizeof(end_vaddr))
+ {
+ ret = -EINVAL;
+ goto out_mm;
+ }
+ if (copy_from_user(&end_vaddr, buf, sizeof(end_vaddr)))
+ return -EFAULT;
+ end_vaddr = min(end_vaddr, mm->task_size);
+ }
+ else
+ {
+ end_vaddr = mm->task_size;
+ start_vaddr = end_vaddr;
+ }
/* watch out for wraparound */
- start_vaddr = end_vaddr;
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
@@ -1707,18 +1776,35 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long end;
pm.pos = 0;
- end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
+ pm.len = min(buffer_len, count / PM_ENTRY_BYTES);
+
+ end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT));
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
+
ret = mmap_read_lock_killable(mm);
if (ret)
goto out_free;
+
+ if (reset)
+ {
+ inc_tlb_flush_pending(mm);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
+ 0, NULL, mm, start_vaddr, end);
+ mmu_notifier_invalidate_range_start(&range);
+ }
ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
+ if (reset)
+ {
+ mmu_notifier_invalidate_range_end(&range);
+ flush_tlb_mm(mm);
+ dec_tlb_flush_pending(mm);
+ }
mmap_read_unlock(mm);
- start_vaddr = end;
len = min(count, PM_ENTRY_BYTES * pm.pos);
+ BUG_ON(ret && ret != PM_END_OF_BUFFER);
if (copy_to_user(buf, pm.buffer, len)) {
ret = -EFAULT;
goto out_free;
@@ -1726,6 +1812,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
copied += len;
buf += len;
count -= len;
+
+ start_vaddr = reset && pm.pos == pm.len ? ((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end;
}
*ppos += copied;
if (!ret || ret == PM_END_OF_BUFFER)
@@ -1739,6 +1827,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
return ret;
}
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return do_pagemap_read(file, buf, count, ppos, false);
+}
+
+static ssize_t pagemap_reset_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return do_pagemap_read(file, buf, count, ppos, true);
+}
+
static int pagemap_open(struct inode *inode, struct file *file)
{
struct mm_struct *mm;
@@ -1765,6 +1865,14 @@ const struct file_operations proc_pagemap_operations = {
.open = pagemap_open,
.release = pagemap_release,
};
+
+const struct file_operations proc_pagemap_reset_operations = {
+ .llseek = mem_lseek, /* borrow this */
+ .read = pagemap_reset_read,
+ .open = pagemap_open,
+ .release = pagemap_release,
+};
+
#endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA
--
2.30.2
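For illustration, a userspace consumer of the pagemap_reset file added above might look like the sketch below. Note the unusual convention this patch implements: read() takes the end address in through the buffer (the copy_from_user above), the file offset encodes the start address in pagemap entries, and the kernel returns the addresses of pages whose soft-dirty bit was just cleared. The address range here is a hypothetical example; 4 KiB pages are assumed.

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    #define PM_ENTRY_BYTES 8   /* one 64-bit entry per page, as in pagemap */

    int main(void)
    {
        uint64_t buf[512];
        uint64_t start = 0x400000, end = 0x800000;  /* hypothetical range */
        int fd = open("/proc/self/pagemap_reset", O_RDONLY);
        if (fd < 0) { perror("open"); return 1; }

        /* File offset encodes the start address, in pagemap entries. */
        lseek(fd, (off_t)(start / 4096 * PM_ENTRY_BYTES), SEEK_SET);

        /* The patched kernel reads the end address out of the user buffer. */
        buf[0] = end;
        ssize_t n = read(fd, buf, sizeof(buf));

        /* On return, the buffer holds addresses that were soft-dirty. */
        for (ssize_t i = 0; i * (ssize_t)PM_ENTRY_BYTES < n; i++)
            printf("was soft-dirty: %#llx\n", (unsigned long long)buf[i]);

        close(fd);
        return 0;
    }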
@@ -0,0 +1,621 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: [PATCH 01/17] glitched
---
init/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/Makefile b/init/Makefile
index baf3ab8d9d49..854e32e6aec7 100755
--- a/init/Makefile
+++ b/init/Makefile
@@ -19,7 +19,7 @@ else
# Maximum length of UTS_VERSION is 64 chars
filechk_uts_version = \
- utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "$(build-timestamp)" | cut -b -64); \
+ utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "TKG" "$(build-timestamp)" | cut -b -64); \
echo '$(pound)'define UTS_VERSION \""$${utsver}"\"
#
--
2.28.0
From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 16:59:22 +0000
Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which
VFS caches are reclaimed
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
fs/dcache.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 361ea7ab30ea..0c5cf69b241a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -71,7 +71,7 @@
* If no ancestor relationship:
* arbitrary, since it's serialized on rename_lock
*/
-int sysctl_vfs_cache_pressure __read_mostly = 100;
+int sysctl_vfs_cache_pressure __read_mostly = 50;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
--
2.28.0
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f788cd61df21..2bfbb4213707 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -15,9 +15,9 @@ __read_mostly int scheduler_running;
/*
* part of the period that we allow rt tasks to run in us.
- * default: 0.95s
+ * XanMod default: 0.98s
*/
-int sysctl_sched_rt_runtime = 950000;
+int sysctl_sched_rt_runtime = 980000;
#ifdef CONFIG_SYSCTL
static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
--
2.28.0
From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:41:29 +0000
Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
scripts/setlocalversion | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 20f2efd57b11..0552d8b9f582 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -54,7 +54,7 @@ scm_version()
# If only the short version is requested, don't bother
# running further git commands
if $short; then
- echo "+"
+ # echo "+"
return
fi
# If we are past a tagged commit (like
--
2.28.0
From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Fri, 26 Oct 2018 11:22:33 +0100
Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3
inlining
---
drivers/infiniband/core/addr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 3a98439bba83..6efc4f907f58 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
union {
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
+ struct sockaddr_ib _sockaddr_ib;
} sgid_addr, dgid_addr;
int ret;
--
2.28.0
From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001
From: Etienne Juvigny <Ti3noU@gmail.com>
Date: Mon, 3 Sep 2018 17:36:25 +0200
Subject: [PATCH 07/17] Zenify & stuff
---
init/Kconfig | 32 ++++++++++++++++++++++++++++++++
kernel/sched/fair.c | 25 +++++++++++++++++++++++++
mm/page-writeback.c | 8 ++++++++
3 files changed, 65 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 3ae8678e1145..da708eed0f1e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK
menu "General setup"
+config ZENIFY
+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience"
+ default y
+ help
+ Tunes the kernel for responsiveness at the cost of throughput and power usage.
+
+ --- Virtual Memory Subsystem ---------------------------
+
+ Mem dirty before bg writeback..: 10 % -> 20 %
+ Mem dirty before sync writeback: 20 % -> 50 %
+
+ --- Block Layer ----------------------------------------
+
+ Queue depth...............: 128 -> 512
+ Default MQ scheduler......: mq-deadline -> bfq
+
+ --- CFS CPU Scheduler ----------------------------------
+
+ Scheduling latency.............: 6 -> 3 ms
+ Minimal granularity............: 0.75 -> 0.3 ms
+ Wakeup granularity.............: 1 -> 0.5 ms
+ CPU migration cost.............: 0.5 -> 0.25 ms
+ Bandwidth slice size...........: 5 -> 3 ms
+ Ondemand fine upscaling limit..: 95 % -> 85 %
+
+ --- MuQSS CPU Scheduler --------------------------------
+
+ Scheduling interval............: 6 -> 3 ms
+ ISO task max realtime use......: 70 % -> 25 %
+ Ondemand coarse upscaling limit: 80 % -> 45 %
+ Ondemand fine upscaling limit..: 95 % -> 45 %
+
config BROKEN
bool
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -37,8 +37,13 @@
*
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_latency = 3000000ULL;
+static unsigned int normalized_sysctl_sched_latency = 3000000ULL;
+#else
unsigned int sysctl_sched_latency = 6000000ULL;
static unsigned int normalized_sysctl_sched_latency = 6000000ULL;
+#endif
/*
* The initial- and re-scaling of tunables is configurable
@@ -58,21 +63,34 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
*
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_min_granularity = 300000ULL;
+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_min_granularity = 750000ULL;
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
+#endif
/*
* Minimal preemption granularity for CPU-bound SCHED_IDLE tasks.
* Applies only when SCHED_IDLE tasks compete with normal tasks.
*
* (default: 0.75 msec)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_idle_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_idle_min_granularity = 750000ULL;
+#endif
/*
* This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sched_nr_latency = 10;
+#else
static unsigned int sched_nr_latency = 8;
+#endif
/*
* After fork, child runs first. If set to 0 (default) then
@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu)
*
* (default: 5 msec, units: microseconds)
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
+#else
static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
+#endif
#ifdef CONFIG_SYSCTL
static struct ctl_table sched_fair_sysctls[] = {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 28b3e7a67565..01a1aef2b9b1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
/*
* Start background writeback (via writeback threads) at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int dirty_background_ratio = 20;
+#else
static int dirty_background_ratio = 10;
+#endif
/*
* dirty_background_bytes starts at 0 (disabled) so that it is a function of
@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable;
/*
* The generator of dirty data starts writeback at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int vm_dirty_ratio = 50;
+#else
static int vm_dirty_ratio = 20;
+#endif
/*
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of
--
2.28.0
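On a kernel built with CONFIG_ZENIFY=y, the writeback changes above surface as ordinary sysctl defaults, so they are easy to confirm (expected: 20 and 50 instead of the stock 10 and 20):

    sysctl vm.dirty_background_ratio vm.dirty_ratio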
From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001
From: Steven Barrett <damentz@liquorix.net>
Date: Sun, 16 Jan 2011 18:57:32 -0600
Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control
4.4: In my tests YeAH dramatically slowed down transfers over a WLAN,
reducing throughput from ~65Mbps (CUBIC) to ~7Mbps (YeAH) over 10
seconds (netperf TCP_STREAM) including long stalls.
Be careful when choosing this. ~heftig
---
net/ipv4/Kconfig | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e64e59b536d3..bfb55ef7ebbe 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -691,6 +691,9 @@ choice
config DEFAULT_VEGAS
bool "Vegas" if TCP_CONG_VEGAS=y
+ config DEFAULT_YEAH
+ bool "YeAH" if TCP_CONG_YEAH=y
+
config DEFAULT_VENO
bool "Veno" if TCP_CONG_VENO=y
@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG
default "htcp" if DEFAULT_HTCP
default "hybla" if DEFAULT_HYBLA
default "vegas" if DEFAULT_VEGAS
+ default "yeah" if DEFAULT_YEAH
default "westwood" if DEFAULT_WESTWOOD
default "veno" if DEFAULT_VENO
default "reno" if DEFAULT_RENO
--
2.28.0
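Whether YeAH actually took over as default can be checked at runtime, and it can be swapped in or out without rebuilding (the tcp_yeah module must be available); heed heftig's WLAN warning above:

    sysctl net.ipv4.tcp_congestion_control            # current default
    sysctl net.ipv4.tcp_available_congestion_control  # loaded algorithms
    sysctl -w net.ipv4.tcp_congestion_control=yeah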
From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 28 Nov 2018 19:01:27 -0600
Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag
strategy
For some reason, the default strategy to respond to THP fault fallbacks
is still just madvise, meaning stall if the program wants transparent
hugepages, but don't trigger a background reclaim / compaction if THP
begins to fail allocations. This creates a snowball effect where we
still use the THP code paths, but we almost always fail once a system
has been active and busy for a while.
The option "defer" was created for interactive systems where THP can
still improve performance. If we have to fall back to a regular page due
to an allocation failure or anything else, we will trigger a background
reclaim and compaction so future THP attempts succeed and previous
attempts eventually have their smaller pages combined without stalling
running applications.
We still want madvise to stall applications that explicitly want THP,
so defer+madvise _does_ make a ton of sense. Make it the default for
interactive systems, especially if the kernel maintainer left
transparent hugepages on "always".
Reasoning and details in the original patch: https://lwn.net/Articles/711248/
---
mm/huge_memory.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74300e337c3c..9277f22c10a7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly =
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif
+#ifdef CONFIG_ZENIFY
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
+#else
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
+#endif
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
--
2.28.0
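The same strategy can be tried on any kernel before patching, since it is a runtime sysfs knob; the bracketed entry is the active one:

    cat /sys/kernel/mm/transparent_hugepage/defrag
    # e.g.: always defer [defer+madvise] madvise never
    echo defer+madvise | sudo tee /sys/kernel/mm/transparent_hugepage/defrag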
From 2b65a1329cb220b43c19c4d0de5833fae9e2b22d Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Wed, 24 Oct 2018 16:58:52 -0300
Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
net/sched/Kconfig | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 84badf00647e..6a922bca9f39 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -471,6 +471,9 @@ choice
config DEFAULT_SFQ
bool "Stochastic Fair Queue" if NET_SCH_SFQ
+ config DEFAULT_CAKE
+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE
+
config DEFAULT_PFIFO_FAST
bool "Priority FIFO Fast"
endchoice
@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH
default "fq" if DEFAULT_FQ
default "fq_codel" if DEFAULT_FQ_CODEL
default "sfq" if DEFAULT_SFQ
+ default "cake" if DEFAULT_CAKE
default "pfifo_fast"
endif
--
2.28.0
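Likewise, the default qdisc is a runtime tunable, so the effect of DEFAULT_CAKE can be previewed without a rebuild (sch_cake must be available; only newly created qdiscs pick up the change):

    sysctl -w net.core.default_qdisc=cake
    tc qdisc show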
From 816ee502759e954304693813bd03d94986b28dba Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Mon, 18 Feb 2019 17:40:57 +0100
Subject: [PATCH 11/17] mm: Set watermark_scale_factor to 200 (from 10)
Multiple users have reported it's helping reducing/eliminating stuttering
with DXVK.
---
mm/page_alloc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 898ff44f2c7b..e72074034793 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -330,7 +330,7 @@ int watermark_boost_factor __read_mostly;
int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
int watermark_boost_factor __read_mostly = 15000;
-int watermark_scale_factor = 10;
+int watermark_scale_factor = 200;
static unsigned long nr_kernel_pages __initdata;
static unsigned long nr_all_pages __initdata;
--
2.28.0
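Since watermark_scale_factor is a live sysctl, the value can be tested on a stock kernel before adopting the patch:

    sysctl vm.watermark_scale_factor        # 10 on stock kernels
    sysctl -w vm.watermark_scale_factor=200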
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Fri, 19 Apr 2019 12:33:38 +0200
Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default
The value is still pretty low, and AMD64-ABI and ELF extended numbering
supports that, so we should be fine on modern x86 systems.
This fixes crashes in some applications using more than 65535 vmas (also
affects some Windows games running in Wine, such as Star Citizen).
---
include/linux/mm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc05c3588aa3..b0cefe94920d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define MAPCOUNT_ELF_CORE_MARGIN (5)
-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+#define DEFAULT_MAX_MAP_COUNT (262144)
extern int sysctl_max_map_count;
--
2.28.0
From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Mon, 27 Jul 2020 00:19:18 +0200
Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT
Some games such as Detroit: Become Human tend to be very crash prone with
lower values.
---
include/linux/mm.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b0cefe94920d..890165099b07 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define DEFAULT_MAX_MAP_COUNT (262144)
+#define DEFAULT_MAX_MAP_COUNT (16777216)
extern int sysctl_max_map_count;
--
2.28.0
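To judge whether a workload is anywhere near the limit, count its mappings directly (one line per VMA; the pid below is a placeholder):

    sysctl vm.max_map_count
    wc -l /proc/<pid>/maps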
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 25 Nov 2019 15:13:06 -0300
Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
block/elevator.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/block/elevator.c b/block/elevator.c
index 4eab3d70e880..79669aa39d79 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -623,19 +623,19 @@ static inline bool elv_support_iosched(struct request_queue *q)
}
/*
- * For single queue devices, default to using mq-deadline. If we have multiple
- * queues or mq-deadline is not available, default to "none".
+ * For single queue devices, default to using bfq. If we have multiple
+ * queues or bfq is not available, default to "none".
*/
static struct elevator_type *elevator_get_default(struct request_queue *q)
{
if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
return NULL;
if (q->nr_hw_queues != 1 &&
!blk_mq_is_shared_tags(q->tag_set->flags))
return NULL;
- return elevator_get(q, "mq-deadline", false);
+ return elevator_get(q, "bfq", false);
}
/*
--
2.28.0
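The outcome is visible per device in sysfs, where the bracketed scheduler is the active one, and it can still be overridden at runtime (device name is an example):

    cat /sys/block/nvme0n1/queue/scheduler
    # e.g.: [bfq] mq-deadline kyber none
    echo mq-deadline | sudo tee /sys/block/nvme0n1/queue/scheduler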
From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 3 Aug 2020 17:05:04 +0000
Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file
read-ahead pages size
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/pagemap.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cf2468da68e9..007dea784451 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);
-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE)
void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
struct file *, pgoff_t index, unsigned long req_count);
--
2.28.0
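VM_READAHEAD_PAGES seeds each backing device's default read-ahead window, so the change shows up as 2048 KiB instead of the stock 128 KiB in the per-device knob:

    cat /sys/class/bdi/*/read_ahead_kb
    cat /sys/block/nvme0n1/queue/read_ahead_kb   # device name is an example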
From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 15 Jan 2020 20:43:56 -0600
Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter
If intel-pstate is compiled into the kernel, it will preempt the loading
of acpi-cpufreq so you can take advantage of hardware p-states without
any friction.
However, intel-pstate is not completely superior to cpufreq's ondemand
for one reason. There's no concept of an up_threshold property.
In ondemand, up_threshold essentially reduces the maximum utilization to
compare against, allowing you to hit max frequencies and turbo boost
from a much lower core utilization.
With intel-pstate, you have the concept of minimum and maximum
performance, but no tunable that lets you define that maximum frequency
kicks in at, say, 50% core utilization. For just this oversight, there
are reasons you may want ondemand.
Let's support setting "enable" in the kernel boot parameters. This lets
kernel maintainers include "intel_pstate=disable" statically in the
built-in boot parameters, while letting users of the kernel override
this selection.
---
Documentation/admin-guide/kernel-parameters.txt | 3 +++
drivers/cpufreq/intel_pstate.c | 2 ++
2 files changed, 5 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb95fad81c79..3e92fee81e33 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1857,6 +1857,9 @@
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
+ enable
+ Enable intel_pstate in case "disable" was passed
+ previously in the kernel boot parameters
passive
Use intel_pstate as a scaling driver, but configure it
to work with generic cpufreq governors (instead of
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 36a469150ff9..aee891c9b78a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str)
if (!strcmp(str, "no_hwp"))
no_hwp = 1;
+ if (!strcmp(str, "enable"))
+ no_load = 0;
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))
--
2.28.0
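With this patch, a kernel that ships "intel_pstate=disable" in its built-in command line (CONFIG_CMDLINE) can be flipped back from the bootloader; which driver won is visible after boot:

    # appended to the kernel command line in the bootloader:
    #   intel_pstate=enable
    cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_driver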
@@ -0,0 +1,22 @@
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
*
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_wakeup_granularity = 500000UL;
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL;
+
+const_debug unsigned int sysctl_sched_migration_cost = 50000UL;
+#else
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#endif
int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
@@ -0,0 +1,90 @@
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption or sacrificing battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption or sacrificing battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
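Both halves of this patch can be verified on a running kernel: the tick rate via the in-kernel config (needs CONFIG_IKCONFIG_PROC), and the ondemand thresholds via sysfs while that governor is active:

    zcat /proc/config.gz | grep '^CONFIG_HZ'
    cat /sys/devices/system/cpu/cpufreq/ondemand/up_threshold
    cat /sys/devices/system/cpu/cpufreq/ondemand/sampling_down_factor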
@@ -0,0 +1,193 @@
From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001
From: Mark Weiman <mark.weiman@markzz.com>
Date: Sun, 12 Aug 2018 11:36:21 -0400
Subject: [PATCH] pci: Enable overrides for missing ACS capabilities
This an updated version of Alex Williamson's patch from:
https://lkml.org/lkml/2013/5/30/513
Original commit message follows:
PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that
allows us to control whether transactions are allowed to be redirected
in various subnodes of a PCIe topology. For instance, if two
endpoints are below a root port or downstream switch port, the
downstream port may optionally redirect transactions between the
devices, bypassing upstream devices. The same can happen internally
on multifunction devices. The transaction may never be visible to the
upstream devices.
One upstream device that we particularly care about is the IOMMU. If
a redirection occurs in the topology below the IOMMU, then the IOMMU
cannot provide isolation between devices. This is why the PCIe spec
encourages topologies to include ACS support. Without it, we have to
assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation.
Unfortunately, far too many topologies do not support ACS to make this
a steadfast requirement. Even the latest chipsets from Intel are only
sporadically supporting ACS. We have trouble getting interconnect
vendors to include the PCIe spec required PCIe capability, let alone
suggested features.
Therefore, we need to add some flexibility. The pcie_acs_override=
boot option lets users opt-in specific devices or sets of devices to
assume ACS support. The "downstream" option assumes full ACS support
on root ports and downstream switch ports. The "multifunction"
option assumes the subset of ACS features available on multifunction
endpoints and upstream switch ports are supported. The "id:nnnn:nnnn"
option enables ACS support on devices matching the provided vendor
and device IDs, allowing more strategic ACS overrides. These options
may be combined in any order. A maximum of 16 id specific overrides
are available. It's suggested to use the most limited set of options
necessary to avoid completely disabling ACS across the topology.
Note to hardware vendors, we have facilities to permanently quirk
specific devices which enforce isolation but not provide an ACS
capability. Please contact me to have your devices added and save
your customers the hassle of this boot option.
Signed-off-by: Mark Weiman <mark.weiman@markzz.com>
---
.../admin-guide/kernel-parameters.txt | 9 ++
drivers/pci/quirks.c | 101 ++++++++++++++++++
2 files changed, 110 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aefd358a5ca3..173b3596fd9e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3190,6 +3190,15 @@
nomsi [MSI] If the PCI_MSI kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of MSI interrupts system-wide.
+ pcie_acs_override =
+ [PCIE] Override missing PCIe ACS support for:
+ downstream
+ All downstream ports - full ACS capabilities
+ multifunction
+ All multifunction devices - multifunction ACS subset
+ id:nnnn:nnnn
+ Specific device - full ACS capabilities
+ Specified as vid:did (vendor/device ID) in hex
noioapicquirk [APIC] Disable all boot interrupt quirks.
Safety option to keep boot IRQs enabled. This
should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 4700d24e5d55..8f7a3d7fd9c1 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
}
+static bool acs_on_downstream;
+static bool acs_on_multifunction;
+
+#define NUM_ACS_IDS 16
+struct acs_on_id {
+ unsigned short vendor;
+ unsigned short device;
+};
+static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
+static u8 max_acs_id;
+
+static __init int pcie_acs_override_setup(char *p)
+{
+ if (!p)
+ return -EINVAL;
+
+ while (*p) {
+ if (!strncmp(p, "downstream", 10))
+ acs_on_downstream = true;
+ if (!strncmp(p, "multifunction", 13))
+ acs_on_multifunction = true;
+ if (!strncmp(p, "id:", 3)) {
+ char opt[5];
+ int ret;
+ long val;
+
+ if (max_acs_id >= NUM_ACS_IDS - 1) {
+ pr_warn("Out of PCIe ACS override slots (%d)\n",
+ NUM_ACS_IDS);
+ goto next;
+ }
+
+ p += 3;
+ snprintf(opt, 5, "%s", p);
+ ret = kstrtol(opt, 16, &val);
+ if (ret) {
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
+ goto next;
+ }
+ acs_on_ids[max_acs_id].vendor = val;
+
+ p += strcspn(p, ":");
+ if (*p != ':') {
+ pr_warn("PCIe ACS invalid ID\n");
+ goto next;
+ }
+
+ p++;
+ snprintf(opt, 5, "%s", p);
+ ret = kstrtol(opt, 16, &val);
+ if (ret) {
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
+ goto next;
+ }
+ acs_on_ids[max_acs_id].device = val;
+ max_acs_id++;
+ }
+next:
+ p += strcspn(p, ",");
+ if (*p == ',')
+ p++;
+ }
+
+ if (acs_on_downstream || acs_on_multifunction || max_acs_id)
+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n");
+
+ return 0;
+}
+early_param("pcie_acs_override", pcie_acs_override_setup);
+
+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags)
+{
+ int i;
+
+ /* Never override ACS for legacy devices or devices with ACS caps */
+ if (!pci_is_pcie(dev) ||
+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS))
+ return -ENOTTY;
+
+ for (i = 0; i < max_acs_id; i++)
+ if (acs_on_ids[i].vendor == dev->vendor &&
+ acs_on_ids[i].device == dev->device)
+ return 1;
+
+ switch (pci_pcie_type(dev)) {
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ case PCI_EXP_TYPE_ROOT_PORT:
+ if (acs_on_downstream)
+ return 1;
+ break;
+ case PCI_EXP_TYPE_ENDPOINT:
+ case PCI_EXP_TYPE_UPSTREAM:
+ case PCI_EXP_TYPE_LEG_END:
+ case PCI_EXP_TYPE_RC_END:
+ if (acs_on_multifunction && dev->multifunction)
+ return 1;
+ }
+
+ return -ENOTTY;
+}
/*
* Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
* The device will throw a Link Down error on AER-capable systems and
@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
/* Zhaoxin Root/Downstream Ports */
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
{ 0 }
};
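A typical use, e.g. for VFIO passthrough, is to boot with the narrowest override that works and then confirm that the devices of interest landed in separate IOMMU groups:

    # kernel command line (example; prefer the most limited set of options):
    #   pcie_acs_override=downstream,multifunction
    find /sys/kernel/iommu_groups/ -type l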
@@ -0,0 +1,166 @@
From b70e738f08403950aa3053c36b98c6b0eeb0eb90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Mon, 25 Oct 2021 09:49:42 -0300
Subject: [PATCH] futex: Add entry point for FUTEX_WAIT_MULTIPLE (opcode 31)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add an option to wait on multiple futexes using the old interface, which
uses opcode 31 through the futex() syscall. Do that by simply translating
the old interface to use the new code. This allows old and stable versions
of Proton to still use fsync in new kernel releases.
Signed-off-by: André Almeida <andrealmeid@collabora.com>
---
include/uapi/linux/futex.h | 13 +++++++
kernel/futex/syscalls.c | 75 +++++++++++++++++++++++++++++++++++++-
2 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 71a5df8d2689..d375ab21cbf8 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -22,6 +22,7 @@
#define FUTEX_WAIT_REQUEUE_PI 11
#define FUTEX_CMP_REQUEUE_PI 12
#define FUTEX_LOCK_PI2 13
+#define FUTEX_WAIT_MULTIPLE 31
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
@@ -68,6 +69,18 @@ struct futex_waitv {
__u32 __reserved;
};
+/**
+ * struct futex_wait_block - Block of futexes to be waited for
+ * @uaddr: User address of the futex
+ * @val: Futex value expected by userspace
+ * @bitset: Bitset for the optional bitmasked wakeup
+ */
+struct futex_wait_block {
+ __u32 __user *uaddr;
+ __u32 val;
+ __u32 bitset;
+};
+
/*
* Support for robust futexes: the kernel cleans up held futexes at
* thread exit time.
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index 6f91a07a6a83..2f4d4c04ede2 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -158,6 +158,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
case FUTEX_LOCK_PI2:
case FUTEX_WAIT_BITSET:
case FUTEX_WAIT_REQUEUE_PI:
+ case FUTEX_WAIT_MULTIPLE:
return true;
}
return false;
@@ -170,13 +171,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
return -EINVAL;
*t = timespec64_to_ktime(*ts);
- if (cmd == FUTEX_WAIT)
+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
*t = ktime_add_safe(ktime_get(), *t);
else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
return 0;
}
+/**
+ * futex_read_wait_block - Read an array of futex_wait_block from userspace
+ * @uaddr: Userspace address of the block
+ * @count: Number of blocks to be read
+ *
+ * This function creates and allocates an array of futex_q (we zero it to
+ * initialize the fields) and then, for each futex_wait_block element from
+ * userspace, fills a futex_q element with proper values.
+ */
+inline struct futex_vector *futex_read_wait_block(u32 __user *uaddr, u32 count)
+{
+ unsigned int i;
+ struct futex_vector *futexv;
+ struct futex_wait_block fwb;
+ struct futex_wait_block __user *entry =
+ (struct futex_wait_block __user *)uaddr;
+
+ if (!count || count > FUTEX_WAITV_MAX)
+ return ERR_PTR(-EINVAL);
+
+ futexv = kcalloc(count, sizeof(*futexv), GFP_KERNEL);
+ if (!futexv)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < count; i++) {
+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) {
+ kfree(futexv);
+ return ERR_PTR(-EFAULT);
+ }
+
+ futexv[i].w.flags = FUTEX_32;
+ futexv[i].w.val = fwb.val;
+ futexv[i].w.uaddr = (uintptr_t) (fwb.uaddr);
+ futexv[i].q = futex_q_init;
+ }
+
+ return futexv;
+}
+
+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
+ struct hrtimer_sleeper *to);
+
+int futex_opcode_31(ktime_t *abs_time, u32 __user *uaddr, int count)
+{
+ int ret;
+ struct futex_vector *vs;
+ struct hrtimer_sleeper *to = NULL, timeout;
+
+ to = futex_setup_timer(abs_time, &timeout, 0, 0);
+
+ vs = futex_read_wait_block(uaddr, count);
+
+ if (IS_ERR(vs))
+ return PTR_ERR(vs);
+
+ ret = futex_wait_multiple(vs, count, abs_time ? to : NULL);
+ kfree(vs);
+
+ if (to) {
+ hrtimer_cancel(&to->timer);
+ destroy_hrtimer_on_stack(&to->timer);
+ }
+
+ return ret;
+}
+
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
const struct __kernel_timespec __user *, utime,
u32 __user *, uaddr2, u32, val3)
@@ -196,6 +263,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
tp = &t;
}
+ if (cmd == FUTEX_WAIT_MULTIPLE)
+ return futex_opcode_31(tp, uaddr, val);
+
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
@@ -392,6 +462,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
tp = &t;
}
+ if (cmd == FUTEX_WAIT_MULTIPLE)
+ return futex_opcode_31(tp, uaddr, val);
+
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
#endif /* CONFIG_COMPAT_32BIT_TIME */
--
2.33.1
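For illustration, waiting on two futexes through the legacy opcode might look like the sketch below. FUTEX_WAIT_MULTIPLE and struct futex_wait_block come from the patched uapi header (a fallback is defined for unpatched headers); per futex_init_timeout above, the timeout is passed as a relative timespec, like plain FUTEX_WAIT:

    #include <linux/futex.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <time.h>
    #include <unistd.h>

    #ifndef FUTEX_WAIT_MULTIPLE
    #define FUTEX_WAIT_MULTIPLE 31
    struct futex_wait_block {
        uint32_t *uaddr;
        uint32_t val;
        uint32_t bitset;
    };
    #endif

    static uint32_t f1, f2;

    int main(void)
    {
        struct futex_wait_block blocks[2] = {
            { &f1, 0, ~0u },    /* wait while f1 == 0 */
            { &f2, 0, ~0u },    /* wait while f2 == 0 */
        };
        struct timespec timeout = { .tv_sec = 1 };  /* relative timeout */

        /* For opcode 31, uaddr points at the block array and val is its length. */
        long ret = syscall(SYS_futex, blocks, FUTEX_WAIT_MULTIPLE,
                           2, &timeout, NULL, 0);
        printf("futex() returned %ld\n", ret);
        return 0;
    }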
[File diff suppressed because it is too large]
@@ -0,0 +1,428 @@
From e5e77ad2223f662e1615266d8ef39a8db7e65a70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20H=C3=A4dicke?= <felixhaedicke@web.de>
Date: Thu, 19 Nov 2020 09:22:32 +0100
Subject: HID: quirks: Add Apple Magic Trackpad 2 to hid_have_special_driver
list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The Apple Magic Trackpad 2 is handled by the magicmouse driver. And
there were severe stability issues when both drivers (hid-generic and
hid-magicmouse) were loaded for this device.
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=210241
Signed-off-by: Felix Hädicke <felixhaedicke@web.de>
---
drivers/hid/hid-quirks.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index bf7ecab5d9e5..142e9dae2837 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -478,6 +478,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
+ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
#endif
#if IS_ENABLED(CONFIG_HID_MAYFLASH)
{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
--
cgit v1.2.3-1-gf6bb5
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 3 Feb 2021 11:20:12 +0200
Subject: Revert "cpufreq: Avoid configuring old governors as default with intel_pstate"
This is an undesirable behavior for us since our aggressive ondemand performs
better than schedutil for gaming when using intel_pstate in passive mode.
It also interferes with our option to select the desired default governor.
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 2c7171e0b0010..85de313ddec29 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -71,7 +71,6 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE
config CPU_FREQ_DEFAULT_GOV_ONDEMAND
bool "ondemand"
- depends on !(X86_INTEL_PSTATE && SMP)
select CPU_FREQ_GOV_ONDEMAND
select CPU_FREQ_GOV_PERFORMANCE
help
@@ -83,7 +84,6 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND
config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
bool "conservative"
- depends on !(X86_INTEL_PSTATE && SMP)
select CPU_FREQ_GOV_CONSERVATIVE
select CPU_FREQ_GOV_PERFORMANCE
help
From 2535fbde890f14c78b750139fcf87d1143850626 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 2 Aug 2022 12:28:11 -0400
Subject: [PATCH] mm: vmscan: fix extreme overreclaim and swap floods
During proactive reclaim, we sometimes observe severe overreclaim, with
several thousand times more pages reclaimed than requested.
This trace was obtained from shrink_lruvec() during such an instance:
prio:0 anon_cost:1141521 file_cost:7767
nr_reclaimed:4387406 nr_to_reclaim:1047 (or_factor:4190)
nr=[7161123 345 578 1111]
While the reclaimer requested 4M, vmscan reclaimed close to 16G, most of it
by swapping. These requests take over a minute, during which the write()
to memory.reclaim is unkillably stuck inside the kernel.
Digging into the source, this is caused by the proportional reclaim
bailout logic. This code tries to resolve a fundamental conflict: to
reclaim roughly what was requested, while also aging all LRUs fairly and
in accordance to their size, swappiness, refault rates etc. The way it
attempts fairness is that once the reclaim goal has been reached, it stops
scanning the LRUs with the smaller remaining scan targets, and adjusts the
remainder of the bigger LRUs according to how much of the smaller LRUs was
scanned. It then finishes scanning that remainder regardless of the
reclaim goal.
This works fine if priority levels are low and the LRU lists are
comparable in size. However, in this instance, the cgroup that is
targeted by proactive reclaim has almost no files left - they've already
been squeezed out by proactive reclaim earlier - and the remaining anon
pages are hot. Anon rotations cause the priority level to drop to 0,
which results in reclaim targeting all of anon (a lot) and all of file
(almost nothing). By the time reclaim decides to bail, it has scanned
most or all of the file target, and therefore must also scan most or all of
the enormous anon target. This target is thousands of times larger than
the reclaim goal, thus causing the overreclaim.
The bailout code hasn't changed in years, so why is it failing now? The
most likely explanations are two other recent changes in anon reclaim:
1. Before the series starting with commit 5df741963d52 ("mm: fix LRU
balancing effect of new transparent huge pages"), the VM was
overall relatively reluctant to swap at all, even if swap was
configured. This means the LRU balancing code didn't come into play
as often as it does now, and mostly in high pressure situations
where pronounced swap activity wouldn't be as surprising.
2. For historical reasons, shrink_lruvec() loops on the scan targets of
all LRU lists except the active anon one, meaning it would bail if
the only remaining pages to scan were active anon - even if there
were a lot of them.
Before the series starting with commit ccc5dc67340c ("mm/vmscan:
make active/inactive ratio as 1:1 for anon lru"), most anon pages
would live on the active LRU; the inactive one would contain only a
handful of preselected reclaim candidates. After the series, anon
gets aged similarly to file, and the inactive list is the default
for new anon pages as well, making it often the much bigger list.
As a result, the VM is now more likely to actually finish large
anon targets than before.
Change the code such that only one SWAP_CLUSTER_MAX-sized nudge toward the
larger LRU lists is made before bailing out on a met reclaim goal.
This fixes the extreme overreclaim problem.
Fairness is more subtle and harder to evaluate. No obvious misbehavior
was observed on the test workload, in any case. Conceptually, fairness
should primarily be a cumulative effect from regular, lower priority
scans. Once the VM is in trouble and needs to escalate scan targets to
make forward progress, fairness needs to take a backseat. This is also
acknowledged by the myriad exceptions in get_scan_count(). This patch
makes fairness decrease gradually, as it keeps fairness work static over
increasing priority levels with growing scan targets. This should make
more sense - although we may have to revisit the exact values.
Link: https://lkml.kernel.org/r/20220802162811.39216-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/vmscan.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 382dbe97329f33..266eb8cfe93a67 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2955,8 +2955,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
enum lru_list lru;
unsigned long nr_reclaimed = 0;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+ bool proportional_reclaim;
struct blk_plug plug;
- bool scan_adjusted;
get_scan_count(lruvec, sc, nr);
@@ -2974,8 +2974,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
* abort proportional reclaim if either the file or anon lru has already
* dropped to zero at the first pass.
*/
- scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
- sc->priority == DEF_PRIORITY);
+ proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+ sc->priority == DEF_PRIORITY);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2995,7 +2995,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
cond_resched();
- if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
+ if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
continue;
/*
@@ -3046,8 +3046,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
-
- scan_adjusted = true;
}
blk_finish_plug(&plug);
sc->nr_reclaimed += nr_reclaimed;
From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Mon, 4 Oct 2021 14:07:34 -0400
Subject: [PATCH] Bluetooth: fix deadlock for RFCOMM sk state change
Syzbot reports the following task hang [1]:
INFO: task syz-executor255:8499 blocked for more than 143 seconds.
Not tainted 5.14.0-rc7-syzkaller #0
Call Trace:
context_switch kernel/sched/core.c:4681 [inline]
__schedule+0x93a/0x26f0 kernel/sched/core.c:5938
schedule+0xd3/0x270 kernel/sched/core.c:6017
__lock_sock+0x13d/0x260 net/core/sock.c:2644
lock_sock_nested+0xf6/0x120 net/core/sock.c:3185
lock_sock include/net/sock.h:1612 [inline]
rfcomm_sk_state_change+0xb4/0x390 net/bluetooth/rfcomm/sock.c:73
__rfcomm_dlc_close+0x1b6/0x8a0 net/bluetooth/rfcomm/core.c:489
rfcomm_dlc_close+0x1ea/0x240 net/bluetooth/rfcomm/core.c:520
__rfcomm_sock_close+0xac/0x260 net/bluetooth/rfcomm/sock.c:220
rfcomm_sock_shutdown+0xe9/0x210 net/bluetooth/rfcomm/sock.c:931
rfcomm_sock_release+0x5f/0x140 net/bluetooth/rfcomm/sock.c:951
__sock_release+0xcd/0x280 net/socket.c:649
sock_close+0x18/0x20 net/socket.c:1314
__fput+0x288/0x920 fs/file_table.c:280
task_work_run+0xdd/0x1a0 kernel/task_work.c:164
exit_task_work include/linux/task_work.h:32 [inline]
do_exit+0xbd4/0x2a60 kernel/exit.c:825
do_group_exit+0x125/0x310 kernel/exit.c:922
get_signal+0x47f/0x2160 kernel/signal.c:2808
arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:865
handle_signal_work kernel/entry/common.c:148 [inline]
exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:209
__syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:302
do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
entry_SYSCALL_64_after_hwframe+0x44/0xae
Showing all locks held in the system:
1 lock held by khungtaskd/1653:
#0: ffffffff8b97c280 (rcu_read_lock){....}-{1:2}, at:
debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6446
1 lock held by krfcommd/4781:
#0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
rfcomm_process_sessions net/bluetooth/rfcomm/core.c:1979 [inline]
#0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
rfcomm_run+0x2ed/0x4a20 net/bluetooth/rfcomm/core.c:2086
2 locks held by in:imklog/8206:
#0: ffff8880182ce5f0 (&f->f_pos_lock){+.+.}-{3:3}, at:
__fdget_pos+0xe9/0x100 fs/file.c:974
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
raw_spin_rq_lock_nested kernel/sched/core.c:460 [inline]
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock
kernel/sched/sched.h:1307 [inline]
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: rq_lock
kernel/sched/sched.h:1610 [inline]
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
__schedule+0x233/0x26f0 kernel/sched/core.c:5852
4 locks held by syz-executor255/8499:
#0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
inode_lock include/linux/fs.h:774 [inline]
#0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
__sock_release+0x86/0x280 net/socket.c:648
#1:
ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
at: lock_sock include/net/sock.h:1612 [inline]
#1:
ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
at: rfcomm_sock_shutdown+0x54/0x210 net/bluetooth/rfcomm/sock.c:928
#2: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
rfcomm_dlc_close+0x34/0x240 net/bluetooth/rfcomm/core.c:507
#3: ffff888141bd6d28 (&d->lock){+.+.}-{3:3}, at:
__rfcomm_dlc_close+0x162/0x8a0 net/bluetooth/rfcomm/core.c:487
==================================================================
The task hangs because of a deadlock that occurs when lock_sock() is
called in rfcomm_sk_state_change(). One such call stack is:
rfcomm_sock_shutdown():
lock_sock();
__rfcomm_sock_close():
rfcomm_dlc_close():
__rfcomm_dlc_close():
rfcomm_dlc_lock();
rfcomm_sk_state_change():
lock_sock();
lock_sock() has to be called when the sk state is changed because the
lock is not always held when rfcomm_sk_state_change() is
called. However, besides the recursive deadlock, there is also an
issue of a lock hierarchy inversion between rfcomm_dlc_lock() and
lock_sock() if the socket is locked in rfcomm_sk_state_change().
To avoid these issues, we can instead schedule the sk state change in
the global workqueue. This is already the implicit assumption about
how sk state changes happen. For example, in rfcomm_sock_shutdown(),
the call to __rfcomm_sock_close() is followed by
bt_sock_wait_state().
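Condensed, the pattern the patch adopts looks like this (a sketch assembled
from the hunks below, not verbatim kernel code):
	/* Caller (may hold rfcomm_dlc_lock): defer instead of lock_sock() */
	d->err = err;                             /* stash err for the worker */
	sock_hold(sk);                            /* pin sk until the work runs */
	if (!schedule_work(&d->state_change_work))
		sock_put(sk);                     /* already queued: drop extra ref */
	/* Worker (process context, no locks held on entry) */
	lock_sock(sk);
	rfcomm_dlc_lock(d);
	/* ... propagate d->state and d->err to sk ... */
	rfcomm_dlc_unlock(d);
	release_sock(sk);
	sock_put(sk);                             /* drop the caller's reference */
Because the worker starts with no locks held, it can always take lock_sock()
and rfcomm_dlc_lock() in the same order, removing both the recursive
lock_sock() and the hierarchy inversion.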
Additionally, the call to rfcomm_sock_kill() inside
rfcomm_sk_state_change() should be removed. The socket shouldn't be
killed here because only rfcomm_sock_release() calls sock_orphan(),
which it already follows up with a call to rfcomm_sock_kill().
Fixes: b7ce436a5d79 ("Bluetooth: switch to lock_sock in RFCOMM")
Link: https://syzkaller.appspot.com/bug?extid=7d51f807c81b190a127d [1]
Reported-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
Tested-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
---
include/net/bluetooth/rfcomm.h | 3 +++
net/bluetooth/rfcomm/core.c | 2 ++
net/bluetooth/rfcomm/sock.c | 34 ++++++++++++++++++++++------------
3 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 99d26879b02a53..a92799fc5e74d0 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -171,6 +171,7 @@ struct rfcomm_dlc {
struct rfcomm_session *session;
struct sk_buff_head tx_queue;
struct timer_list timer;
+ struct work_struct state_change_work;
struct mutex lock;
unsigned long state;
@@ -186,6 +187,7 @@ struct rfcomm_dlc {
u8 sec_level;
u8 role_switch;
u32 defer_setup;
+ int err;
uint mtu;
uint cfc;
@@ -310,6 +312,7 @@ struct rfcomm_pinfo {
u8 role_switch;
};
+void __rfcomm_sk_state_change(struct work_struct *work);
int rfcomm_init_sockets(void);
void rfcomm_cleanup_sockets(void);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 7324764384b677..c6494e85cd68b2 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -289,6 +289,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
d->flags = 0;
d->mscex = 0;
d->sec_level = BT_SECURITY_LOW;
+ d->err = 0;
d->mtu = RFCOMM_DEFAULT_MTU;
d->v24_sig = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
@@ -306,6 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
timer_setup(&d->timer, rfcomm_dlc_timeout, 0);
skb_queue_head_init(&d->tx_queue);
+ INIT_WORK(&d->state_change_work, __rfcomm_sk_state_change);
mutex_init(&d->lock);
refcount_set(&d->refcnt, 1);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4bf4ea6cbb5eee..4850dafbaa05fb 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -61,19 +61,22 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb)
rfcomm_dlc_throttle(d);
}
-static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
+void __rfcomm_sk_state_change(struct work_struct *work)
{
+ struct rfcomm_dlc *d = container_of(work, struct rfcomm_dlc,
+ state_change_work);
struct sock *sk = d->owner, *parent;
if (!sk)
return;
- BT_DBG("dlc %p state %ld err %d", d, d->state, err);
-
lock_sock(sk);
+ rfcomm_dlc_lock(d);
- if (err)
- sk->sk_err = err;
+ BT_DBG("dlc %p state %ld err %d", d, d->state, d->err);
+
+ if (d->err)
+ sk->sk_err = d->err;
sk->sk_state = d->state;
@@ -91,15 +94,22 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
sk->sk_state_change(sk);
}
+ rfcomm_dlc_unlock(d);
release_sock(sk);
+ sock_put(sk);
+}
- if (parent && sock_flag(sk, SOCK_ZAPPED)) {
- /* We have to drop DLC lock here, otherwise
- * rfcomm_sock_destruct() will dead lock. */
- rfcomm_dlc_unlock(d);
- rfcomm_sock_kill(sk);
- rfcomm_dlc_lock(d);
- }
+static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
+{
+ struct sock *sk = d->owner;
+
+ if (!sk)
+ return;
+
+ d->err = err;
+ sock_hold(sk);
+ if (!schedule_work(&d->state_change_work))
+ sock_put(sk);
}
/* ---- Socket functions ---- */

View File

@@ -0,0 +1,27 @@
# Remove the Obsoletes line in kernel-headers
# Add Provides for kernel-devel so there's no conflict
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 7c477ca7d..1158f5559 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -25,0 +26 @@ fi
+PROVIDES_DRM=""
@@ -27 +28 @@ if grep -q CONFIG_DRM=y .config; then
- PROVIDES=kernel-drm
+ PROVIDES_DRM="Provides: kernel-drm = %{version}"
@@ -30 +30,0 @@ fi
-PROVIDES="$PROVIDES kernel-$KERNELRELEASE"
@@ -51 +51,3 @@ $S Source: kernel-$__KERNELRELEASE.tar.gz
- Provides: $PROVIDES
+ $PROVIDES_DRM
+ Provides: kernel = %{version}
+ Provides: kernel-uname-r = %{version}
+ Provides: installonlypkg(kernel) = %{version}
@@ -61 +63 @@ $S Source: kernel-$__KERNELRELEASE.tar.gz
- Obsoletes: kernel-headers
+ Provides: installonlypkg(kernel) = %{version}
@@ -72,0 +75,3 @@ $S$M Group: System Environment/Kernel
+$S$M Provides: kernel-devel = %{version}
+$S$M Provides: kernel-devel-uname-r = %{version}
+$S$M Provides: installonlypkg(kernel) = %{version}

View File

@@ -0,0 +1,46 @@
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -442,7 +442,7 @@ endif
HOSTPKG_CONFIG = pkg-config
KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
- -O2 -fomit-frame-pointer -std=gnu11 \
+ -O3 -fomit-frame-pointer -std=gnu11 \
-Wdeclaration-after-statement
KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS)
KBUILD_USERLDFLAGS := $(USERLDFLAGS)
@@ -474,7 +474,7 @@ endif
-Wclippy::dbg_macro
KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
-KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
+KBUILD_HOSTCXXFLAGS := -Wall -O3 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
KBUILD_HOSTRUSTFLAGS := $(rust_common_flags) -O -Cstrip=debuginfo \
-Zallow-features= $(HOSTRUSTFLAGS)
KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS)
@@ -757,7 +757,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
-KBUILD_CFLAGS += -O2
+KBUILD_CFLAGS += -O3
KBUILD_RUSTFLAGS += -Copt-level=2
else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += -Os
diff --git a/init/Kconfig b/init/Kconfig
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1401,10 +1401,10 @@ choice
default CC_OPTIMIZE_FOR_PERFORMANCE
config CC_OPTIMIZE_FOR_PERFORMANCE
- bool "Optimize for performance (-O2)"
+ bool "Optimize for performance (-O3)"
help
This is the default optimization level for the kernel, building
- with the "-O2" compiler flag for best performance and most
+ with the "-O3" compiler flag for best performance and most
helpful compile-time warnings.
config CC_OPTIMIZE_FOR_SIZE