Compare commits

...

31 Commits

Author SHA1 Message Date
Sravan Balaji
5d3c6cafba Switch back to PDS Scheduler 2023-06-19 18:36:16 -04:00
Sravan Balaji
a51ee8badb Switch to BMQ Scheduler
- Change yield type and rr interval to BMQ defaults
- Disable PDS specific smt nice priority support
- Disable setting kernel parameters (handled by grub)
2023-06-19 18:36:16 -04:00
Sravan Balaji
9896815864 PDS Kernel Configuration 2023-06-19 18:36:16 -04:00
Tk-Glitch
e6c1edf943 Remove undesired merged hunk
Not ready yet :D

Fixes ddc5ffd463
2023-06-17 20:30:50 +02:00
Tk-Glitch
ddc5ffd463 Bump PREEMPT_RT mappings 2023-06-17 20:29:10 +02:00
Tk-Glitch
2786ab2916 linux 6.3.y: Update defconfig to 6.3.8-arch1
https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/blob/main/config
2023-06-17 20:22:43 +02:00
whitehara
a586812578 Fix install.sh for fedora (#752) 2023-06-07 17:41:55 +02:00
ptr1337
77078d8340 6.3: Add bcachefs (#749)
Signed-off-by: Peter Jung <admin@ptr1337.dev>
2023-05-29 19:47:33 +02:00
redglobuli
f2561e8f52 Update prepare to include BORE Scheduler (#747)
updated to include BORE scheduler, BORE patch is supporting linux 6.3
2023-05-29 19:47:08 +02:00
Tk-Glitch
ac13936ea1 Move 6.0 and 6.2 to EOL 2023-05-27 02:45:31 +02:00
Tk-Glitch
feb1867486 Add 6.4 rc initial support 2023-05-15 16:38:06 +02:00
Tk-Glitch
5c02fb44c4 Revert "linux 6.3.y: Add amdgpu fixup for vkd3d-proton to misc-additions"
This reverts commit e5fd39cad7.
2023-05-14 19:33:27 +02:00
Tk-Glitch
e5fd39cad7 linux 6.3.y: Add amdgpu fixup for vkd3d-proton to misc-additions
https://patchwork.freedesktop.org/patch/534250/
2023-05-13 02:53:57 +02:00
Tk-Glitch
6c9383631d linux 6.3.y: Properly disable MLX5_CORE on Project C builds
Fixes https://github.com/Frogging-Family/linux-tkg/issues/731
Fixes b54abe9ba5
2023-05-13 02:52:40 +02:00
Tk-Glitch
b54abe9ba5 linux 6.3.y: Update project C to v6.3-r1 and disable MLX5_CORE when using it
Fixes compilation error with NUMA enabled
2023-05-11 21:25:07 +02:00
Tk-Glitch
af84cc4514 Add support for emerald rapids march 2023-05-11 19:26:59 +02:00
Tk-Glitch
f65c485bf4 linux 6.3.y: Update defconfig 2023-05-11 19:26:11 +02:00
Vasiliy Stelmachenok
c0f870d1f2 linux 6.{2,3}.y: Fix wrong patch (#739) 2023-05-07 12:04:38 +02:00
Vasiliy Stelmachenok
5750e6174c linux 6.2.y: Replace "Remove all energy efficiency functions" with the default value of sched_energy_aware (#728)
* linux 6.2.y: Replace "Remove all energy efficiency functions" with the default value of sched_energy_aware

* linux 6.3.y: Replace "Remove all energy efficiency functions" with the default value of sched_energy_aware
2023-05-06 18:33:31 +02:00
Tk-Glitch
ac8b6420c6 linux 6.3: Fixup for Project C with NUMA enabled
https://gitlab.com/alfredchen/linux-prjc/-/issues/80#note_1368202615
2023-04-26 12:50:55 +02:00
Tk-Glitch
e4f8d0d479 linux 6.3: Bring Project C and enable TT, and fix fedora-rpm patch
https://gitlab.com/alfredchen/projectc/-/blob/master/6.3/prjc_v6.3-r0.patch
2023-04-25 21:05:21 +02:00
Tk-Glitch
24f561c816 linux 6.2.y: Update Project C to r2 with pds priority squeeze 0.5
https://gitlab.com/alfredchen/projectc/-/tree/master/6.2
2023-04-12 17:37:56 +02:00
Tk-Glitch
f033dc9a50 Defconfig refresh 2023-04-12 17:35:37 +02:00
icxes
e64616ef3c linux 6.3 RC: fix ACS override patch causing an error (#725) 2023-04-05 16:14:53 +02:00
Dmitry Skvortsov
d638829538 fsync: Rename toggle options (#694)
changes that should bring clarity:
 - rename toggle option for fsync backport patches that was adopted
 in Linux 5.16. Keep enabled by default;
 - rename toggle option and file name for fsync legacy patches,
 known as "FUTEX_WAIT_MULTIPLE (opcode 31)";
 - disable deprecated "futex2_interface" patches by default.
2023-03-21 17:42:03 +01:00
Tk-Glitch
f95793c534 linux 6.1.y: Update defconfig 2023-03-12 20:06:29 +01:00
Tk-Glitch
2b5eda7968 linux 6.2.y: Update defconfig and fixup 0006-add-acs-overrides_iommu patch 2023-03-12 20:06:16 +01:00
Tk-Glitch
1f3a417ff7 linux 6.2.y, 6.3rc: Enable EFI_HANDOVER_PROTOCOL 2023-03-10 00:06:08 +01:00
Tk-Glitch
b13b5bbcf3 linux 6.3 RC: Disable X86_KERNEL_IBT for now
Workaround for `btf_encoder__encode: btf__dedup failed!`

https://lore.kernel.org/bpf/SY4P282MB1084A0E31D4228DF89FC42639DA29@SY4P282MB1084.AUSP282.PROD.OUTLOOK.COM/T/
2023-03-06 16:55:10 +01:00
Tk-Glitch
764dc320d5 Update rt mappings 2023-03-06 14:02:53 +01:00
Tk-Glitch
feb9373578 Add initial support for 6.3 RC1 2023-03-06 13:58:11 +01:00
66 changed files with 147515 additions and 722 deletions

View File

@@ -3,7 +3,7 @@
# Linux distribution you are using, options are "Arch", "Ubuntu", "Debian", "Fedora", "Suse", "Gentoo", "Generic".
# It is automatically set to "Arch" when using PKGBUILD.
# If left empty, the script will prompt
_distro=""
_distro="Arch"
# Kernel Version - Options are "5.4", and from "5.7" to "5.19"
# you can also set a specific kernel version, e.g. "6.0-rc4" or "5.10.51",
@@ -46,7 +46,7 @@ CUSTOM_GCC_PATH=""
CUSTOM_LLVM_PATH=""
# Set to true to bypass makepkg.conf and use all available threads for compilation. False will respect your makepkg.conf options.
_force_all_threads="true"
_force_all_threads="false"
# Set to true to prevent ccache from being used and set CONFIG_GCC_PLUGINS=y (which needs to be disabled for ccache to work properly)
_noccache="false"
@@ -60,10 +60,10 @@ _modprobeddb="false"
_modprobeddb_db_path=~/.config/modprobed.db
# Set to "1" to call make menuconfig, "2" to call make nconfig, "3" to call make xconfig, before building the kernel. Set to false to disable and skip the prompt.
_menunconfig=""
_menunconfig="false"
# Set to true to generate a kernel config fragment from your changes in menuconfig/nconfig. Set to false to disable and skip the prompt.
_diffconfig=""
_diffconfig="false"
# Set to the file name where the generated config fragment should be written to. Only used if _diffconfig is active.
_diffconfig_name=""
@@ -97,11 +97,11 @@ _STRIP="true"
# LEAVE AN EMPTY VALUE TO BE PROMPTED ABOUT FOLLOWING OPTIONS AT BUILD TIME
# CPU scheduler - Options are "upds" (TkG's Undead PDS), "pds", "bmq", "muqss", "cacule", "tt", "bore" or "cfs" (kernel's default)
_cpusched=""
_cpusched="pds"
# Compiler to use - Options are "gcc" or "llvm".
# For advanced users.
_compiler=""
_compiler="gcc"
# Force the use of the LLVM Integrated Assembler whether using LLVM, LTO or not.
# Set to "1" to enable.
@@ -131,7 +131,7 @@ _preempt_rt_force=""
# For BMQ: 0: No yield.
# 1: Deboost and requeue task. (Default)
# 2: Set rq skip task.
_sched_yield_type=""
_sched_yield_type="0"
# Round Robin interval is the longest duration two tasks with the same nice level will be delayed for. When CPU time is requested by a task, it receives a time slice equal
# to the rr_interval in addition to a virtual deadline. When using yield_type 2, a low value can help offset the disadvantages of rescheduling a process that has yielded.
@@ -139,7 +139,7 @@ _sched_yield_type=""
# PDS default: 4ms
# BMQ default: 2ms
# Set to "1" for 2ms, "2" for 4ms, "3" for 6ms, "4" for 8ms, or "default" to keep the chosen scheduler defaults.
_rr_interval=""
_rr_interval="default"
# Set to "true" to disable FUNCTION_TRACER/GRAPH_TRACER, lowering overhead but limiting debugging and analyzing of kernel functions - Kernel default is "false"
_ftracedisable="false"
@@ -154,10 +154,10 @@ _misc_adds="true"
# Full tickless can give higher performance if you isolate CPUs for specific tasks,
# but it only works when using the nohz_full kernel parameter; otherwise it behaves like idle.
# Tickless idle alone performs better on most platforms.
_tickless=""
_tickless="2"
# Set to "true" to use ACS override patch - https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF#Bypassing_the_IOMMU_groups_.28ACS_override_patch.29 - Kernel default is "false"
_acs_override=""
_acs_override="false"
# Set to "true" to add Bcachefs filesystem support. You'll have to install bcachefs-tools-git from AUR for utilities - https://bcachefs.org/ - If in doubt, set to "false"
# This can be buggy and isn't recommended on a production machine, also enabling this option will not allow you to enable MGLRU.
@@ -168,13 +168,13 @@ _bcachefs="false"
_winesync="false"
# Set to "true" to enable Binder and Ashmem, the kernel modules required to use the android emulator Anbox. ! This doesn't apply to 5.4.y !
_anbox=""
_anbox="false"
# A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience (ZENIFY) - Default is "true"
_zenify="true"
# compiler optimization level - 1. Optimize for performance (-O2); 2. Optimize harder (-O3); 3. Optimize for size (-Os) - Kernel default is "1"
_compileroptlevel="1"
_compileroptlevel="2"
# CPU compiler optimizations - Defaults to prompt at kernel config if left empty
# AMD CPUs : "k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver" "steamroller" "excavator" "zen" "zen2" "zen3" "zen4" (zen3 opt support depends on GCC11) (zen4 opt support depends on GCC13)
@@ -188,7 +188,7 @@ _compileroptlevel="1"
# - "generic_v2" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v2)
# - "generic_v3" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v3)
# - "generic_v4" (depends on GCC11 - to share the package between machines with different CPU µarch supporting at least x86-64-v4)
_processor_opt=""
_processor_opt="skylake"
# CacULE only - Enable Response Driven Balancer, an experimental load balancer for CacULE
_cacule_rdb="false"
@@ -201,13 +201,13 @@ _cacule_rdb_interval="19"
_tt_high_hz="false"
# MuQSS and PDS only - SMT (Hyperthreading) aware nice priority and policy support (SMT_NICE) - Kernel default is "true" - You can disable this on non-SMT/HT CPUs for lower overhead
_smt_nice=""
_smt_nice="true"
# Trust the CPU manufacturer to initialize Linux's CRNG (RANDOM_TRUST_CPU) - Kernel default is "false"
_random_trust_cpu="true"
# Timer frequency - "100" "250" "300" "500" "750" "1000" ("2000" is available for cacule cpusched only) - More options available in kernel config prompt when left empty depending on selected cpusched with the default option pointed with a ">" (2000 for cacule, 100 for muqss and 1000 for other cpu schedulers)
_timer_freq=""
_timer_freq="1000"
# Default CPU governor - "performance", "ondemand", "schedutil" or leave empty for default (schedutil)
_default_cpu_gov="ondemand"
@@ -223,7 +223,7 @@ _aggressive_ondemand="true"
_tcp_cong_alg=""
# You can pass a default set of kernel command line options here - example: "intel_pstate=passive nowatchdog amdgpu.ppfeaturemask=0xfffd7fff mitigations=off"
_custom_commandline="intel_pstate=passive split_lock_detect=off"
_custom_commandline=""
# Selection of Clearlinux patches
_clear_patches="true"
@@ -246,18 +246,18 @@ _NR_CPUS_value=""
#### LEGACY OPTIONS ####
# Set to "true" to enable support for fsync, an experimental replacement for esync found in Valve Proton 4.11+ - https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305
# Can be enabled alongside _futex_waitv on 5.13+ to use it as a fallback for older Proton builds
_fsync="true"
# Upstreamed version of Fsync from Linux 5.16 for previous kernel versions - https://github.com/andrealmeid/futex_waitv_patches
# ! Only affects 5.13, 5.14 and 5.15 kernel branches. Safely ignored for 5.16 or newer !
_fsync_backport="true"
# Set to "true" to enable backported patches to add support for the futex_waitv() syscall, a new interface for fsync. Upstream as of 5.16 and requires a wine/proton with builtin support for it - https://github.com/ValveSoftware/wine/pull/128
# !! Disables futex2 interfaces support !!
# https://github.com/andrealmeid/futex_waitv_patches
_futex_waitv="true"
# Fsync legacy, known as FUTEX_WAIT_MULTIPLE (opcode 31) - previous version of fsync required for Valve Proton 4.11, 5.0 and 5.13 - https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305
_fsync_legacy="true"
# Set to "true" to enable support for futex2, an experimental interface that can be used by proton-tkg and proton 5.13 experimental through Fsync - Can be enabled alongside fsync to use it as a fallback
# Set to "true" to enable support for futex2, a DEPRECATED interface that can be used by proton-tkg and proton 5.13 experimental through Fsync - Can be enabled alongside fsync legacy to use it as a fallback
# https://gitlab.collabora.com/tonyk/linux/-/tree/futex2-dev
_futex2="true"
# ! Only affects 5.10-5.14 kernel branches. Safely ignored for 5.15 or newer !
# ! Requires _fsync_backport="false" !
_fsync_futex2="false"
# Set to "true" to add back missing symbol for AES-NI/AVX support on ZFS - This is a legacy option that can be ignored on 5.10+ kernels - https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions.patch
_zfsfix="true"

View File

@@ -73,7 +73,7 @@ _install_dependencies() {
if [ $(rpm -E %fedora) = "32" ]; then
sudo dnf install bison ccache dwarves elfutils-libelf-devel fedora-packager fedpkg flex gcc-c++ git grubby libXi-devel lz4 ncurses-devel openssl-devel pesign qt5-devel rpm-build rpmdevtools schedtool zstd ${clang_deps} -y
else
sudo dnf install bison ccache dwarves elfutils-devel elfutils-libelf-devel fedora-packager fedpkg flex gcc-c++ git grubby libXi-devel lz4 make ncurses-devel openssl openssl-devel perl-devel perl-generators pesign python3-devel qt5-qtbase-devel rpm-build rpmdevtools schedtool zstd -y ${clang_deps} -y
sudo dnf install perl bison ccache dwarves elfutils-devel elfutils-libelf-devel fedora-packager fedpkg flex gcc-c++ git grubby libXi-devel lz4 make ncurses-devel openssl openssl-devel perl-devel perl-generators pesign python3-devel qt5-qtbase-devel rpm-build rpmdevtools schedtool zstd -y ${clang_deps} -y
fi
elif [ "$_distro" = "Suse" ]; then
msg2 "Installing dependencies"
@@ -251,6 +251,9 @@ if [ "$1" = "install" ]; then
_fedora_work_dir="$_kernel_work_folder_abs/linux-tkg-rpmbuild"
msg2 "Add patched files to the diff.patch"
(cd ${_kernel_work_folder_abs} && git add -- . ':!linux-tkg-rpmbuild')
msg2 "Building kernel RPM packages"
RPMOPTS="--define '_topdir ${_fedora_work_dir}'" make ${llvm_opt} -j ${_thread_num} rpm-pkg EXTRAVERSION="${_extra_ver_str}"
msg2 "Building successfully finished!"

View File

@@ -1,15 +1,15 @@
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86 5.15.61 Kernel Configuration
# Linux/x86 5.15.106 Kernel Configuration
#
CONFIG_CC_VERSION_TEXT="gcc (TkG-mostlyportable) 12.1.1 20220515"
CONFIG_CC_VERSION_TEXT="gcc (GCC) 12.2.1 20230201"
CONFIG_CC_IS_GCC=y
CONFIG_GCC_VERSION=120101
CONFIG_GCC_VERSION=120201
CONFIG_CLANG_VERSION=0
CONFIG_AS_IS_GNU=y
CONFIG_AS_VERSION=20244315
CONFIG_AS_VERSION=24000
CONFIG_LD_IS_BFD=y
CONFIG_LD_VERSION=20244315
CONFIG_LD_VERSION=24000
CONFIG_LLD_VERSION=0
CONFIG_CC_CAN_LINK=y
CONFIG_CC_CAN_LINK_STATIC=y
@@ -18,6 +18,7 @@ CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
CONFIG_CC_HAS_ASM_INLINE=y
CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y
CONFIG_PAHOLE_VERSION=124
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_TABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y
@@ -437,7 +438,7 @@ CONFIG_I8K=m
CONFIG_MICROCODE=y
CONFIG_MICROCODE_INTEL=y
CONFIG_MICROCODE_AMD=y
# CONFIG_MICROCODE_OLD_INTERFACE is not set
# CONFIG_MICROCODE_LATE_LOADING is not set
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
CONFIG_X86_5LEVEL=y
@@ -1142,6 +1143,7 @@ CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_ESPINTCP=y
CONFIG_INET_IPCOMP=m
CONFIG_INET_TABLE_PERTURB_ORDER=16
CONFIG_INET_XFRM_TUNNEL=m
CONFIG_INET_TUNNEL=m
CONFIG_INET_DIAG=m
@@ -1707,7 +1709,6 @@ CONFIG_DEFAULT_NET_SCH="fq_codel"
#
CONFIG_NET_CLS=y
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
@@ -9898,6 +9899,7 @@ CONFIG_GCC_PLUGIN_STRUCTLEAK=y
# Memory initialization
#
CONFIG_CC_HAS_AUTO_VAR_INIT_PATTERN=y
CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_BARE=y
CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO=y
# CONFIG_INIT_STACK_NONE is not set
# CONFIG_GCC_PLUGIN_STRUCTLEAK_USER is not set
@@ -10351,6 +10353,8 @@ CONFIG_SYMBOLIC_ERRNAME=y
CONFIG_DEBUG_BUGVERBOSE=y
# end of printk and dmesg options
CONFIG_AS_HAS_NON_CONST_LEB128=y
#
# Compile-time checks and compiler options
#
@@ -10360,6 +10364,7 @@ CONFIG_DEBUG_INFO=y
# CONFIG_DEBUG_INFO_SPLIT is not set
# CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set
CONFIG_DEBUG_INFO_DWARF4=y
# CONFIG_DEBUG_INFO_DWARF5 is not set
CONFIG_DEBUG_INFO_BTF=y
CONFIG_PAHOLE_HAS_SPLIT_BTF=y
CONFIG_DEBUG_INFO_BTF_MODULES=y

View File

@@ -1,15 +1,15 @@
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86 6.1.0-arch1 Kernel Configuration
# Linux/x86 6.1.23 Kernel Configuration
#
CONFIG_CC_VERSION_TEXT="gcc (GCC) 12.2.0"
CONFIG_CC_VERSION_TEXT="gcc (GCC) 12.2.1 20230201"
CONFIG_CC_IS_GCC=y
CONFIG_GCC_VERSION=120200
CONFIG_GCC_VERSION=120201
CONFIG_CLANG_VERSION=0
CONFIG_AS_IS_GNU=y
CONFIG_AS_VERSION=23900
CONFIG_AS_VERSION=24000
CONFIG_LD_IS_BFD=y
CONFIG_LD_VERSION=23900
CONFIG_LD_VERSION=24000
CONFIG_LLD_VERSION=0
CONFIG_CC_CAN_LINK=y
CONFIG_CC_CAN_LINK_STATIC=y
@@ -206,6 +206,7 @@ CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y
CONFIG_CC_HAS_INT128=y
CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5"
CONFIG_GCC11_NO_ARRAY_BOUNDS=y
CONFIG_GCC12_NO_ARRAY_BOUNDS=y
CONFIG_CC_NO_ARRAY_BOUNDS=y
CONFIG_ARCH_SUPPORTS_INT128=y
@@ -1784,7 +1785,6 @@ CONFIG_DEFAULT_NET_SCH="fq_codel"
#
CONFIG_NET_CLS=y
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
@@ -4456,7 +4456,6 @@ CONFIG_SERIAL_ARC_NR_PORTS=1
CONFIG_SERIAL_RP2=m
CONFIG_SERIAL_RP2_NR_UARTS=32
CONFIG_SERIAL_FSL_LPUART=m
CONFIG_SERIAL_FSL_LPUART_CONSOLE=y
CONFIG_SERIAL_FSL_LINFLEXUART=m
CONFIG_SERIAL_MEN_Z135=m
CONFIG_SERIAL_SPRD=m
@@ -6527,7 +6526,6 @@ CONFIG_DRM=y
CONFIG_DRM_MIPI_DBI=m
CONFIG_DRM_MIPI_DSI=y
# CONFIG_DRM_DEBUG_MM is not set
CONFIG_DRM_USE_DYNAMIC_DEBUG=y
CONFIG_DRM_KMS_HELPER=y
CONFIG_DRM_FBDEV_EMULATION=y
CONFIG_DRM_FBDEV_OVERALLOC=100
@@ -6598,7 +6596,6 @@ CONFIG_DRM_I915_FORCE_PROBE="*"
CONFIG_DRM_I915_CAPTURE_ERROR=y
CONFIG_DRM_I915_COMPRESS_ERROR=y
CONFIG_DRM_I915_USERPTR=y
CONFIG_DRM_I915_GVT=y
CONFIG_DRM_I915_GVT_KVMGT=m
CONFIG_DRM_I915_PXP=y
CONFIG_DRM_I915_REQUEST_TIMEOUT=20000
@@ -6609,6 +6606,7 @@ CONFIG_DRM_I915_PREEMPT_TIMEOUT=640
CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT=8000
CONFIG_DRM_I915_STOP_TIMEOUT=100
CONFIG_DRM_I915_TIMESLICE_DURATION=1
CONFIG_DRM_I915_GVT=y
CONFIG_DRM_VGEM=m
CONFIG_DRM_VKMS=m
CONFIG_DRM_VMWGFX=m
@@ -6974,6 +6972,7 @@ CONFIG_SND_HDA_CODEC_SI3054=m
CONFIG_SND_HDA_GENERIC=m
CONFIG_SND_HDA_POWER_SAVE_DEFAULT=1
CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM=y
# CONFIG_SND_HDA_CTL_DEV_ID is not set
# end of HD-Audio
CONFIG_SND_HDA_CORE=m

View File

@@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86 6.2.0 Kernel Configuration
# Linux/x86 6.2.10 Kernel Configuration
#
CONFIG_CC_VERSION_TEXT="gcc (GCC) 12.2.1 20230201"
CONFIG_CC_IS_GCC=y
@@ -487,7 +487,7 @@ CONFIG_X86_INTEL_TSX_MODE_AUTO=y
CONFIG_X86_SGX=y
CONFIG_EFI=y
CONFIG_EFI_STUB=y
# CONFIG_EFI_HANDOVER_PROTOCOL is not set
CONFIG_EFI_HANDOVER_PROTOCOL=y
CONFIG_EFI_MIXED=y
# CONFIG_EFI_FAKE_MEMMAP is not set
CONFIG_EFI_RUNTIME_MAP=y
@@ -1804,7 +1804,6 @@ CONFIG_DEFAULT_NET_SCH="fq_codel"
#
CONFIG_NET_CLS=y
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
@@ -3332,6 +3331,7 @@ CONFIG_MICROSOFT_MANA=m
CONFIG_NET_VENDOR_MYRI=y
CONFIG_MYRI10GE=m
CONFIG_MYRI10GE_DCA=y
CONFIG_FEALNX=m
CONFIG_NET_VENDOR_NI=y
CONFIG_NI_XGE_MANAGEMENT_ENET=m
CONFIG_NET_VENDOR_NATSEMI=y
@@ -4494,7 +4494,6 @@ CONFIG_SERIAL_ARC_NR_PORTS=1
CONFIG_SERIAL_RP2=m
CONFIG_SERIAL_RP2_NR_UARTS=32
CONFIG_SERIAL_FSL_LPUART=m
CONFIG_SERIAL_FSL_LPUART_CONSOLE=y
CONFIG_SERIAL_FSL_LINFLEXUART=m
CONFIG_SERIAL_MEN_Z135=m
CONFIG_SERIAL_SPRD=m
@@ -6651,7 +6650,6 @@ CONFIG_DRM_I915_FORCE_PROBE="*"
CONFIG_DRM_I915_CAPTURE_ERROR=y
CONFIG_DRM_I915_COMPRESS_ERROR=y
CONFIG_DRM_I915_USERPTR=y
CONFIG_DRM_I915_GVT=y
CONFIG_DRM_I915_GVT_KVMGT=m
CONFIG_DRM_I915_PXP=y
CONFIG_DRM_I915_REQUEST_TIMEOUT=20000
@@ -6663,6 +6661,7 @@ CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE=7500
CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT=8000
CONFIG_DRM_I915_STOP_TIMEOUT=100
CONFIG_DRM_I915_TIMESLICE_DURATION=1
CONFIG_DRM_I915_GVT=y
CONFIG_DRM_VGEM=m
CONFIG_DRM_VKMS=m
CONFIG_DRM_VMWGFX=m
@@ -7027,6 +7026,7 @@ CONFIG_SND_HDA_CODEC_SI3054=m
CONFIG_SND_HDA_GENERIC=m
CONFIG_SND_HDA_POWER_SAVE_DEFAULT=1
CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM=y
# CONFIG_SND_HDA_CTL_DEV_ID is not set
# end of HD-Audio
CONFIG_SND_HDA_CORE=m

View File

@@ -0,0 +1,14 @@
# Hook definition: reacts to file install, upgrade and remove operations
# on the targets listed below.
[Trigger]
Type = File
Operation = Install
Operation = Upgrade
Operation = Remove
# First pattern selects the per-kernel entries directly under usr/lib/modules/.
# NOTE(review): the second pattern's leading "!" is presumably the alpm-hooks
# negation syntax, excluding anything nested below those entries - confirm
# against alpm-hooks(5).
Target = usr/lib/modules/*/
Target = !usr/lib/modules/*/?*
# Action: run the cleanup script once the transaction has finished.
[Action]
Description = Cleaning up...
When = PostTransaction
Exec = /usr/share/libalpm/scripts/cleanup
NeedsTargets

10
linux-tkg-config/6.3/cleanup Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Delete every path matching /usr/lib/modules/*tkg* that does not contain
# a vmlinuz entry; paths that still have one are left untouched.

for module_dir in /usr/lib/modules/*tkg*; do
  # Keep the path when vmlinuz is present, otherwise remove it recursively.
  [[ -e ${module_dir}/vmlinuz ]] || rm -rf "$module_dir"
done

# vim:set ft=sh sw=2 et:

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,14 @@
# Hook definition: reacts to file install, upgrade and remove operations
# on the targets listed below.
[Trigger]
Type = File
Operation = Install
Operation = Upgrade
Operation = Remove
# First pattern selects the per-kernel entries directly under usr/lib/modules/.
# NOTE(review): the second pattern's leading "!" is presumably the alpm-hooks
# negation syntax, excluding anything nested below those entries - confirm
# against alpm-hooks(5).
Target = usr/lib/modules/*/
Target = !usr/lib/modules/*/?*
# Action: run the cleanup script once the transaction has finished.
[Action]
Description = Cleaning up...
When = PostTransaction
Exec = /usr/share/libalpm/scripts/cleanup
NeedsTargets

10
linux-tkg-config/6.4/cleanup Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Delete every path matching /usr/lib/modules/*tkg* that does not contain
# a vmlinuz entry; paths that still have one are left untouched.

for module_dir in /usr/lib/modules/*tkg*; do
  # Keep the path when vmlinuz is present, otherwise remove it recursively.
  [[ -e ${module_dir}/vmlinuz ]] || rm -rf "$module_dir"
done

# vim:set ft=sh sw=2 et:

File diff suppressed because it is too large Load Diff

View File

@@ -1,10 +1,10 @@
#!/bin/bash
# List of kernels that are maintained upstream
_current_kernels=("6.2" "6.1" "6.0" "5.15" "5.10" "5.4")
_current_kernels=("6.4" "6.3" "6.1" "5.15" "5.10" "5.4")
# List of kernels that are no longer maintained upstream
_eol_kernels=("5.19" "5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")
_eol_kernels=("6.2" "6.0" "5.19" "5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")
typeset -Ag _kernel_git_remotes
_kernel_git_remotes=(
@@ -39,32 +39,34 @@ done
# PREEMPT_RT's supported kernel subversion
typeset -Ag _rt_subver_map
_rt_subver_map=(
["5.4"]="221"
["5.4"]="242"
["5.9"]="1"
["5.10"]="153"
["5.10"]="180"
["5.11"]="4"
["5.14"]="2"
["5.15"]="79"
["5.15"]="113"
["5.16"]="2"
["5.17"]="1"
["6.0"]="5"
["6.1"]="rc7"
["6.1"]="33"
["6.3"]="3"
)
# PREEMPT_RT's patch revision for the kernel
# We separated this to allow for forcing the application of the patch when _preempt_rt_force=1 on version mismatch
typeset -Ag _rt_rev_map
_rt_rev_map=(
["5.4"]="79"
["5.4"]="81"
["5.9"]="20"
["5.10"]="76"
["5.10"]="89"
["5.11"]="11"
["5.14"]="21"
["5.15"]="54"
["5.15"]="64"
["5.16"]="19"
["5.17"]="17"
["6.0"]="14"
["6.1"]="5"
["6.1"]="11"
["6.3"]="15"
)
_undefine() {
@@ -252,7 +254,7 @@ _set_cpu_scheduler() {
["bore"]="BORE (Burst-Oriented Response Enhancer) CPU Scheduler"
)
# CPU SCHED selector
# CPU SCHED selector - _projectc_unoff=1 sets unofficial Project C revision flag for a given version
if [ "$_kver" = "504" ]; then
_avail_cpu_scheds=("pds" "bmq" "muqss" "cacule" "cfs")
elif [ "$_kver" = "507" ]; then
@@ -285,7 +287,8 @@ _set_cpu_scheduler() {
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
elif [ "$_kver" = "602" ]; then
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
_projectc_unoff=1
elif [ "$_kver" = "603" ]; then
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
else
_avail_cpu_scheds=("cfs")
fi
@@ -593,8 +596,10 @@ _tkg_srcprep() {
cd "$_kernel_work_folder_abs"
msg2 "Setting version..."
scripts/setlocalversion --save-scmversion
if (( "$_kver" <= 602 )); then
msg2 "Setting version..."
scripts/setlocalversion --save-scmversion
fi
if [ "${_distro}" = "Arch" ]; then
echo "-$pkgrel-tkg-${_cpusched}${_compiler_name}" > localversion.10-pkgrel
@@ -724,6 +729,10 @@ _tkg_srcprep() {
rev=2
elif [ "$_kver" = "601" ]; then
rev=1
elif [ "$_kver" = "602" ]; then
rev=2
elif [ "$_kver" = "603" ]; then
rev=1
else
rev=0
fi
@@ -869,6 +878,10 @@ _tkg_srcprep() {
# buggy project C/PSI interaction workaround
if [ "${_cpusched}" = "pds" ] || [ "${_cpusched}" = "bmq" ]; then
_enable "PSI_DEFAULT_DISABLED"
# Disable MLX5_CORE on Prjc 6.3.y
if [ "$_kver" = "603" ]; then
_disable "MLX5_CORE"
fi
fi
if [ -n "$_custom_commandline" ]; then
@@ -934,7 +947,7 @@ _tkg_srcprep() {
_cpu_marchs+=("k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver")
_cpu_marchs+=("steamroller" "excavator" "zen" "zen2" "zen3" "zen4" "mpsc" "atom" "core2" "nehalem" "westmere")
_cpu_marchs+=("bonnell" "silvermont" "sandybridge" "ivybridge" "haswell" "broadwell" "skylake")
_cpu_marchs+=("skylakex" "cannonlake" "icelake" "goldmont" "goldmontplus" "cascadelake")
_cpu_marchs+=("skylakex" "cannonlake" "icelake" "goldmont" "goldmontplus" "cascadelake" "emeraldrapids")
_cpu_marchs+=("cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake" "raptorlake" "meteorlake")
typeset -A _generic_march_map
@@ -1389,10 +1402,10 @@ _tkg_srcprep() {
fi
fi
# futex_waitv support
# fsync (futex_waitv) support
tkgpatch="$srcdir/0007-v${_basekernel}-futex_waitv.patch"
if [ -e "$tkgpatch" ]; then
if [ -z "$_futex_waitv" ]; then
if [ -z "$_fsync_backport" ]; then
plain ""
plain "Enable support for futex_waitv, backported patches for fsync from 5.16 Kernel"
plain "! Will disable futex2 patchset !"
@@ -1400,33 +1413,33 @@ _tkg_srcprep() {
plain "https://github.com/ValveSoftware/wine/pull/128"
read -rp "`echo $' > N/y : '`" CONDITION9;
fi
if [[ "$CONDITION9" =~ [yY] ]] || [ "$_futex_waitv" = "true" ]; then
_msg="Patching futex_waitv support"
if [[ "$CONDITION9" =~ [yY] ]] || [ "$_fsync_backport" = "true" ]; then
_msg="Patching fsync support"
_tkg_patcher
_futex2="false"
_fsync_futex2="false"
fi
else
_futex_waitv="false"
_fsync_backport="false"
fi
# fsync support
if [[ $_kver > 515 ]] || [[ "$CONDITION9" =~ [yY] ]] || [ "$_futex_waitv" = "true" ]; then
tkgpatch="$srcdir/0007-v${_basekernel}-fsync1_via_futex_waitv.patch"
# fsync legacy support
if [[ $_kver > 515 ]] || [[ "$CONDITION9" =~ [yY] ]] || [ "$_fsync_backport" = "true" ]; then
tkgpatch="$srcdir/0007-v${_basekernel}-fsync_legacy_via_futex_waitv.patch"
else
tkgpatch="$srcdir/0007-v${_basekernel}-fsync.patch"
tkgpatch="$srcdir/0007-v${_basekernel}-fsync_legacy.patch"
fi
if [ -e "$tkgpatch" ]; then
if [ -z "$_fsync" ]; then
if [ -z "$_fsync_legacy" ]; then
plain ""
plain "Enable support for fsync, an experimental replacement for esync in Valve Proton 4.11+"
plain "Enable support for FUTEX_WAIT_MULTIPLE (opcode 31) - fsync legacy used in Valve Proton 4.11, 5.0 and 5.13"
plain "https://steamcommunity.com/games/221410/announcements/detail/2957094910196249305"
if [[ "$CONDITION9" =~ [yY] ]] || [ "$_futex_waitv" = "true" ]; then
if [[ "$CONDITION9" =~ [yY] ]] || [ "$_fsync_backport" = "true" ]; then
plain "Will be used as a fallback to futex_waitv on older Proton builds if enabled"
fi
read -rp "`echo $' > N/y : '`" CONDITION10;
fi
if [[ "$CONDITION10" =~ [yY] ]] || [ "$_fsync" = "true" ]; then
_msg="Patching Fsync support"
if [[ "$CONDITION10" =~ [yY] ]] || [ "$_fsync_legacy" = "true" ]; then
_msg="Patching fsync legacy support"
_tkg_patcher
fi
fi
@@ -1434,15 +1447,15 @@ _tkg_srcprep() {
# futex2 support
tkgpatch="$srcdir/0007-v${_basekernel}-futex2_interface.patch"
if [ -e "$tkgpatch" ]; then
if [ -z "$_futex2" ]; then
if [ -z "$_fsync_futex2" ]; then
plain ""
plain "Enable support for futex2, an experimental replacement for esync and fsync in Valve Proton 5.13 experimental"
plain "Can be enabled alongside regular fsync patchset to have a fallback option"
plain "Enable support for futex2, a DEPRECATED replacement for esync and fsync in Valve Proton 5.13 experimental"
plain "Can be enabled alongside fsync legacy patchset to have a fallback option"
plain "https://gitlab.collabora.com/tonyk/linux/-/tree/futex2-dev"
plain "https://github.com/ValveSoftware/Proton/issues/4568"
read -rp "`echo $' > N/y : '`" CONDITION11;
fi
if [[ "$CONDITION11" =~ [yY] ]] || [ "$_futex2" = "true" ]; then
if [[ "$CONDITION11" =~ [yY] ]] || [ "$_fsync_futex2" = "true" ]; then
_msg="Patching futex2 support"
_tkg_patcher
_enable "FUTEX2"

View File

@@ -17,268 +17,20 @@ index 6b3b59cc51d6..2a0072192c3d 100644
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#endif
int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 051aaf65c..705df5511 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -208,7 +208,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
From 5d5b708e3731e135ea7ae168571ad78d883e63e8 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Wed, 1 Feb 2023 10:17:47 +0000
Subject: [PATCH 02/16] XANMOD: fair: Remove all energy efficiency functions
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
DEFINE_STATIC_KEY_FALSE(sched_energy_present);
-static unsigned int sysctl_sched_energy_aware = 1;
+static unsigned int sysctl_sched_energy_aware = 0;
DEFINE_MUTEX(sched_energy_mutex);
bool sched_energy_update;
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
kernel/sched/fair.c | 224 +-------------------------------------------
1 file changed, 3 insertions(+), 221 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0f8736991427..345cc5e9fa6e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -19,6 +19,9 @@
*
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ * Remove energy efficiency functions by Alexandre Frade
+ * (C) 2021 Alexandre Frade <kernel@xanmod.org>
*/
#include <linux/energy_model.h>
#include <linux/mmap_lock.h>
@@ -7136,219 +7139,6 @@ eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
return min(max_util, eenv->cpu_cap);
}
-/*
- * compute_energy(): Use the Energy Model to estimate the energy that @pd would
- * consume for a given utilization landscape @eenv. When @dst_cpu < 0, the task
- * contribution is ignored.
- */
-static inline unsigned long
-compute_energy(struct energy_env *eenv, struct perf_domain *pd,
- struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
-{
- unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
- unsigned long busy_time = eenv->pd_busy_time;
-
- if (dst_cpu >= 0)
- busy_time = min(eenv->pd_cap, busy_time + eenv->task_busy_time);
-
- return em_cpu_energy(pd->em_pd, max_util, busy_time, eenv->cpu_cap);
-}
-
-/*
- * find_energy_efficient_cpu(): Find most energy-efficient target CPU for the
- * waking task. find_energy_efficient_cpu() looks for the CPU with maximum
- * spare capacity in each performance domain and uses it as a potential
- * candidate to execute the task. Then, it uses the Energy Model to figure
- * out which of the CPU candidates is the most energy-efficient.
- *
- * The rationale for this heuristic is as follows. In a performance domain,
- * all the most energy efficient CPU candidates (according to the Energy
- * Model) are those for which we'll request a low frequency. When there are
- * several CPUs for which the frequency request will be the same, we don't
- * have enough data to break the tie between them, because the Energy Model
- * only includes active power costs. With this model, if we assume that
- * frequency requests follow utilization (e.g. using schedutil), the CPU with
- * the maximum spare capacity in a performance domain is guaranteed to be among
- * the best candidates of the performance domain.
- *
- * In practice, it could be preferable from an energy standpoint to pack
- * small tasks on a CPU in order to let other CPUs go in deeper idle states,
- * but that could also hurt our chances to go cluster idle, and we have no
- * ways to tell with the current Energy Model if this is actually a good
- * idea or not. So, find_energy_efficient_cpu() basically favors
- * cluster-packing, and spreading inside a cluster. That should at least be
- * a good thing for latency, and this is consistent with the idea that most
- * of the energy savings of EAS come from the asymmetry of the system, and
- * not so much from breaking the tie between identical CPUs. That's also the
- * reason why EAS is enabled in the topology code only for systems where
- * SD_ASYM_CPUCAPACITY is set.
- *
- * NOTE: Forkees are not accepted in the energy-aware wake-up path because
- * they don't have any useful utilization data yet and it's not possible to
- * forecast their impact on energy consumption. Consequently, they will be
- * placed by find_idlest_cpu() on the least loaded CPU, which might turn out
- * to be energy-inefficient in some use-cases. The alternative would be to
- * bias new tasks towards specific types of CPUs first, or to try to infer
- * their util_avg from the parent task, but those heuristics could hurt
- * other use-cases too. So, until someone finds a better way to solve this,
- * let's keep things simple by re-using the existing slow path.
- */
-static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
-{
- struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
- unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
- unsigned long p_util_min = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MIN) : 0;
- unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024;
- struct root_domain *rd = this_rq()->rd;
- int cpu, best_energy_cpu, target = -1;
- struct sched_domain *sd;
- struct perf_domain *pd;
- struct energy_env eenv;
-
- rcu_read_lock();
- pd = rcu_dereference(rd->pd);
- if (!pd || READ_ONCE(rd->overutilized))
- goto unlock;
-
- /*
- * Energy-aware wake-up happens on the lowest sched_domain starting
- * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
- */
- sd = rcu_dereference(*this_cpu_ptr(&sd_asym_cpucapacity));
- while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
- sd = sd->parent;
- if (!sd)
- goto unlock;
-
- target = prev_cpu;
-
- sync_entity_load_avg(&p->se);
- if (!uclamp_task_util(p, p_util_min, p_util_max))
- goto unlock;
-
- eenv_task_busy_time(&eenv, p, prev_cpu);
-
- for (; pd; pd = pd->next) {
- unsigned long util_min = p_util_min, util_max = p_util_max;
- unsigned long cpu_cap, cpu_thermal_cap, util;
- unsigned long cur_delta, max_spare_cap = 0;
- unsigned long rq_util_min, rq_util_max;
- unsigned long prev_spare_cap = 0;
- int max_spare_cap_cpu = -1;
- unsigned long base_energy;
-
- cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);
-
- if (cpumask_empty(cpus))
- continue;
-
- /* Account thermal pressure for the energy estimation */
- cpu = cpumask_first(cpus);
- cpu_thermal_cap = arch_scale_cpu_capacity(cpu);
- cpu_thermal_cap -= arch_scale_thermal_pressure(cpu);
-
- eenv.cpu_cap = cpu_thermal_cap;
- eenv.pd_cap = 0;
-
- for_each_cpu(cpu, cpus) {
- struct rq *rq = cpu_rq(cpu);
-
- eenv.pd_cap += cpu_thermal_cap;
-
- if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
- continue;
-
- if (!cpumask_test_cpu(cpu, p->cpus_ptr))
- continue;
-
- util = cpu_util_next(cpu, p, cpu);
- cpu_cap = capacity_of(cpu);
-
- /*
- * Skip CPUs that cannot satisfy the capacity request.
- * IOW, placing the task there would make the CPU
- * overutilized. Take uclamp into account to see how
- * much capacity we can get out of the CPU; this is
- * aligned with sched_cpu_util().
- */
- if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
- /*
- * Open code uclamp_rq_util_with() except for
- * the clamp() part. Ie: apply max aggregation
- * only. util_fits_cpu() logic requires to
- * operate on non clamped util but must use the
- * max-aggregated uclamp_{min, max}.
- */
- rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
- rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
-
- util_min = max(rq_util_min, p_util_min);
- util_max = max(rq_util_max, p_util_max);
- }
- if (!util_fits_cpu(util, util_min, util_max, cpu))
- continue;
-
- lsub_positive(&cpu_cap, util);
-
- if (cpu == prev_cpu) {
- /* Always use prev_cpu as a candidate. */
- prev_spare_cap = cpu_cap;
- } else if (cpu_cap > max_spare_cap) {
- /*
- * Find the CPU with the maximum spare capacity
- * among the remaining CPUs in the performance
- * domain.
- */
- max_spare_cap = cpu_cap;
- max_spare_cap_cpu = cpu;
- }
- }
-
- if (max_spare_cap_cpu < 0 && prev_spare_cap == 0)
- continue;
-
- eenv_pd_busy_time(&eenv, cpus, p);
- /* Compute the 'base' energy of the pd, without @p */
- base_energy = compute_energy(&eenv, pd, cpus, p, -1);
-
- /* Evaluate the energy impact of using prev_cpu. */
- if (prev_spare_cap > 0) {
- prev_delta = compute_energy(&eenv, pd, cpus, p,
- prev_cpu);
- /* CPU utilization has changed */
- if (prev_delta < base_energy)
- goto unlock;
- prev_delta -= base_energy;
- best_delta = min(best_delta, prev_delta);
- }
-
- /* Evaluate the energy impact of using max_spare_cap_cpu. */
- if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
- cur_delta = compute_energy(&eenv, pd, cpus, p,
- max_spare_cap_cpu);
- /* CPU utilization has changed */
- if (cur_delta < base_energy)
- goto unlock;
- cur_delta -= base_energy;
- if (cur_delta < best_delta) {
- best_delta = cur_delta;
- best_energy_cpu = max_spare_cap_cpu;
- }
- }
- }
- rcu_read_unlock();
-
- if (best_delta < prev_delta)
- target = best_energy_cpu;
-
- return target;
-
-unlock:
- rcu_read_unlock();
-
- return target;
-}
-
/*
* select_task_rq_fair: Select target runqueue for the waking task in domains
* that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
@@ -7376,14 +7166,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
lockdep_assert_held(&p->pi_lock);
if (wake_flags & WF_TTWU) {
record_wakee(p);
-
- if (sched_energy_enabled()) {
- new_cpu = find_energy_efficient_cpu(p, prev_cpu);
- if (new_cpu >= 0)
- return new_cpu;
- new_cpu = prev_cpu;
- }
-
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
}
--
2.39.1

View File

@@ -78,7 +78,7 @@ index 4700d24e5d55..8f7a3d7fd9c1 100644
@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
}
+static bool acs_on_downstream;
+static bool acs_on_multifunction;
+
@@ -180,14 +180,14 @@ index 4700d24e5d55..8f7a3d7fd9c1 100644
+ return -ENOTTY;
+}
/*
* Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
* The device will throw a Link Down error on AER-capable systems and
@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
/* Zhaoxin Root/Downstream Ports */
* Some NVIDIA GPU devices do not work with bus reset, SBR needs to be
* prevented for those affected devices.
@@ -5002,6 +5102,8 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
/* Wangxun nics */
{ PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs },
+ /* ACS override */
+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
{ 0 }
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,151 @@
From d50977b164e708bf523a35ef53315355528c3ca6 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Mon, 16 Sep 2019 04:53:20 +0200
Subject: [PATCH] ZEN: Add sysctl and CONFIG to disallow unprivileged
CLONE_NEWUSER
Our default behavior continues to match the vanilla kernel.
---
include/linux/user_namespace.h | 4 ++++
init/Kconfig | 16 ++++++++++++++++
kernel/fork.c | 14 ++++++++++++++
kernel/sysctl.c | 12 ++++++++++++
kernel/user_namespace.c | 7 +++++++
5 files changed, 53 insertions(+)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 45f09bec02c485..87b20e2ee27445 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -148,6 +148,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
@@ -181,6 +183,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
struct ns_common *ns_get_owner(struct ns_common *ns);
#else
+#define unprivileged_userns_clone 0
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
return &init_user_ns;
diff --git a/init/Kconfig b/init/Kconfig
index 94125d3b6893c7..9f7139b536f638 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1247,6 +1247,22 @@ config USER_NS
If unsure, say N.
+config USER_NS_UNPRIVILEGED
+ bool "Allow unprivileged users to create namespaces"
+ default y
+ depends on USER_NS
+ help
+ When disabled, unprivileged users will not be able to create
+ new namespaces. Allowing users to create their own namespaces
+ has been part of several recent local privilege escalation
+ exploits, so if you need user namespaces but are
+ paranoid^Wsecurity-conscious you want to disable this.
+
+ This setting can be overridden at runtime via the
+ kernel.unprivileged_userns_clone sysctl.
+
+ If unsure, say Y.
+
config PID_NS
bool "PID Namespaces"
default y
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d59..ff601cb7a1fae0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -98,6 +98,10 @@
#include <linux/io_uring.h>
#include <linux/bpf.h>
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
+
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -2008,6 +2012,10 @@ static __latent_entropy struct task_struct *copy_process(
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -3166,6 +3174,12 @@ int ksys_unshare(unsigned long unshare_flags)
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto bad_unshare_out;
+ }
+
err = check_unshare_flags(unshare_flags);
if (err)
goto bad_unshare_out;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c6d9dec11b749d..9a4514ad481b21 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -81,6 +81,9 @@
#ifdef CONFIG_RT_MUTEXES
#include <linux/rtmutex.h>
#endif
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
/* shared constants to be used in various sysctls */
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
@@ -1659,6 +1662,15 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_USER_NS
+ {
+ .procname = "unprivileged_userns_clone",
+ .data = &unprivileged_userns_clone,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 54211dbd516c57..16ca0c1516298d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -22,6 +22,13 @@
#include <linux/bsearch.h>
#include <linux/sort.h>
+/* sysctl */
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
+int unprivileged_userns_clone = 1;
+#else
+int unprivileged_userns_clone;
+#endif
+
static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);

View File

@@ -0,0 +1,244 @@
From 5ae86c8436b83762bc6cf46bea1da6ace2d3f50e Mon Sep 17 00:00:00 2001
From: Paul Gofman <pgofman@codeweavers.com>
Date: Wed, 6 May 2020 14:37:44 +0300
Subject: [PATCH 1/2] mm: Support soft dirty flag reset for VA range.
---
fs/proc/task_mmu.c | 129 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 103 insertions(+), 26 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3cec6fbef725..7c7865028f10 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1032,6 +1032,8 @@ enum clear_refs_types {
struct clear_refs_private {
enum clear_refs_types type;
+ unsigned long start, end;
+ bool clear_range;
};
#ifdef CONFIG_MEM_SOFT_DIRTY
@@ -1125,6 +1127,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
struct page *page;
+ BUG_ON(addr < cp->start || end > cp->end);
+
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
@@ -1181,9 +1185,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
struct clear_refs_private *cp = walk->private;
struct vm_area_struct *vma = walk->vma;
- if (vma->vm_flags & VM_PFNMAP)
+ if (!cp->clear_range && (vma->vm_flags & VM_PFNMAP))
return 1;
+ BUG_ON(start < cp->start || end > cp->end);
+
/*
* Writing 1 to /proc/pid/clear_refs affects all pages.
* Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
@@ -1206,10 +1212,12 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct task_struct *task;
- char buffer[PROC_NUMBUF];
+ char buffer[18];
struct mm_struct *mm;
struct vm_area_struct *vma;
enum clear_refs_types type;
+ unsigned long start, end;
+ bool clear_range;
int itype;
int rv;
@@ -1218,12 +1226,34 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- rv = kstrtoint(strstrip(buffer), 10, &itype);
- if (rv < 0)
- return rv;
- type = (enum clear_refs_types)itype;
- if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
- return -EINVAL;
+
+ if (buffer[0] == '6')
+ {
+ static int once;
+
+ if (!once++)
+ printk(KERN_DEBUG "task_mmu: Using POC clear refs range implementation.\n");
+
+ if (count != 17)
+ return -EINVAL;
+
+ type = CLEAR_REFS_SOFT_DIRTY;
+ start = *(unsigned long *)(buffer + 1);
+ end = *(unsigned long *)(buffer + 1 + 8);
+ }
+ else
+ {
+ rv = kstrtoint(strstrip(buffer), 10, &itype);
+ if (rv < 0)
+ return rv;
+ type = (enum clear_refs_types)itype;
+
+ if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
+ return -EINVAL;
+
+ start = 0;
+ end = -1UL;
+ }
task = get_proc_task(file_inode(file));
if (!task)
@@ -1235,40 +1265,86 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
.type = type,
};
- if (mmap_write_lock_killable(mm)) {
- count = -EINTR;
- goto out_mm;
+ if (start || end != -1UL)
+ {
+ start = min(start, -1) & PAGE_MASK;
+ end = min(end, -1) & PAGE_MASK;
+
+ if (start >= end)
+ {
+ count = -EINVAL;
+ goto out_mm;
+ }
+ clear_range = true;
}
+ else
+ {
+ clear_range = false;
+ }
+
+ cp.start = start;
+ cp.end = end;
+ cp.clear_range = clear_range;
+
if (type == CLEAR_REFS_MM_HIWATER_RSS) {
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+
/*
* Writing 5 to /proc/pid/clear_refs resets the peak
* resident set size to this mm's current rss value.
*/
reset_mm_hiwater_rss(mm);
- goto out_unlock;
+ mmap_write_unlock(mm);
+ goto out_mm;
}
if (type == CLEAR_REFS_SOFT_DIRTY) {
- for_each_vma(vmi, vma) {
- if (!(vma->vm_flags & VM_SOFTDIRTY))
- continue;
- vm_flags_clear(vma, VM_SOFTDIRTY);
- vma_set_page_prot(vma);
+ if (mmap_read_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
}
-
+ if (!clear_range)
+ for_each_vma(vmi, vma) {
+ if (!(vma->vm_flags & VM_SOFTDIRTY))
+ continue;
+ mmap_read_unlock(mm);
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+ for_each_vma(vmi, vma) {
+ vm_flags_clear(vma, VM_SOFTDIRTY);
+ vma_set_page_prot(vma);
+ }
+ mmap_write_downgrade(mm);
+ break;
+ }
inc_tlb_flush_pending(mm);
mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
- 0, mm, 0, -1UL);
+ 0, mm, start, end);
mmu_notifier_invalidate_range_start(&range);
}
- walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
+ else
+ {
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+ }
+ walk_page_range(mm, start, end == -1UL ? -1 : end, &clear_refs_walk_ops, &cp);
if (type == CLEAR_REFS_SOFT_DIRTY) {
mmu_notifier_invalidate_range_end(&range);
flush_tlb_mm(mm);
dec_tlb_flush_pending(mm);
+ mmap_read_unlock(mm);
+ }
+ else
+ {
+ mmap_write_unlock(mm);
}
-out_unlock:
- mmap_write_unlock(mm);
out_mm:
mmput(mm);
}
@@ -1301,6 +1377,7 @@ struct pagemapread {
#define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
+#define PM_SOFT_DIRTY_PAGE BIT_ULL(57)
#define PM_UFFD_WP BIT_ULL(57)
#define PM_FILE BIT_ULL(61)
#define PM_SWAP BIT_ULL(62)
@@ -1373,13 +1450,13 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_PRESENT;
page = vm_normal_page(vma, addr, pte);
if (pte_soft_dirty(pte))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pte_uffd_wp(pte))
flags |= PM_UFFD_WP;
} else if (is_swap_pte(pte)) {
swp_entry_t entry;
if (pte_swp_soft_dirty(pte))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pte_swp_uffd_wp(pte))
flags |= PM_UFFD_WP;
entry = pte_to_swp_entry(pte);
@@ -1500,7 +1500,7 @@
flags |= PM_PRESENT;
if (pmd_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pmd_uffd_wp(pmd))
flags |= PM_UFFD_WP;
if (pm->show_pfn)
@@ -1442,7 +1519,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
}
flags |= PM_SWAP;
if (pmd_swp_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pmd_swp_uffd_wp(pmd))
flags |= PM_UFFD_WP;
VM_BUG_ON(!is_pmd_migration_entry(pmd));
--
2.30.2

View File

@@ -0,0 +1,620 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 14 Mar 2016 11:10:58 -0600
Subject: [PATCH] pci pme wakeups
Reduce wakeups for PME checks, which are a workaround for miswired
boards (sadly, too many of them) in laptops.
---
drivers/pci/pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c9338f9..6974fbf 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -62,7 +62,7 @@ struct pci_pme_device {
struct pci_dev *dev;
};
-#define PME_TIMEOUT 1000 /* How long between PME checks */
+#define PME_TIMEOUT 4000 /* How long between PME checks */
static void pci_dev_d3_sleep(struct pci_dev *dev)
{
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 19 Mar 2016 21:32:19 -0400
Subject: [PATCH] intel_idle: tweak cpuidle cstates
Increase target_residency in cpuidle cstate
Tune intel_idle to be a bit less aggressive;
Clear linux is cleaner in hygiene (wakeups) than the average linux,
so we can afford changing these in a way that increases
performance while keeping power efficiency
---
drivers/idle/intel_idle.c | 44 +++++++++++++++++++--------------------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index f449584..c994d24 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 33,
- .target_residency = 100,
+ .target_residency = 900,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
- .target_residency = 400,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
- .target_residency = 500,
+ .target_residency = 1500,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
- .target_residency = 900,
+ .target_residency = 2000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
- .target_residency = 1800,
+ .target_residency = 5000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
- .target_residency = 7700,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 40,
- .target_residency = 100,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
- .target_residency = 400,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
- .target_residency = 500,
+ .target_residency = 2000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
- .target_residency = 900,
+ .target_residency = 4000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
- .target_residency = 1800,
+ .target_residency = 7000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
- .target_residency = 7700,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 70,
- .target_residency = 100,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 85,
- .target_residency = 200,
+ .target_residency = 600,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x33",
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 124,
- .target_residency = 800,
+ .target_residency = 3000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200,
- .target_residency = 800,
+ .target_residency = 3200,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -708,7 +708,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 480,
- .target_residency = 5000,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 890,
- .target_residency = 5000,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 300,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 6 Jan 2017 15:34:09 +0000
Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little
bigger than default
---
net/ipv4/tcp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 30c1142..4345075 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4201,8 +4201,8 @@ void __init tcp_init(void)
tcp_init_mem();
/* Set per-socket limits to no more than 1/128 the pressure threshold */
limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
- max_wshare = min(4UL*1024*1024, limit);
- max_rshare = min(6UL*1024*1024, limit);
+ max_wshare = min(16UL*1024*1024, limit);
+ max_rshare = min(16UL*1024*1024, limit);
init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 18 Feb 2018 23:35:41 +0000
Subject: [PATCH] locking: rwsem: spin faster
tweak rwsem owner spinning a bit
---
kernel/locking/rwsem.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index f11b9bd..1bbfcc1 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
struct task_struct *new, *owner;
unsigned long flags, new_flags;
enum owner_state state;
+ int i = 0;
owner = rwsem_owner_flags(sem, &flags);
state = rwsem_owner_state(owner, flags, nonspinnable);
@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
break;
}
- cpu_relax();
+ if (i++ > 1000)
+ cpu_relax();
}
rcu_read_unlock();
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 2 Jun 2016 23:36:32 -0500
Subject: [PATCH] initialize ata before graphics
ATA init is the long pole in the boot process, and it's asynchronous.
move the graphics init after it so that ata and graphics initialize
in parallel
---
drivers/Makefile | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/Makefile b/drivers/Makefile
index c0cd1b9..af1e2fb 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -59,15 +59,8 @@ obj-y += char/
# iommu/ comes before gpu as gpu are using iommu controllers
obj-y += iommu/
-# gpu/ comes after char for AGP vs DRM startup and after iommu
-obj-y += gpu/
-
obj-$(CONFIG_CONNECTOR) += connector/
-# i810fb and intelfb depend on char/agp/
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
-obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
-
obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/
obj-y += scsi/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
+
+# gpu/ comes after char for AGP vs DRM startup and after iommu
+obj-y += gpu/
+
+# i810fb and intelfb depend on char/agp/
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
+
obj-$(CONFIG_TARGET_CORE) += target/
obj-$(CONFIG_MTD) += mtd/
obj-$(CONFIG_SPI) += spi/
--
https://clearlinux.org
From 676c2dc63592f52b716515573a3a825582a371e9 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 8 Dec 2018 18:21:32 +0000
Subject: [PATCH 1/9] x86/vdso: Use lfence instead of rep and nop
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
arch/x86/include/asm/vdso/processor.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h
index 57b1a7034c64..e2c45674f989 100644
--- a/arch/x86/include/asm/vdso/processor.h
+++ b/arch/x86/include/asm/vdso/processor.h
@@ -10,7 +10,7 @@
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static __always_inline void rep_nop(void)
{
- asm volatile("rep; nop" ::: "memory");
+ asm volatile("lfence" ::: "memory");
}
static __always_inline void cpu_relax(void)
--
2.39.1
From 48dc9669f8db68adc480ffc2698ed8204440e45b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 13 Dec 2018 01:00:49 +0000
Subject: [PATCH 2/9] sched/wait: Do accept() in LIFO order for cache
efficiency
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/wait.h | 2 ++
kernel/sched/wait.c | 24 ++++++++++++++++++++++++
net/ipv4/inet_connection_sock.c | 2 +-
3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a0307b516b09..edc21128f387 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -165,6 +165,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
@@ -1192,6 +1193,7 @@ do { \
*/
void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 133b74730738..1647fb8662eb 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -47,6 +47,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_
}
EXPORT_SYMBOL_GPL(add_wait_queue_priority);
+void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+{
+ unsigned long flags;
+
+ wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&wq_head->lock, flags);
+ __add_wait_queue(wq_head, wq_entry);
+ spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive_lifo);
+
void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
unsigned long flags;
@@ -293,6 +304,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
+{
+ unsigned long flags;
+
+ wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&wq_head->lock, flags);
+ if (list_empty(&wq_entry->entry))
+ __add_wait_queue(wq_head, wq_entry);
+ set_current_state(state);
+ spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo);
+
void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
wq_entry->flags = flags;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f2c43f67187d..9885bfb429a2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -606,7 +606,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
* having to remove and re-insert us on the wait queue.
*/
for (;;) {
- prepare_to_wait_exclusive(sk_sleep(sk), &wait,
+ prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
release_sock(sk);
if (reqsk_queue_empty(&icsk->icsk_accept_queue))
--
2.39.1
From afa213811c5490906caf394b20bb4b616fc6f12a Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 25 Aug 2022 15:55:26 -0700
Subject: [PATCH 3/9] sched/fair: Simplify asym_packing logic for SMT sched
groups
When the destination CPU is an SMT sibling and idle, it can only help the
busiest group if all of its other SMT siblings are also idle. Otherwise,
there is no increase in throughput.
It does not matter whether the busiest group has SMT siblings. Simply
check if there are any tasks running on the local group before proceeding.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
kernel/sched/fair.c | 29 +++++++++--------------------
1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 345cc5e9fa6e..60f9690a5626 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8921,12 +8921,10 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
struct sched_group *sg)
{
#ifdef CONFIG_SCHED_SMT
- bool local_is_smt, sg_is_smt;
+ bool local_is_smt;
int sg_busy_cpus;
local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
- sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
-
sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
if (!local_is_smt) {
@@ -8947,25 +8945,16 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
}
- /* @dst_cpu has SMT siblings. */
-
- if (sg_is_smt) {
- int local_busy_cpus = sds->local->group_weight -
- sds->local_stat.idle_cpus;
- int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
-
- if (busy_cpus_delta == 1)
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
-
- return false;
- }
-
/*
- * @sg does not have SMT siblings. Ensure that @sds::local does not end
- * up with more than one busy SMT sibling and only pull tasks if there
- * are not busy CPUs (i.e., no CPU has running tasks).
+ * @dst_cpu has SMT siblings. When both @dst_cpu and the busiest core
+ * have one or more busy siblings, moving tasks between them results
+ * in the same throughput. Only if all the siblings of @dst_cpu are
+ * idle throughput can increase.
+ *
+ * If the difference in the number of busy CPUs is two or more, let
+ * find_busiest_group() take care of it.
*/
- if (!sds->local_stat.sum_nr_running)
+ if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
return false;
--
2.39.1
From a1f627fd10ced4f5eeae678bc4ba96ea7fa01d7e Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 25 Aug 2022 15:55:28 -0700
Subject: [PATCH 4/9] sched/fair: Let lower-priority CPUs do active balancing
When more than one SMT sibling of a physical core is busy, an idle CPU
of lower priority can help.
Indicate that the low priority CPU can do active balancing from the high-
priority CPU only if they belong to separate cores.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
kernel/sched/fair.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 60f9690a5626..67b0eacad0e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10176,9 +10176,14 @@ asym_active_balance(struct lb_env *env)
* ASYM_PACKING needs to force migrate tasks from busy but
* lower priority CPUs in order to pack all tasks in the
* highest priority CPUs.
+ *
+ * If the busy CPU has higher priority but is an SMT sibling
+ * in which other SMT siblings are also busy, a lower-priority
+ * CPU in a separate core can help.
*/
return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
- sched_asym_prefer(env->dst_cpu, env->src_cpu);
+ (sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
+ !(env->sd->flags & SD_SHARE_CPUCAPACITY));
}
static inline bool
--
2.39.1

View File

@@ -0,0 +1,363 @@
From 9c85113cf4019e7b277a44e72bda8b78347aa72f Mon Sep 17 00:00:00 2001
From: Paul Gofman <pgofman@codeweavers.com>
Date: Thu, 7 May 2020 14:05:31 +0300
Subject: [PATCH 2/2] mm: Support soft dirty flag read with reset.
---
fs/proc/base.c | 3 +
fs/proc/internal.h | 1 +
fs/proc/task_mmu.c | 144 +++++++++++++++++++++++++++++++++++++++------
3 files changed, 130 insertions(+), 18 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b3422cda2a91..8199ae2411ca 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3202,6 +3202,9 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ REG("pagemap_reset", S_IRUSR, proc_pagemap_reset_operations),
+#endif
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f60b379dcdc7..36a901cf0e7f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -303,6 +303,7 @@ extern const struct file_operations proc_pid_smaps_operations;
extern const struct file_operations proc_pid_smaps_rollup_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
+extern const struct file_operations proc_pagemap_reset_operations;
extern unsigned long task_vsize(struct mm_struct *);
extern unsigned long task_statm(struct mm_struct *,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7c7865028f10..a21694967915 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1056,8 +1056,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
return page_maybe_dma_pinned(page);
}
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
- unsigned long addr, pte_t *pte)
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *pte)
{
/*
* The soft-dirty tracker uses #PF-s to catch writes
@@ -1066,37 +1066,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
* of how soft-dirty works.
*/
pte_t ptent = *pte;
+ bool ret = false;
if (pte_present(ptent)) {
pte_t old_pte;
if (pte_is_pinned(vma, addr, ptent))
- return;
+ return ret;
old_pte = ptep_modify_prot_start(vma, addr, pte);
+ ret = pte_soft_dirty(old_pte);
ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (is_swap_pte(ptent)) {
+ ret = pte_swp_soft_dirty(ptent);
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
+ return ret;
}
#else
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
+ return false;
}
#endif
#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t old, pmd = *pmdp;
+ bool ret = false;
if (pmd_present(pmd)) {
/* See comment in change_huge_pmd() */
old = pmdp_invalidate(vma, addr, pmdp);
+
+ ret = pmd_soft_dirty(old);
+
if (pmd_dirty(old))
pmd = pmd_mkdirty(pmd);
if (pmd_young(old))
@@ -1107,14 +1116,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ ret = pmd_swp_soft_dirty(pmd);
pmd = pmd_swp_clear_soft_dirty(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
+ return ret;
}
#else
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
+ return false;
}
#endif
@@ -1367,6 +1379,7 @@ struct pagemapread {
int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
pagemap_entry_t *buffer;
bool show_pfn;
+ bool reset;
};
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
@@ -1398,6 +1411,14 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
return 0;
}
+static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm)
+{
+ ((unsigned long *)pm->buffer)[pm->pos++] = addr;
+ if (pm->pos >= pm->len)
+ return PM_END_OF_BUFFER;
+ return 0;
+}
+
static int pagemap_pte_hole(unsigned long start, unsigned long end,
__always_unused int depth, struct mm_walk *walk)
{
@@ -1405,6 +1426,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
unsigned long addr = start;
int err = 0;
+ if (pm->reset)
+ goto out;
+
while (addr < end) {
struct vm_area_struct *vma = find_vma(walk->mm, addr);
pagemap_entry_t pme = make_pme(0, 0);
@@ -1439,8 +1463,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
}
static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
- struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+ struct vm_area_struct *vma, unsigned long addr, pte_t *pte_addr)
{
+ pte_t pte = *pte_addr;
u64 frame = 0, flags = 0;
struct page *page = NULL;
@@ -1493,6 +1518,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
pmd_t pmd = *pmdp;
struct page *page = NULL;
+ if (pm->reset)
+ {
+ if (clear_soft_dirty_pmd(vma, addr, pmdp))
+ {
+ for (; addr != end; addr += PAGE_SIZE)
+ {
+ err = add_addr_to_pagemap(addr, pm);
+ if (err)
+ break;
+ }
+ }
+ goto trans_huge_done;
+ }
+
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1541,6 +1580,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
frame += (1 << MAX_SWAPFILES_SHIFT);
}
}
+trans_huge_done:
spin_unlock(ptl);
return err;
}
@@ -1555,10 +1595,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
*/
orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
for (; addr < end; pte++, addr += PAGE_SIZE) {
- pagemap_entry_t pme;
+ if (pm->reset)
+ {
+ if (clear_soft_dirty(vma, addr, pte))
+ err = add_addr_to_pagemap(addr, pm);
+ }
+ else
+ {
+ pagemap_entry_t pme;
- pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
- err = add_to_pagemap(addr, &pme, pm);
+ pme = pte_to_pagemap_entry(pm, vma, addr, pte);
+ err = add_to_pagemap(addr, &pme, pm);
+ }
if (err)
break;
}
@@ -1650,8 +1698,8 @@ static const struct mm_walk_ops pagemap_ops = {
* determine which areas of memory are actually mapped and llseek to
* skip over unmapped regions.
*/
-static ssize_t pagemap_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t do_pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos, bool reset)
{
struct mm_struct *mm = file->private_data;
struct pagemapread pm;
@@ -1660,6 +1708,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long start_vaddr;
unsigned long end_vaddr;
int ret = 0, copied = 0;
+ struct mmu_notifier_range range;
+ size_t buffer_len;
if (!mm || !mmget_not_zero(mm))
goto out;
@@ -1675,19 +1725,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* do not disclose physical addresses: attack vector */
pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
+ pm.reset = reset;
- pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
- pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
+ buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES);
+
+ pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL);
ret = -ENOMEM;
if (!pm.buffer)
goto out_mm;
src = *ppos;
svpfn = src / PM_ENTRY_BYTES;
- end_vaddr = mm->task_size;
+
+ start_vaddr = svpfn << PAGE_SHIFT;
+
+ if (reset)
+ {
+ if (count < sizeof(end_vaddr))
+ {
+ ret = -EINVAL;
+ goto out_mm;
+ }
+ if (copy_from_user(&end_vaddr, buf, sizeof(end_vaddr)))
+ return -EFAULT;
+ end_vaddr = min(end_vaddr, mm->task_size);
+ }
+ else
+ {
+ end_vaddr = mm->task_size;
+ start_vaddr = end_vaddr;
+ }
/* watch out for wraparound */
- start_vaddr = end_vaddr;
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
@@ -1707,18 +1776,35 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long end;
pm.pos = 0;
- end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
+ pm.len = min(buffer_len, count / PM_ENTRY_BYTES);
+
+ end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT));
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
+
ret = mmap_read_lock_killable(mm);
if (ret)
goto out_free;
+
+ if (reset)
+ {
+ inc_tlb_flush_pending(mm);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
+ 0, mm, start_vaddr, end);
+ mmu_notifier_invalidate_range_start(&range);
+ }
ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
+ if (reset)
+ {
+ mmu_notifier_invalidate_range_end(&range);
+ flush_tlb_mm(mm);
+ dec_tlb_flush_pending(mm);
+ }
mmap_read_unlock(mm);
- start_vaddr = end;
len = min(count, PM_ENTRY_BYTES * pm.pos);
+ BUG_ON(ret && ret != PM_END_OF_BUFFER);
if (copy_to_user(buf, pm.buffer, len)) {
ret = -EFAULT;
goto out_free;
@@ -1726,6 +1812,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
copied += len;
buf += len;
count -= len;
+
+ start_vaddr = reset && pm.pos == pm.len ? ((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end;
}
*ppos += copied;
if (!ret || ret == PM_END_OF_BUFFER)
@@ -1739,6 +1827,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
return ret;
}
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return do_pagemap_read(file, buf, count, ppos, false);
+}
+
+static ssize_t pagemap_reset_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return do_pagemap_read(file, buf, count, ppos, true);
+}
+
static int pagemap_open(struct inode *inode, struct file *file)
{
struct mm_struct *mm;
@@ -1765,6 +1865,14 @@ const struct file_operations proc_pagemap_operations = {
.open = pagemap_open,
.release = pagemap_release,
};
+
+const struct file_operations proc_pagemap_reset_operations = {
+ .llseek = mem_lseek, /* borrow this */
+ .read = pagemap_reset_read,
+ .open = pagemap_open,
+ .release = pagemap_release,
+};
+
#endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA
--
2.30.2

View File

@@ -0,0 +1,879 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: [PATCH 01/17] glitched
---
init/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/Makefile b/init/Makefile
index baf3ab8d9d49..854e32e6aec7 100755
--- a/init/Makefile
+++ b/init/Makefile
@@ -19,7 +19,7 @@ else
# Maximum length of UTS_VERSION is 64 chars
filechk_uts_version = \
- utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "$(build-timestamp)" | cut -b -64); \
+ utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "TKG" "$(build-timestamp)" | cut -b -64); \
echo '$(pound)'define UTS_VERSION \""$${utsver}"\"
#
--
2.28.0
From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 16:59:22 +0000
Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which
VFS caches are reclaimed
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
fs/dcache.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 361ea7ab30ea..0c5cf69b241a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -71,7 +71,7 @@
* If no ancestor relationship:
* arbitrary, since it's serialized on rename_lock
*/
-int sysctl_vfs_cache_pressure __read_mostly = 100;
+int sysctl_vfs_cache_pressure __read_mostly = 50;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
--
2.28.0
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f788cd61df21..2bfbb4213707 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -15,9 +15,9 @@ __read_mostly int scheduler_running;
/*
* part of the period that we allow rt tasks to run in us.
- * default: 0.95s
+ * XanMod default: 0.98s
*/
-int sysctl_sched_rt_runtime = 950000;
+int sysctl_sched_rt_runtime = 980000;
#ifdef CONFIG_SYSCTL
static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
--
2.28.0
From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:41:29 +0000
Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
scripts/setlocalversion | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 20f2efd57b11..0552d8b9f582 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -54,7 +54,7 @@ scm_version()
# If only the short version is requested, don't bother
# running further git commands
if $short; then
- echo "+"
+ #echo "+"
return
fi
# If we are past the tagged commit, we pretty print it.
--
2.28.0
From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Fri, 26 Oct 2018 11:22:33 +0100
Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3
inlining
---
drivers/infiniband/core/addr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 3a98439bba83..6efc4f907f58 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
union {
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
+ struct sockaddr_ib _sockaddr_ib;
} sgid_addr, dgid_addr;
int ret;
--
2.28.0
From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001
From: Etienne Juvigny <Ti3noU@gmail.com>
Date: Mon, 3 Sep 2018 17:36:25 +0200
Subject: [PATCH 07/17] Zenify & stuff
---
init/Kconfig | 32 ++++++++++++++++++++++++++++++++
kernel/sched/fair.c | 25 +++++++++++++++++++++++++
mm/page-writeback.c | 8 ++++++++
3 files changed, 65 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 3ae8678e1145..da708eed0f1e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK
menu "General setup"
+config ZENIFY
+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience"
+ default y
+ help
+ Tunes the kernel for responsiveness at the cost of throughput and power usage.
+
+ --- Virtual Memory Subsystem ---------------------------
+
+ Mem dirty before bg writeback..: 10 % -> 20 %
+ Mem dirty before sync writeback: 20 % -> 50 %
+
+ --- Block Layer ----------------------------------------
+
+ Queue depth...............: 128 -> 512
+ Default MQ scheduler......: mq-deadline -> bfq
+
+ --- CFS CPU Scheduler ----------------------------------
+
+ Scheduling latency.............: 6 -> 3 ms
+ Minimal granularity............: 0.75 -> 0.3 ms
+ Wakeup granularity.............: 1 -> 0.5 ms
+ CPU migration cost.............: 0.5 -> 0.25 ms
+ Bandwidth slice size...........: 5 -> 3 ms
+ Ondemand fine upscaling limit..: 95 % -> 85 %
+
+ --- MuQSS CPU Scheduler --------------------------------
+
+ Scheduling interval............: 6 -> 3 ms
+ ISO task max realtime use......: 70 % -> 25 %
+ Ondemand coarse upscaling limit: 80 % -> 45 %
+ Ondemand fine upscaling limit..: 95 % -> 45 %
+
config BROKEN
bool
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -37,8 +37,13 @@
*
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_latency = 3000000ULL;
+static unsigned int normalized_sysctl_sched_latency = 3000000ULL;
+#else
unsigned int sysctl_sched_latency = 6000000ULL;
static unsigned int normalized_sysctl_sched_latency = 6000000ULL;
+#endif
/*
* The initial- and re-scaling of tunables is configurable
@@ -58,21 +63,34 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
*
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_min_granularity = 300000ULL;
+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_min_granularity = 750000ULL;
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
+#endif
/*
* Minimal preemption granularity for CPU-bound SCHED_IDLE tasks.
* Applies only when SCHED_IDLE tasks compete with normal tasks.
*
* (default: 0.75 msec)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_idle_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_idle_min_granularity = 750000ULL;
+#endif
/*
* This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sched_nr_latency = 10;
+#else
static unsigned int sched_nr_latency = 8;
+#endif
/*
* After fork, child runs first. If set to 0 (default) then
@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu)
*
* (default: 5 msec, units: microseconds)
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
+#else
static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
+#endif
#ifdef CONFIG_SYSCTL
static struct ctl_table sched_fair_sysctls[] = {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 28b3e7a67565..01a1aef2b9b1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
/*
* Start background writeback (via writeback threads) at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int dirty_background_ratio = 20;
+#else
static int dirty_background_ratio = 10;
+#endif
/*
* dirty_background_bytes starts at 0 (disabled) so that it is a function of
@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable;
/*
* The generator of dirty data starts writeback at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int vm_dirty_ratio = 50;
+#else
static int vm_dirty_ratio = 20;
+#endif
/*
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of
--
2.28.0
From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001
From: Steven Barrett <damentz@liquorix.net>
Date: Sun, 16 Jan 2011 18:57:32 -0600
Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control
4.4: In my tests YeAH dramatically slowed down transfers over a WLAN,
reducing throughput from ~65Mbps (CUBIC) to ~7Mbps (YeAH) over 10
seconds (netperf TCP_STREAM) including long stalls.
Be careful when choosing this. ~heftig
---
net/ipv4/Kconfig | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e64e59b536d3..bfb55ef7ebbe 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -691,6 +691,9 @@ choice
config DEFAULT_VEGAS
bool "Vegas" if TCP_CONG_VEGAS=y
+ config DEFAULT_YEAH
+ bool "YeAH" if TCP_CONG_YEAH=y
+
config DEFAULT_VENO
bool "Veno" if TCP_CONG_VENO=y
@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG
default "htcp" if DEFAULT_HTCP
default "hybla" if DEFAULT_HYBLA
default "vegas" if DEFAULT_VEGAS
+ default "yeah" if DEFAULT_YEAH
default "westwood" if DEFAULT_WESTWOOD
default "veno" if DEFAULT_VENO
default "reno" if DEFAULT_RENO
--
2.28.0
From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 28 Nov 2018 19:01:27 -0600
Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag
strategy
For some reason, the default strategy to respond to THP fault fallbacks
is still just madvise, meaning stall if the program wants transparent
hugepages, but don't trigger a background reclaim / compaction if THP
begins to fail allocations. This creates a snowball effect where we
still use the THP code paths, but we almost always fail once a system
has been active and busy for a while.
The option "defer" was created for interactive systems where THP can
still improve performance. If we have to fall back to a regular page due
to an allocation failure or anything else, we will trigger a background
reclaim and compaction so future THP attempts succeed and previous
attempts eventually have their smaller pages combined without stalling
running applications.
We still want madvise to stall applications that explicitly want THP,
so defer+madvise _does_ make a ton of sense. Make it the default for
interactive systems, especially if the kernel maintainer left
transparent hugepages on "always".
Reasoning and details in the original patch: https://lwn.net/Articles/711248/
---
mm/huge_memory.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74300e337c3c..9277f22c10a7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly =
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif
+#ifdef CONFIG_ZENIFY
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
+#else
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
+#endif
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
--
2.28.0
From 2b65a1329cb220b43c19c4d0de5833fae9e2b22d Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Wed, 24 Oct 2018 16:58:52 -0300
Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
net/sched/Kconfig | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 84badf00647e..6a922bca9f39 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -471,6 +471,9 @@ choice
config DEFAULT_SFQ
bool "Stochastic Fair Queue" if NET_SCH_SFQ
+ config DEFAULT_CAKE
+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE
+
config DEFAULT_PFIFO_FAST
bool "Priority FIFO Fast"
endchoice
@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH
default "fq" if DEFAULT_FQ
default "fq_codel" if DEFAULT_FQ_CODEL
default "sfq" if DEFAULT_SFQ
+ default "cake" if DEFAULT_CAKE
default "pfifo_fast"
endif
--
2.28.0
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Fri, 19 Apr 2019 12:33:38 +0200
Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default
The value is still pretty low, and AMD64-ABI and ELF extended numbering
support that, so we should be fine on modern x86 systems.
This fixes crashes in some applications using more than 65535 VMAs (also
affects some Windows games running in Wine, such as Star Citizen).
---
include/linux/mm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc05c3588aa3..b0cefe94920d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define MAPCOUNT_ELF_CORE_MARGIN (5)
-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+#define DEFAULT_MAX_MAP_COUNT (262144)
extern int sysctl_max_map_count;
--
2.28.0
From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Mon, 27 Jul 2020 00:19:18 +0200
Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT
Some games such as Detroit: Become Human tend to be very crash prone with
lower values.
---
include/linux/mm.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b0cefe94920d..890165099b07 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define DEFAULT_MAX_MAP_COUNT (262144)
+#define DEFAULT_MAX_MAP_COUNT (16777216)
extern int sysctl_max_map_count;
--
2.28.0
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 25 Nov 2019 15:13:06 -0300
Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
block/elevator.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/block/elevator.c b/block/elevator.c
index 4eab3d70e880..79669aa39d79 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -623,19 +623,19 @@ static inline bool elv_support_iosched(struct request_queue *q)
}
/*
- * For single queue devices, default to using mq-deadline. If we have multiple
- * queues or mq-deadline is not available, default to "none".
+ * For single queue devices, default to using bfq. If we have multiple
+ * queues or bfq is not available, default to "none".
*/
static struct elevator_type *elevator_get_default(struct request_queue *q)
{
if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
return NULL;
if (q->nr_hw_queues != 1 &&
!blk_mq_is_shared_tags(q->tag_set->flags))
return NULL;
- return elevator_find_get(q, "mq-deadline");
+ return elevator_find_get(q, "bfq");
}
/*
--
2.28.0
From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 3 Aug 2020 17:05:04 +0000
Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file
read-ahead pages size
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/pagemap.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cf2468da68e9..007dea784451 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);
-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE)
void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
struct file *, pgoff_t index, unsigned long req_count);
--
2.28.0
From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 15 Jan 2020 20:43:56 -0600
Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter
If intel-pstate is compiled into the kernel, it will preempt the loading
of acpi-cpufreq so you can take advantage of hardware p-states without
any friction.
However, intel-pstate is not completely superior to cpufreq's ondemand
for one reason. There's no concept of an up_threshold property.
In ondemand, up_threshold essentially reduces the maximum utilization to
compare against, allowing you to hit max frequencies and turbo boost
from a much lower core utilization.
With intel-pstate, you have the concept of minimum and maximum
performance, but no tunable that lets you define that maximum frequency
means 50% core utilization. Because of this oversight alone, there are
reasons you may want ondemand.
Let's support setting "enable" in kernel boot parameters. This lets
kernel maintainers include "intel_pstate=disable" statically in the
static boot parameters, but let users of the kernel override this
selection.
---
Documentation/admin-guide/kernel-parameters.txt | 3 +++
drivers/cpufreq/intel_pstate.c | 2 ++
2 files changed, 5 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb95fad81c79..3e92fee81e33 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1857,6 +1857,9 @@
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
+ enable
+ Enable intel_pstate in case "disable" was passed
+ previously in the kernel boot parameters
passive
Use intel_pstate as a scaling driver, but configure it
to work with generic cpufreq governors (instead of
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 36a469150ff9..aee891c9b78a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str)
if (!strcmp(str, "no_hwp"))
no_hwp = 1;
+ if (!strcmp(str, "enable"))
+ no_load = 0;
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))
--
2.28.0
From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001
From: Kenny Levinsen <kl@kl.wtf>
Date: Sun, 27 Dec 2020 14:43:13 +0000
Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client
Significant time was spent on synchronize_rcu in evdev_detach_client
when applications closed evdev devices. Switching VT away from a
graphical environment commonly leads to mass input device closures,
which could lead to noticeable delays on systems with many input devices.
Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
client struct till after the RCU grace period instead of blocking the
calling application.
While this does not solve all slow evdev fd closures, it takes care of a
good portion of them, including this simple test:
#include <fcntl.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
int idx, fd;
const char *path = "/dev/input/event0";
for (idx = 0; idx < 1000; idx++) {
if ((fd = open(path, O_RDWR)) == -1) {
return -1;
}
close(fd);
}
return 0;
}
Time to completion of above test when run locally:
Before: 0m27.111s
After: 0m0.018s
Signed-off-by: Kenny Levinsen <kl@kl.wtf>
---
drivers/input/evdev.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 95f90699d2b17b..2b10fe29d2c8d9 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -46,6 +46,7 @@ struct evdev_client {
struct fasync_struct *fasync;
struct evdev *evdev;
struct list_head node;
+ struct rcu_head rcu;
enum input_clock_type clk_type;
bool revoked;
unsigned long *evmasks[EV_CNT];
@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
spin_unlock(&evdev->client_lock);
}
+static void evdev_reclaim_client(struct rcu_head *rp)
+{
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
+ unsigned int i;
+ for (i = 0; i < EV_CNT; ++i)
+ bitmap_free(client->evmasks[i]);
+ kvfree(client);
+}
+
static void evdev_detach_client(struct evdev *evdev,
struct evdev_client *client)
{
spin_lock(&evdev->client_lock);
list_del_rcu(&client->node);
spin_unlock(&evdev->client_lock);
- synchronize_rcu();
+ call_rcu(&client->rcu, evdev_reclaim_client);
}
static int evdev_open_device(struct evdev *evdev)
@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
- unsigned int i;
mutex_lock(&evdev->mutex);
@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
evdev_detach_client(evdev, client);
- for (i = 0; i < EV_CNT; ++i)
- bitmap_free(client->evmasks[i]);
-
- kvfree(client);
-
evdev_close_device(evdev);
return 0;
@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
err_free_client:
evdev_detach_client(evdev, client);
- kvfree(client);
return error;
}
From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:11 -0700
Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
scheduling delays
The page allocator processes free pages in groups of pageblocks, where
the size of a pageblock is typically quite large (1024 pages without
hugetlbpage support). Pageblocks are processed atomically with the zone
lock held, which can cause severe scheduling delays on both the CPU
going through the pageblock and any other CPUs waiting to acquire the
zone lock. A frequent offender is move_freepages_block(), which is used
by rmqueue() for page allocation.
As it turns out, there's no requirement for pageblocks to be so large,
so the pageblock order can simply be reduced to ease the scheduling
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
reasonable setting to ensure non-costly page allocation requests can
still be serviced without always needing to free up more than one
pageblock's worth of pages at a time.
This has a noticeable effect on overall system latency when memory
pressure is elevated. The various mm functions which operate on
pageblocks no longer appear in the preemptoff tracer, where previously
they would spend up to 100 ms on a mobile arm64 CPU processing a
pageblock with preemption disabled and the zone lock held.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
include/linux/pageblock-flags.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 5f1ae07d724b88..97cda629c9e909 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;
#else /* CONFIG_HUGETLB_PAGE */
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
-#define pageblock_order (MAX_ORDER-1)
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER
#endif /* CONFIG_HUGETLB_PAGE */
From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:32 -0700
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()
There is noticeable scheduling latency and heavy zone lock contention
stemming from rmqueue_bulk's single hold of the zone lock while doing
its work, as seen with the preemptoff tracer. There's no actual need for
rmqueue_bulk() to hold the zone lock the entire time; it only does so
for supposed efficiency. As such, we can relax the zone lock and even
reschedule when IRQs are enabled in order to keep the scheduling delays
and zone lock contention at bay. Forward progress is still guaranteed,
as the zone lock can only be relaxed after page removal.
With this change, rmqueue_bulk() no longer appears as a serious offender
in the preemptoff tracer, and system latency is noticeably improved.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
mm/page_alloc.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0b0397e29ee4c..87a983a356530c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3118,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
}
/*
- * Obtain a specified number of elements from the buddy allocator, all under
- * a single hold of the lock, for efficiency. Add them to the supplied list.
- * Returns the number of new pages which were placed at *list.
+ * Obtain a specified number of elements from the buddy allocator, and relax the
+ * zone lock when needed. Add them to the supplied list. Returns the number of
+ * new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
unsigned long flags;
- int i, allocated = 0;
+ const bool can_resched = !preempt_count() && !irqs_disabled();
+ int i, allocated = 0, last_mod = 0;
/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
spin_lock(&zone->lock);
@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
if (unlikely(page == NULL))
break;
+ /* Reschedule and ease the contention on the lock if needed */
+ if (i + 1 < count && ((can_resched && need_resched()) ||
+ spin_needbreak(&zone->lock))) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
+ -((i + 1 - last_mod) << order));
+ last_mod = i + 1;
+ spin_unlock(&zone->lock);
+ if (can_resched)
+ cond_resched();
+ spin_lock(&zone->lock);
+ }
+
if (unlikely(check_pcp_refill(page, order)))
continue;
@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* on i. Do not confuse with 'allocated' which is the number of
* pages added to the pcp list.
*/
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
spin_unlock(&zone->lock);
return allocated;
}
From 6329525a0fa10cd13f39b76948b1296150f75c95 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 29 Aug 2022 16:47:26 +0000
Subject: [PATCH 14/16] XANMOD: Makefile: Disable GCC vectorization on trees
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
Makefile | 3 +++
1 file changed, 3 insertions(+)
diff --git a/Makefile b/Makefile
index 3f6628780eb2..35a5ae1ede42 100644
--- a/Makefile
+++ b/Makefile
@@ -1069,6 +1069,9 @@ endif
KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
+# disable GCC vectorization on trees
+KBUILD_CFLAGS += $(call cc-option, -fno-tree-vectorize)
+
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += -fno-strict-overflow
--
2.39.1
From f997578464b2c4c63e7bd1afbfef56212ee44f2d Mon Sep 17 00:00:00 2001
From: Etienne JUVIGNY <ti3nou@gmail.com>
Date: Mon, 6 Mar 2023 13:54:09 +0100
Subject: Don't add -dirty versioning on unclean trees
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index ca5795e16..ad0d94477 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -85,12 +85,12 @@ scm_version()
# git-diff-index does not refresh the index, so it may give misleading
# results.
# See git-update-index(1), git-diff-index(1), and git-status(1).
- if {
- git --no-optional-locks status -uno --porcelain 2>/dev/null ||
- git diff-index --name-only HEAD
- } | read dummy; then
- printf '%s' -dirty
- fi
+ #if {
+ # git --no-optional-locks status -uno --porcelain 2>/dev/null ||
+ # git diff-index --name-only HEAD
+ #} | read dummy; then
+ # printf '%s' -dirty
+ #fi
}
collect_files()

View File

@@ -0,0 +1,36 @@
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
*
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_wakeup_granularity = 500000UL;
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL;
+
+const_debug unsigned int sysctl_sched_migration_cost = 50000UL;
+#else
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#endif
int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 051aaf65c..705df5511 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -208,7 +208,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
DEFINE_STATIC_KEY_FALSE(sched_energy_present);
-static unsigned int sysctl_sched_energy_aware = 1;
+static unsigned int sysctl_sched_energy_aware = 0;
DEFINE_MUTEX(sched_energy_mutex);
bool sched_energy_update;

View File

@@ -0,0 +1,118 @@
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
From cba31b19f8c38696b13ba48e0e8b6dbe747d6bae Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:31:25 +0000
Subject: [PATCH 10/16] XANMOD: mm/vmscan: vm_swappiness = 30 decreases the
amount of swapping
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
mm/vmscan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5b7b8d4f5297..549684b29418 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -190,7 +190,7 @@ struct scan_control {
/*
* From 0 .. 200. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 30;
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)
--
2.39.1

View File

@@ -0,0 +1,90 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: glitched - PDS
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9270a4370d54..30d01e647417 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -169,7 +169,7 @@
/*
* From 0 .. 200. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 20;
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)

View File

@@ -0,0 +1,193 @@
From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001
From: Mark Weiman <mark.weiman@markzz.com>
Date: Sun, 12 Aug 2018 11:36:21 -0400
Subject: [PATCH] pci: Enable overrides for missing ACS capabilities
This is an updated version of Alex Williamson's patch from:
https://lkml.org/lkml/2013/5/30/513
Original commit message follows:
PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that
allows us to control whether transactions are allowed to be redirected
in various subnodes of a PCIe topology. For instance, if two
endpoints are below a root port or downstream switch port, the
downstream port may optionally redirect transactions between the
devices, bypassing upstream devices. The same can happen internally
on multifunction devices. The transaction may never be visible to the
upstream devices.
One upstream device that we particularly care about is the IOMMU. If
a redirection occurs in the topology below the IOMMU, then the IOMMU
cannot provide isolation between devices. This is why the PCIe spec
encourages topologies to include ACS support. Without it, we have to
assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation.
Unfortunately, far too many topologies do not support ACS to make this
a steadfast requirement. Even the latest chipsets from Intel are only
sporadically supporting ACS. We have trouble getting interconnect
vendors to include the PCIe spec required PCIe capability, let alone
suggested features.
Therefore, we need to add some flexibility. The pcie_acs_override=
boot option lets users opt-in specific devices or sets of devices to
assume ACS support. The "downstream" option assumes full ACS support
on root ports and downstream switch ports. The "multifunction"
option assumes the subset of ACS features available on multifunction
endpoints and upstream switch ports are supported. The "id:nnnn:nnnn"
option enables ACS support on devices matching the provided vendor
and device IDs, allowing more strategic ACS overrides. These options
may be combined in any order. A maximum of 16 id specific overrides
are available. It's suggested to use the most limited set of options
necessary to avoid completely disabling ACS across the topology.
Note to hardware vendors, we have facilities to permanently quirk
specific devices which enforce isolation but do not provide an ACS
capability. Please contact me to have your devices added and save
your customers the hassle of this boot option.
Signed-off-by: Mark Weiman <mark.weiman@markzz.com>
---
.../admin-guide/kernel-parameters.txt | 9 ++
drivers/pci/quirks.c | 101 ++++++++++++++++++
2 files changed, 110 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aefd358a5ca3..173b3596fd9e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3190,6 +3190,15 @@
nomsi [MSI] If the PCI_MSI kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of MSI interrupts system-wide.
+ pcie_acs_override =
+ [PCIE] Override missing PCIe ACS support for:
+ downstream
+ All downstream ports - full ACS capabilities
+ multifunction
+ All multifunction devices - multifunction ACS subset
+ id:nnnn:nnnn
+ Specific device - full ACS capabilities
+ Specified as vid:did (vendor/device ID) in hex
noioapicquirk [APIC] Disable all boot interrupt quirks.
Safety option to keep boot IRQs enabled. This
should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 4700d24e5d55..8f7a3d7fd9c1 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3372,6 +3372,124 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
 	dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
 }
 
+static bool acs_on_downstream;
+static bool acs_on_multifunction;
+
+#define NUM_ACS_IDS 16
+struct acs_on_id {
+	unsigned short vendor;
+	unsigned short device;
+};
+static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
+static u8 max_acs_id;
+
+/*
+ * Parse the "pcie_acs_override=" boot option: a comma-separated list of
+ * "downstream", "multifunction" and "id:vvvv:dddd" (hex vendor:device)
+ * tokens.  Unrecognised tokens are skipped.  A malformed "id:" token is
+ * reported with pr_warn() and dropped; later tokens are still parsed.
+ *
+ * Returns -EINVAL if no value was supplied, 0 otherwise.
+ */
+static __init int pcie_acs_override_setup(char *p)
+{
+	if (!p)
+		return -EINVAL;
+
+	while (*p) {
+		if (!strncmp(p, "downstream", 10))
+			acs_on_downstream = true;
+		if (!strncmp(p, "multifunction", 13))
+			acs_on_multifunction = true;
+		if (!strncmp(p, "id:", 3)) {
+			char opt[5];
+			int ret;
+			long val;
+
+			/* Every slot 0..NUM_ACS_IDS-1 is usable; stop only when full */
+			if (max_acs_id >= NUM_ACS_IDS) {
+				pr_warn("Out of PCIe ACS override slots (%d)\n",
+					NUM_ACS_IDS);
+				goto next;
+			}
+
+			p += 3;
+			/* opt holds at most four characters of the vendor ID */
+			snprintf(opt, 5, "%s", p);
+			ret = kstrtol(opt, 16, &val);
+			if (ret) {
+				pr_warn("PCIe ACS ID parse error %d\n", ret);
+				goto next;
+			}
+			acs_on_ids[max_acs_id].vendor = val;
+
+			p += strcspn(p, ":");
+			if (*p != ':') {
+				pr_warn("PCIe ACS invalid ID\n");
+				goto next;
+			}
+
+			p++;
+			/* Same for the device ID after the ':' separator */
+			snprintf(opt, 5, "%s", p);
+			ret = kstrtol(opt, 16, &val);
+			if (ret) {
+				pr_warn("PCIe ACS ID parse error %d\n", ret);
+				goto next;
+			}
+			acs_on_ids[max_acs_id].device = val;
+			/* Commit the slot only after both IDs parsed */
+			max_acs_id++;
+		}
+next:
+		p += strcspn(p, ",");
+		if (*p == ',')
+			p++;
+	}
+
+	if (acs_on_downstream || acs_on_multifunction || max_acs_id)
+		pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n");
+
+	return 0;
+}
+early_param("pcie_acs_override", pcie_acs_override_setup);
+
+/*
+ * Decide whether @dev is covered by a pcie_acs_override= selection.
+ * @acs_flags is not consulted.
+ *
+ * Returns 1 for a match, -ENOTTY otherwise.
+ */
+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags)
+{
+	int i;
+
+	/* Never override ACS for legacy devices or devices with ACS caps */
+	if (!pci_is_pcie(dev) ||
+	    pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS))
+		return -ENOTTY;
+
+	for (i = 0; i < max_acs_id; i++)
+		if (acs_on_ids[i].vendor == dev->vendor &&
+		    acs_on_ids[i].device == dev->device)
+			return 1;
+
+	switch (pci_pcie_type(dev)) {
+	case PCI_EXP_TYPE_DOWNSTREAM:
+	case PCI_EXP_TYPE_ROOT_PORT:
+		if (acs_on_downstream)
+			return 1;
+		break;
+	case PCI_EXP_TYPE_ENDPOINT:
+	case PCI_EXP_TYPE_UPSTREAM:
+	case PCI_EXP_TYPE_LEG_END:
+	case PCI_EXP_TYPE_RC_END:
+		if (acs_on_multifunction && dev->multifunction)
+			return 1;
+	}
+
+	return -ENOTTY;
+}
 /*
  * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
  * The device will throw a Link Down error on AER-capable systems and
@@ -5102,6 +5102,7 @@
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
/* Wangxun nics */
{ PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs },
+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
{ 0 }
};

View File

@@ -13,12 +13,12 @@ of Proton to still use fsync in new kernel releases.
Signed-off-by: André Almeida <andrealmeid@collabora.com>
---
include/uapi/linux/futex.h | 12 ++++++
kernel/futex/core.c | 75 +++++++++++++++++++++++++++++++++++++-
2 files changed, 86 insertions(+), 1 deletion(-)
include/uapi/linux/futex.h | 13 +++++++
kernel/futex/syscalls.c | 75 +++++++++++++++++++++++++++++++++++++-
2 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 2a06b99f9803..417c5d89b745 100644
index 71a5df8d2689..d375ab21cbf8 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -22,6 +22,7 @@
@@ -29,7 +29,7 @@ index 2a06b99f9803..417c5d89b745 100644
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
@@ -68,6 +69,17 @@ struct futex_waitv {
@@ -68,6 +69,18 @@ struct futex_waitv {
__u32 __reserved;
};
@@ -44,14 +44,15 @@ index 2a06b99f9803..417c5d89b745 100644
+ __u32 val;
+ __u32 bitset;
+};
+
/*
* Support for robust futexes: the kernel cleans up held futexes at
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 4a9e7ce3714a..c3f2e65afab8 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -4012,6 +4012,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
* thread exit time.
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index 6f91a07a6a83..2f4d4c04ede2 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -158,6 +158,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
case FUTEX_LOCK_PI2:
case FUTEX_WAIT_BITSET:
case FUTEX_WAIT_REQUEUE_PI:
@@ -59,7 +60,7 @@ index 4a9e7ce3714a..c3f2e65afab8 100644
return true;
}
return false;
@@ -4024,13 +4025,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
@@ -170,13 +171,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
return -EINVAL;
*t = timespec64_to_ktime(*ts);
@@ -140,7 +141,7 @@ index 4a9e7ce3714a..c3f2e65afab8 100644
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
const struct __kernel_timespec __user *, utime,
u32 __user *, uaddr2, u32, val3)
@@ -4050,6 +4117,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
@@ -196,6 +263,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
tp = &t;
}
@@ -150,7 +151,7 @@ index 4a9e7ce3714a..c3f2e65afab8 100644
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
@@ -4551,6 +4621,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
@@ -392,6 +462,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
tp = &t;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,90 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: glitched - BMQ
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9270a4370d54..30d01e647417 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -169,7 +169,7 @@
/*
* From 0 .. 200. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 20;
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)

View File

@@ -0,0 +1,18 @@
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)

View File

@@ -0,0 +1,66 @@
From e5e77ad2223f662e1615266d8ef39a8db7e65a70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20H=C3=A4dicke?= <felixhaedicke@web.de>
Date: Thu, 19 Nov 2020 09:22:32 +0100
Subject: HID: quirks: Add Apple Magic Trackpad 2 to hid_have_special_driver
list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The Apple Magic Trackpad 2 is handled by the magicmouse driver. And
there were severe stability issues when both drivers (hid-generic and
hid-magicmouse) were loaded for this device.
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=210241
Signed-off-by: Felix Hädicke <felixhaedicke@web.de>
---
drivers/hid/hid-quirks.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index bf7ecab5d9e5..142e9dae2837 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -478,6 +478,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
+ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
#endif
#if IS_ENABLED(CONFIG_HID_MAYFLASH)
{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
--
cgit v1.2.3-1-gf6bb5
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 3 Feb 2021 11:20:12 +0200
Subject: Revert "cpufreq: Avoid configuring old governors as default with intel_pstate"
This is an undesirable behavior for us since our aggressive ondemand performs
better than schedutil for gaming when using intel_pstate in passive mode.
Also it interferes with the option to select the desired default governor we have.
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 2c7171e0b0010..85de313ddec29 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -71,7 +71,6 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE
config CPU_FREQ_DEFAULT_GOV_ONDEMAND
bool "ondemand"
- depends on !(X86_INTEL_PSTATE && SMP)
select CPU_FREQ_GOV_ONDEMAND
select CPU_FREQ_GOV_PERFORMANCE
help
@@ -83,7 +84,6 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND
config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
bool "conservative"
- depends on !(X86_INTEL_PSTATE && SMP)
select CPU_FREQ_GOV_CONSERVATIVE
select CPU_FREQ_GOV_PERFORMANCE
help

View File

@@ -0,0 +1,53 @@
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 70392fd2f..34f98648f 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -25,7 +25,7 @@ fi
+PROVIDES_DRM=""
if grep -q CONFIG_DRM=y include/config/auto.conf; then
- PROVIDES=kernel-drm
+ PROVIDES_DRM="Provides: kernel-drm = %{version}"
fi
-PROVIDES="$PROVIDES kernel-$KERNELRELEASE"
__KERNELRELEASE=$(echo $KERNELRELEASE | sed -e "s/-/_/g")
@@ -50,3 +50,6 @@ sed -e '/^DEL/d' -e 's/^\t*//' <<EOF
$S Source: kernel-$__KERNELRELEASE.tar.gz
- Provides: $PROVIDES
+ $PROVIDES_DRM
+ Provides: kernel = %{version}
+ Provides: kernel-uname-r = %{version}
+ Provides: installonlypkg(kernel) = %{version}
# $UTS_MACHINE as a fallback of _arch in case
@@ -63,4 +66,4 @@ $S Source: kernel-$__KERNELRELEASE.tar.gz
Group: Development/System
- Obsoletes: kernel-headers
Provides: kernel-headers = %{version}
+ Provides: installonlypkg(kernel) = %{version}
%description headers
@@ -75,2 +78,5 @@ $S$M Summary: Development package for building kernel modules to match the $__KE
$S$M Group: System Environment/Kernel
+$S$M Provides: kernel-devel = %{version}
+$S$M Provides: kernel-devel-uname-r = %{version}
+$S$M Provides: installonlypkg(kernel) = %{version}
$S$M AutoReqProv: no
@@ -80,2 +86,18 @@ $S$M against the $__KERNELRELEASE kernel package.
$S$M
+$S # Opt out of a lot of Fedora hardening flags etc...
+$S # See https://src.fedoraproject.org/rpms/redhat-rpm-config//blob/rawhide/f/buildflags.md
+$S %undefine _package_note_file
+$S %undefine _auto_set_build_flags
+$S %undefine _include_frame_pointers
+$S %define _build_id_flags -Wl,--build-id=none
+$S %undefine _annotated_build
+$S %undefine _fortify_level
+$S %undefine _hardened_build
+$S %global _lto_cflags %{nil}
+$S %global _configure_gnuconfig_hack 0
+$S %global _configure_libtool_hardening_hack 0
+$S # Nearly had to go to the deep web to find documentation on this one... Gosh
+$S # See https://github.com/rpm-software-management/rpm/blob/master/macros.in#L471
+$S %define _build_id_links none
+$S
$S %prep

View File

@@ -0,0 +1,46 @@
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -442,7 +442,7 @@ endif
HOSTPKG_CONFIG = pkg-config
KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
- -O2 -fomit-frame-pointer -std=gnu11 \
+ -O3 -fomit-frame-pointer -std=gnu11 \
-Wdeclaration-after-statement
KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS)
KBUILD_USERLDFLAGS := $(USERLDFLAGS)
@@ -474,7 +474,7 @@ endif
-Wclippy::dbg_macro
KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
-KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
+KBUILD_HOSTCXXFLAGS := -Wall -O3 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
KBUILD_HOSTRUSTFLAGS := $(rust_common_flags) -O -Cstrip=debuginfo \
-Zallow-features= $(HOSTRUSTFLAGS)
KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS)
@@ -757,7 +757,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
-KBUILD_CFLAGS += -O2
+KBUILD_CFLAGS += -O3
KBUILD_RUSTFLAGS += -Copt-level=2
else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += -Os
diff --git a/init/Kconfig b/init/Kconfig
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1401,10 +1401,10 @@ choice
default CC_OPTIMIZE_FOR_PERFORMANCE
config CC_OPTIMIZE_FOR_PERFORMANCE
- bool "Optimize for performance (-O2)"
+ bool "Optimize for performance (-O3)"
help
This is the default optimization level for the kernel, building
- with the "-O2" compiler flag for best performance and most
+ with the "-O3" compiler flag for best performance and most
helpful compile-time warnings.
config CC_OPTIMIZE_FOR_SIZE

View File

@@ -0,0 +1,151 @@
From d50977b164e708bf523a35ef53315355528c3ca6 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Mon, 16 Sep 2019 04:53:20 +0200
Subject: [PATCH] ZEN: Add sysctl and CONFIG to disallow unprivileged
CLONE_NEWUSER
Our default behavior continues to match the vanilla kernel.
---
include/linux/user_namespace.h | 4 ++++
init/Kconfig | 16 ++++++++++++++++
kernel/fork.c | 14 ++++++++++++++
kernel/sysctl.c | 12 ++++++++++++
kernel/user_namespace.c | 7 +++++++
5 files changed, 53 insertions(+)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 45f09bec02c485..87b20e2ee27445 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -148,6 +148,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
@@ -181,6 +183,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
struct ns_common *ns_get_owner(struct ns_common *ns);
#else
+#define unprivileged_userns_clone 0
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
return &init_user_ns;
diff --git a/init/Kconfig b/init/Kconfig
index 94125d3b6893c7..9f7139b536f638 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1247,6 +1247,22 @@ config USER_NS
If unsure, say N.
+config USER_NS_UNPRIVILEGED
+ bool "Allow unprivileged users to create namespaces"
+ default y
+ depends on USER_NS
+ help
+ When disabled, unprivileged users will not be able to create
+ new namespaces. Allowing users to create their own namespaces
+ has been part of several recent local privilege escalation
+ exploits, so if you need user namespaces but are
+ paranoid^Wsecurity-conscious you want to disable this.
+
+ This setting can be overridden at runtime via the
+ kernel.unprivileged_userns_clone sysctl.
+
+ If unsure, say Y.
+
config PID_NS
bool "PID Namespaces"
default y
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d59..ff601cb7a1fae0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -98,6 +98,10 @@
#include <linux/io_uring.h>
#include <linux/bpf.h>
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
+
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -2008,6 +2012,10 @@ static __latent_entropy struct task_struct *copy_process(
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -3166,6 +3174,12 @@ int ksys_unshare(unsigned long unshare_flags)
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto bad_unshare_out;
+ }
+
err = check_unshare_flags(unshare_flags);
if (err)
goto bad_unshare_out;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c6d9dec11b749d..9a4514ad481b21 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -81,6 +81,9 @@
#ifdef CONFIG_RT_MUTEXES
#include <linux/rtmutex.h>
#endif
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
/* shared constants to be used in various sysctls */
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
@@ -1659,6 +1662,15 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_USER_NS
+ {
+ .procname = "unprivileged_userns_clone",
+ .data = &unprivileged_userns_clone,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 54211dbd516c57..16ca0c1516298d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -22,6 +22,13 @@
#include <linux/bsearch.h>
#include <linux/sort.h>
+/* sysctl */
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
+int unprivileged_userns_clone = 1;
+#else
+int unprivileged_userns_clone;
+#endif
+
static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);

View File

@@ -0,0 +1,244 @@
From 5ae86c8436b83762bc6cf46bea1da6ace2d3f50e Mon Sep 17 00:00:00 2001
From: Paul Gofman <pgofman@codeweavers.com>
Date: Wed, 6 May 2020 14:37:44 +0300
Subject: [PATCH 1/2] mm: Support soft dirty flag reset for VA range.
---
fs/proc/task_mmu.c | 129 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 103 insertions(+), 26 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3cec6fbef725..7c7865028f10 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1032,6 +1032,8 @@ enum clear_refs_types {
struct clear_refs_private {
enum clear_refs_types type;
+ unsigned long start, end;
+ bool clear_range;
};
#ifdef CONFIG_MEM_SOFT_DIRTY
@@ -1125,6 +1127,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
struct page *page;
+ BUG_ON(addr < cp->start || end > cp->end);
+
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
@@ -1181,9 +1185,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
struct clear_refs_private *cp = walk->private;
struct vm_area_struct *vma = walk->vma;
- if (vma->vm_flags & VM_PFNMAP)
+ if (!cp->clear_range && (vma->vm_flags & VM_PFNMAP))
return 1;
+ BUG_ON(start < cp->start || end > cp->end);
+
/*
* Writing 1 to /proc/pid/clear_refs affects all pages.
* Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
@@ -1206,10 +1212,12 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct task_struct *task;
- char buffer[PROC_NUMBUF];
+ char buffer[18];
struct mm_struct *mm;
struct vm_area_struct *vma;
enum clear_refs_types type;
+ unsigned long start, end;
+ bool clear_range;
int itype;
int rv;
@@ -1218,12 +1226,34 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- rv = kstrtoint(strstrip(buffer), 10, &itype);
- if (rv < 0)
- return rv;
- type = (enum clear_refs_types)itype;
- if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
- return -EINVAL;
+
+ if (buffer[0] == '6')
+ {
+ static int once;
+
+ if (!once++)
+ printk(KERN_DEBUG "task_mmu: Using POC clear refs range implementation.\n");
+
+ if (count != 17)
+ return -EINVAL;
+
+ type = CLEAR_REFS_SOFT_DIRTY;
+ start = *(unsigned long *)(buffer + 1);
+ end = *(unsigned long *)(buffer + 1 + 8);
+ }
+ else
+ {
+ rv = kstrtoint(strstrip(buffer), 10, &itype);
+ if (rv < 0)
+ return rv;
+ type = (enum clear_refs_types)itype;
+
+ if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
+ return -EINVAL;
+
+ start = 0;
+ end = -1UL;
+ }
task = get_proc_task(file_inode(file));
if (!task)
@@ -1235,40 +1265,86 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
.type = type,
};
- if (mmap_write_lock_killable(mm)) {
- count = -EINTR;
- goto out_mm;
+ if (start || end != -1UL)
+ {
+ start = min(start, -1) & PAGE_MASK;
+ end = min(end, -1) & PAGE_MASK;
+
+ if (start >= end)
+ {
+ count = -EINVAL;
+ goto out_mm;
+ }
+ clear_range = true;
}
+ else
+ {
+ clear_range = false;
+ }
+
+ cp.start = start;
+ cp.end = end;
+ cp.clear_range = clear_range;
+
if (type == CLEAR_REFS_MM_HIWATER_RSS) {
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+
/*
* Writing 5 to /proc/pid/clear_refs resets the peak
* resident set size to this mm's current rss value.
*/
reset_mm_hiwater_rss(mm);
- goto out_unlock;
+ mmap_write_unlock(mm);
+ goto out_mm;
}
if (type == CLEAR_REFS_SOFT_DIRTY) {
- for_each_vma(vmi, vma) {
- if (!(vma->vm_flags & VM_SOFTDIRTY))
- continue;
- vm_flags_clear(vma, VM_SOFTDIRTY);
- vma_set_page_prot(vma);
+ if (mmap_read_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
}
-
+ if (!clear_range)
+ for_each_vma(vmi, vma) {
+ if (!(vma->vm_flags & VM_SOFTDIRTY))
+ continue;
+ mmap_read_unlock(mm);
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+ for_each_vma(vmi, vma) {
+ vm_flags_clear(vma, VM_SOFTDIRTY);
+ vma_set_page_prot(vma);
+ }
+ mmap_write_downgrade(mm);
+ break;
+ }
inc_tlb_flush_pending(mm);
mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
- 0, mm, 0, -1UL);
+ 0, mm, start, end);
mmu_notifier_invalidate_range_start(&range);
}
- walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
+ else
+ {
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+ }
+ walk_page_range(mm, start, end == -1UL ? -1 : end, &clear_refs_walk_ops, &cp);
if (type == CLEAR_REFS_SOFT_DIRTY) {
mmu_notifier_invalidate_range_end(&range);
flush_tlb_mm(mm);
dec_tlb_flush_pending(mm);
+ mmap_read_unlock(mm);
+ }
+ else
+ {
+ mmap_write_unlock(mm);
}
-out_unlock:
- mmap_write_unlock(mm);
out_mm:
mmput(mm);
}
@@ -1301,6 +1377,7 @@ struct pagemapread {
#define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
+#define PM_SOFT_DIRTY_PAGE BIT_ULL(57)
#define PM_UFFD_WP BIT_ULL(57)
#define PM_FILE BIT_ULL(61)
#define PM_SWAP BIT_ULL(62)
@@ -1373,13 +1450,13 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_PRESENT;
page = vm_normal_page(vma, addr, pte);
if (pte_soft_dirty(pte))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pte_uffd_wp(pte))
flags |= PM_UFFD_WP;
} else if (is_swap_pte(pte)) {
swp_entry_t entry;
if (pte_swp_soft_dirty(pte))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pte_swp_uffd_wp(pte))
flags |= PM_UFFD_WP;
entry = pte_to_swp_entry(pte);
@@ -1500,7 +1500,7 @@
flags |= PM_PRESENT;
if (pmd_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pmd_uffd_wp(pmd))
flags |= PM_UFFD_WP;
if (pm->show_pfn)
@@ -1442,7 +1519,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
}
flags |= PM_SWAP;
if (pmd_swp_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pmd_swp_uffd_wp(pmd))
flags |= PM_UFFD_WP;
VM_BUG_ON(!is_pmd_migration_entry(pmd));
--
2.30.2

View File

@@ -0,0 +1,620 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 14 Mar 2016 11:10:58 -0600
Subject: [PATCH] pci pme wakeups
Reduce wakeups for PME checks, which are a workaround for miswired
boards (sadly, too many of them) in laptops.
---
drivers/pci/pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c9338f9..6974fbf 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -62,7 +62,7 @@ struct pci_pme_device {
struct pci_dev *dev;
};
-#define PME_TIMEOUT 1000 /* How long between PME checks */
+#define PME_TIMEOUT 4000 /* How long between PME checks */
static void pci_dev_d3_sleep(struct pci_dev *dev)
{
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 19 Mar 2016 21:32:19 -0400
Subject: [PATCH] intel_idle: tweak cpuidle cstates
Increase target_residency in cpuidle cstate
Tune intel_idle to be a bit less aggressive;
Clear linux is cleaner in hygiene (wakeups) than the average linux,
so we can afford changing these in a way that increases
performance while keeping power efficiency
---
drivers/idle/intel_idle.c | 44 +++++++++++++++++++--------------------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index f449584..c994d24 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 33,
- .target_residency = 100,
+ .target_residency = 900,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
- .target_residency = 400,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
- .target_residency = 500,
+ .target_residency = 1500,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
- .target_residency = 900,
+ .target_residency = 2000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
- .target_residency = 1800,
+ .target_residency = 5000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
- .target_residency = 7700,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 40,
- .target_residency = 100,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
- .target_residency = 400,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
- .target_residency = 500,
+ .target_residency = 2000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
- .target_residency = 900,
+ .target_residency = 4000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
- .target_residency = 1800,
+ .target_residency = 7000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
- .target_residency = 7700,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 70,
- .target_residency = 100,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 85,
- .target_residency = 200,
+ .target_residency = 600,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x33",
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 124,
- .target_residency = 800,
+ .target_residency = 3000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200,
- .target_residency = 800,
+ .target_residency = 3200,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -708,7 +708,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 480,
- .target_residency = 5000,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 890,
- .target_residency = 5000,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 300,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 6 Jan 2017 15:34:09 +0000
Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little
bigger than default
---
net/ipv4/tcp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 30c1142..4345075 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4201,8 +4201,8 @@ void __init tcp_init(void)
tcp_init_mem();
/* Set per-socket limits to no more than 1/128 the pressure threshold */
limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
- max_wshare = min(4UL*1024*1024, limit);
- max_rshare = min(6UL*1024*1024, limit);
+ max_wshare = min(16UL*1024*1024, limit);
+ max_rshare = min(16UL*1024*1024, limit);
init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 18 Feb 2018 23:35:41 +0000
Subject: [PATCH] locking: rwsem: spin faster
tweak rwsem owner spinning a bit
---
kernel/locking/rwsem.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index f11b9bd..1bbfcc1 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
struct task_struct *new, *owner;
unsigned long flags, new_flags;
enum owner_state state;
+ int i = 0;
owner = rwsem_owner_flags(sem, &flags);
state = rwsem_owner_state(owner, flags, nonspinnable);
@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
break;
}
- cpu_relax();
+ if (i++ > 1000)
+ cpu_relax();
}
rcu_read_unlock();
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 2 Jun 2016 23:36:32 -0500
Subject: [PATCH] initialize ata before graphics
ATA init is the long pole in the boot process, and it's asynchronous.
move the graphics init after it so that ata and graphics initialize
in parallel
---
drivers/Makefile | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/Makefile b/drivers/Makefile
index c0cd1b9..af1e2fb 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -59,15 +59,8 @@ obj-y += char/
# iommu/ comes before gpu as gpu are using iommu controllers
obj-y += iommu/
-# gpu/ comes after char for AGP vs DRM startup and after iommu
-obj-y += gpu/
-
obj-$(CONFIG_CONNECTOR) += connector/
-# i810fb and intelfb depend on char/agp/
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
-obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
-
obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/
obj-y += scsi/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
+
+# gpu/ comes after char for AGP vs DRM startup and after iommu
+obj-y += gpu/
+
+# i810fb and intelfb depend on char/agp/
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
+
obj-$(CONFIG_TARGET_CORE) += target/
obj-$(CONFIG_MTD) += mtd/
obj-$(CONFIG_SPI) += spi/
--
https://clearlinux.org
From 676c2dc63592f52b716515573a3a825582a371e9 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 8 Dec 2018 18:21:32 +0000
Subject: [PATCH 1/9] x86/vdso: Use lfence instead of rep and nop
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
arch/x86/include/asm/vdso/processor.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h
index 57b1a7034c64..e2c45674f989 100644
--- a/arch/x86/include/asm/vdso/processor.h
+++ b/arch/x86/include/asm/vdso/processor.h
@@ -10,7 +10,7 @@
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static __always_inline void rep_nop(void)
{
- asm volatile("rep; nop" ::: "memory");
+ asm volatile("lfence" ::: "memory");
}
static __always_inline void cpu_relax(void)
--
2.39.1
From 48dc9669f8db68adc480ffc2698ed8204440e45b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 13 Dec 2018 01:00:49 +0000
Subject: [PATCH 2/9] sched/wait: Do accept() in LIFO order for cache
efficiency
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/wait.h | 2 ++
kernel/sched/wait.c | 24 ++++++++++++++++++++++++
net/ipv4/inet_connection_sock.c | 2 +-
3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a0307b516b09..edc21128f387 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -165,6 +165,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
@@ -1192,6 +1193,7 @@ do { \
*/
void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 133b74730738..1647fb8662eb 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -47,6 +47,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_
}
EXPORT_SYMBOL_GPL(add_wait_queue_priority);
+void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+{
+ unsigned long flags;
+
+ wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&wq_head->lock, flags);
+ __add_wait_queue(wq_head, wq_entry);
+ spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive_lifo);
+
void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
unsigned long flags;
@@ -293,6 +304,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
+{
+ unsigned long flags;
+
+ wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&wq_head->lock, flags);
+ if (list_empty(&wq_entry->entry))
+ __add_wait_queue(wq_head, wq_entry);
+ set_current_state(state);
+ spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo);
+
void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
wq_entry->flags = flags;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f2c43f67187d..9885bfb429a2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -606,7 +606,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
* having to remove and re-insert us on the wait queue.
*/
for (;;) {
- prepare_to_wait_exclusive(sk_sleep(sk), &wait,
+ prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
release_sock(sk);
if (reqsk_queue_empty(&icsk->icsk_accept_queue))
--
2.39.1
From afa213811c5490906caf394b20bb4b616fc6f12a Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 25 Aug 2022 15:55:26 -0700
Subject: [PATCH 3/9] sched/fair: Simplify asym_packing logic for SMT sched
groups
When the destination CPU is an SMT sibling and idle, it can only help the
busiest group if all of its other SMT siblings are also idle. Otherwise,
there is no increase in throughput.
It does not matter whether the busiest group has SMT siblings. Simply
check if there are any tasks running on the local group before proceeding.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
kernel/sched/fair.c | 29 +++++++++--------------------
1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 345cc5e9fa6e..60f9690a5626 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8921,12 +8921,10 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
struct sched_group *sg)
{
#ifdef CONFIG_SCHED_SMT
- bool local_is_smt, sg_is_smt;
+ bool local_is_smt;
int sg_busy_cpus;
local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
- sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
-
sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
if (!local_is_smt) {
@@ -8947,25 +8945,16 @@ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
}
- /* @dst_cpu has SMT siblings. */
-
- if (sg_is_smt) {
- int local_busy_cpus = sds->local->group_weight -
- sds->local_stat.idle_cpus;
- int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
-
- if (busy_cpus_delta == 1)
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
-
- return false;
- }
-
/*
- * @sg does not have SMT siblings. Ensure that @sds::local does not end
- * up with more than one busy SMT sibling and only pull tasks if there
- * are not busy CPUs (i.e., no CPU has running tasks).
+ * @dst_cpu has SMT siblings. When both @dst_cpu and the busiest core
+ * have one or more busy siblings, moving tasks between them results
+ * in the same throughput. Throughput can only increase if all the
+ * siblings of @dst_cpu are idle.
+ *
+ * If the difference in the number of busy CPUs is two or more, let
+ * find_busiest_group() take care of it.
*/
- if (!sds->local_stat.sum_nr_running)
+ if (sg_busy_cpus == 1 && !sds->local_stat.sum_nr_running)
return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
return false;
--
2.39.1
From a1f627fd10ced4f5eeae678bc4ba96ea7fa01d7e Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 25 Aug 2022 15:55:28 -0700
Subject: [PATCH 4/9] sched/fair: Let lower-priority CPUs do active balancing
When more than one SMT siblings of a physical core are busy, an idle CPU
of lower priority can help.
Indicate that the low priority CPU can do active balancing from the high-
priority CPU only if they belong to separate cores.
Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
kernel/sched/fair.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 60f9690a5626..67b0eacad0e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10176,9 +10176,14 @@ asym_active_balance(struct lb_env *env)
* ASYM_PACKING needs to force migrate tasks from busy but
* lower priority CPUs in order to pack all tasks in the
* highest priority CPUs.
+ *
+ * If the busy CPU has higher priority but is an SMT sibling
+ * in which other SMT siblings are also busy, a lower-priority
+ * CPU in a separate core can help.
*/
return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
- sched_asym_prefer(env->dst_cpu, env->src_cpu);
+ (sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
+ !(env->sd->flags & SD_SHARE_CPUCAPACITY));
}
static inline bool
--
2.39.1

View File

@@ -0,0 +1,363 @@
From 9c85113cf4019e7b277a44e72bda8b78347aa72f Mon Sep 17 00:00:00 2001
From: Paul Gofman <pgofman@codeweavers.com>
Date: Thu, 7 May 2020 14:05:31 +0300
Subject: [PATCH 2/2] mm: Support soft dirty flag read with reset.
---
fs/proc/base.c | 3 +
fs/proc/internal.h | 1 +
fs/proc/task_mmu.c | 144 +++++++++++++++++++++++++++++++++++++++------
3 files changed, 130 insertions(+), 18 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b3422cda2a91..8199ae2411ca 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3202,6 +3202,9 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ REG("pagemap_reset", S_IRUSR, proc_pagemap_reset_operations),
+#endif
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f60b379dcdc7..36a901cf0e7f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -303,6 +303,7 @@ extern const struct file_operations proc_pid_smaps_operations;
extern const struct file_operations proc_pid_smaps_rollup_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
+extern const struct file_operations proc_pagemap_reset_operations;
extern unsigned long task_vsize(struct mm_struct *);
extern unsigned long task_statm(struct mm_struct *,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7c7865028f10..a21694967915 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1056,8 +1056,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
return page_maybe_dma_pinned(page);
}
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
- unsigned long addr, pte_t *pte)
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *pte)
{
/*
* The soft-dirty tracker uses #PF-s to catch writes
@@ -1066,37 +1066,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
* of how soft-dirty works.
*/
pte_t ptent = *pte;
+ bool ret = false;
if (pte_present(ptent)) {
pte_t old_pte;
if (pte_is_pinned(vma, addr, ptent))
- return;
+ return ret;
old_pte = ptep_modify_prot_start(vma, addr, pte);
+ ret = pte_soft_dirty(old_pte);
ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (is_swap_pte(ptent)) {
+ ret = pte_swp_soft_dirty(ptent);
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
+ return ret;
}
#else
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
+ return false;
}
#endif
#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t old, pmd = *pmdp;
+ bool ret = false;
if (pmd_present(pmd)) {
/* See comment in change_huge_pmd() */
old = pmdp_invalidate(vma, addr, pmdp);
+
+ ret = pmd_soft_dirty(old);
+
if (pmd_dirty(old))
pmd = pmd_mkdirty(pmd);
if (pmd_young(old))
@@ -1107,14 +1116,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ ret = pmd_swp_soft_dirty(pmd);
pmd = pmd_swp_clear_soft_dirty(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
+ return ret;
}
#else
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
+ return false;
}
#endif
@@ -1367,6 +1379,7 @@ struct pagemapread {
int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
pagemap_entry_t *buffer;
bool show_pfn;
+ bool reset;
};
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
@@ -1398,6 +1411,14 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
return 0;
}
+static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm)
+{
+ ((unsigned long *)pm->buffer)[pm->pos++] = addr;
+ if (pm->pos >= pm->len)
+ return PM_END_OF_BUFFER;
+ return 0;
+}
+
static int pagemap_pte_hole(unsigned long start, unsigned long end,
__always_unused int depth, struct mm_walk *walk)
{
@@ -1405,6 +1426,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
unsigned long addr = start;
int err = 0;
+ if (pm->reset)
+ goto out;
+
while (addr < end) {
struct vm_area_struct *vma = find_vma(walk->mm, addr);
pagemap_entry_t pme = make_pme(0, 0);
@@ -1439,8 +1463,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
}
static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
- struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+ struct vm_area_struct *vma, unsigned long addr, pte_t *pte_addr)
{
+ pte_t pte = *pte_addr;
u64 frame = 0, flags = 0;
struct page *page = NULL;
@@ -1493,6 +1518,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
pmd_t pmd = *pmdp;
struct page *page = NULL;
+ if (pm->reset)
+ {
+ if (clear_soft_dirty_pmd(vma, addr, pmdp))
+ {
+ for (; addr != end; addr += PAGE_SIZE)
+ {
+ err = add_addr_to_pagemap(addr, pm);
+ if (err)
+ break;
+ }
+ }
+ goto trans_huge_done;
+ }
+
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1541,6 +1580,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
frame += (1 << MAX_SWAPFILES_SHIFT);
}
}
+trans_huge_done:
spin_unlock(ptl);
return err;
}
@@ -1555,10 +1595,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
*/
orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
for (; addr < end; pte++, addr += PAGE_SIZE) {
- pagemap_entry_t pme;
+ if (pm->reset)
+ {
+ if (clear_soft_dirty(vma, addr, pte))
+ err = add_addr_to_pagemap(addr, pm);
+ }
+ else
+ {
+ pagemap_entry_t pme;
- pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
- err = add_to_pagemap(addr, &pme, pm);
+ pme = pte_to_pagemap_entry(pm, vma, addr, pte);
+ err = add_to_pagemap(addr, &pme, pm);
+ }
if (err)
break;
}
@@ -1650,8 +1698,8 @@ static const struct mm_walk_ops pagemap_ops = {
* determine which areas of memory are actually mapped and llseek to
* skip over unmapped regions.
*/
-static ssize_t pagemap_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t do_pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos, bool reset)
{
struct mm_struct *mm = file->private_data;
struct pagemapread pm;
@@ -1660,6 +1708,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long start_vaddr;
unsigned long end_vaddr;
int ret = 0, copied = 0;
+ struct mmu_notifier_range range;
+ size_t buffer_len;
if (!mm || !mmget_not_zero(mm))
goto out;
@@ -1675,19 +1725,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* do not disclose physical addresses: attack vector */
pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
+ pm.reset = reset;
- pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
- pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
+ buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES);
+
+ pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL);
ret = -ENOMEM;
if (!pm.buffer)
goto out_mm;
src = *ppos;
svpfn = src / PM_ENTRY_BYTES;
- end_vaddr = mm->task_size;
+
+ start_vaddr = svpfn << PAGE_SHIFT;
+
+ if (reset)
+ {
+ /* pm.buffer and the mm reference are held here: always exit via out_free */
+ if (count < sizeof(end_vaddr) ||
+ copy_from_user(&end_vaddr, buf, sizeof(end_vaddr)))
+ {
+ ret = count < sizeof(end_vaddr) ? -EINVAL : -EFAULT;
+ goto out_free;
+ }
+ end_vaddr = min(end_vaddr, mm->task_size);
+ }
+ else
+ {
+ end_vaddr = mm->task_size;
+ start_vaddr = end_vaddr;
+ }
/* watch out for wraparound */
- start_vaddr = end_vaddr;
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) {
ret = mmap_read_lock_killable(mm);
if (ret)
@@ -1707,18 +1776,35 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long end;
pm.pos = 0;
- end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
+ pm.len = min(buffer_len, count / PM_ENTRY_BYTES);
+
+ end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT));
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
+
ret = mmap_read_lock_killable(mm);
if (ret)
goto out_free;
+
+ if (reset)
+ {
+ inc_tlb_flush_pending(mm);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
+ 0, mm, start_vaddr, end);
+ mmu_notifier_invalidate_range_start(&range);
+ }
ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
+ if (reset)
+ {
+ mmu_notifier_invalidate_range_end(&range);
+ flush_tlb_mm(mm);
+ dec_tlb_flush_pending(mm);
+ }
mmap_read_unlock(mm);
- start_vaddr = end;
len = min(count, PM_ENTRY_BYTES * pm.pos);
+ BUG_ON(ret && ret != PM_END_OF_BUFFER);
if (copy_to_user(buf, pm.buffer, len)) {
ret = -EFAULT;
goto out_free;
@@ -1726,6 +1812,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
copied += len;
buf += len;
count -= len;
+
+ start_vaddr = reset && pm.pos == pm.len ? ((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end;
}
*ppos += copied;
if (!ret || ret == PM_END_OF_BUFFER)
@@ -1739,6 +1827,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
return ret;
}
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return do_pagemap_read(file, buf, count, ppos, false);
+}
+
+static ssize_t pagemap_reset_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return do_pagemap_read(file, buf, count, ppos, true);
+}
+
static int pagemap_open(struct inode *inode, struct file *file)
{
struct mm_struct *mm;
@@ -1765,6 +1865,14 @@ const struct file_operations proc_pagemap_operations = {
.open = pagemap_open,
.release = pagemap_release,
};
+
+const struct file_operations proc_pagemap_reset_operations = {
+ .llseek = mem_lseek, /* borrow this */
+ .read = pagemap_reset_read,
+ .open = pagemap_open,
+ .release = pagemap_release,
+};
+
#endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA
--
2.30.2

View File

@@ -0,0 +1,879 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: [PATCH 01/17] glitched
---
init/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/Makefile b/init/Makefile
index baf3ab8d9d49..854e32e6aec7 100755
--- a/init/Makefile
+++ b/init/Makefile
@@ -19,7 +19,7 @@ else
# Maximum length of UTS_VERSION is 64 chars
filechk_uts_version = \
- utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "$(build-timestamp)" | cut -b -64); \
+ utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "TKG" "$(build-timestamp)" | cut -b -64); \
echo '$(pound)'define UTS_VERSION \""$${utsver}"\"
#
--
2.28.0
From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 16:59:22 +0000
Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which
VFS caches are reclaimed
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
fs/dcache.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 361ea7ab30ea..0c5cf69b241a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -71,7 +71,7 @@
* If no ancestor relationship:
* arbitrary, since it's serialized on rename_lock
*/
-int sysctl_vfs_cache_pressure __read_mostly = 100;
+int sysctl_vfs_cache_pressure __read_mostly = 50;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
--
2.28.0
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f788cd61df21..2bfbb4213707 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -15,9 +15,9 @@ __read_mostly int scheduler_running;
/*
* part of the period that we allow rt tasks to run in us.
- * default: 0.95s
+ * XanMod default: 0.98s
*/
-int sysctl_sched_rt_runtime = 950000;
+int sysctl_sched_rt_runtime = 980000;
#ifdef CONFIG_SYSCTL
static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
--
2.28.0
From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:41:29 +0000
Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
scripts/setlocalversion | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 20f2efd57b11..0552d8b9f582 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -54,7 +54,7 @@ scm_version()
# If only the short version is requested, don't bother
# running further git commands
if $short; then
- echo "+"
+ #echo "+"
return
fi
# If we are past the tagged commit, we pretty print it.
--
2.28.0
From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Fri, 26 Oct 2018 11:22:33 +0100
Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3
inlining
---
drivers/infiniband/core/addr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 3a98439bba83..6efc4f907f58 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
union {
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
+ struct sockaddr_ib _sockaddr_ib;
} sgid_addr, dgid_addr;
int ret;
--
2.28.0
From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001
From: Etienne Juvigny <Ti3noU@gmail.com>
Date: Mon, 3 Sep 2018 17:36:25 +0200
Subject: [PATCH 07/17] Zenify & stuff
---
init/Kconfig | 32 ++++++++++++++++++++++++++++++++
kernel/sched/fair.c | 25 +++++++++++++++++++++++++
mm/page-writeback.c | 8 ++++++++
3 files changed, 65 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 3ae8678e1145..da708eed0f1e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK
menu "General setup"
+config ZENIFY
+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience"
+ default y
+ help
+ Tunes the kernel for responsiveness at the cost of throughput and power usage.
+
+ --- Virtual Memory Subsystem ---------------------------
+
+ Mem dirty before bg writeback..: 10 % -> 20 %
+ Mem dirty before sync writeback: 20 % -> 50 %
+
+ --- Block Layer ----------------------------------------
+
+ Queue depth...............: 128 -> 512
+ Default MQ scheduler......: mq-deadline -> bfq
+
+ --- CFS CPU Scheduler ----------------------------------
+
+ Scheduling latency.............: 6 -> 3 ms
+ Minimal granularity............: 0.75 -> 0.3 ms
+ Wakeup granularity.............: 1 -> 0.5 ms
+ CPU migration cost.............: 0.5 -> 0.25 ms
+ Bandwidth slice size...........: 5 -> 3 ms
+ Ondemand fine upscaling limit..: 95 % -> 85 %
+
+ --- MuQSS CPU Scheduler --------------------------------
+
+ Scheduling interval............: 6 -> 3 ms
+ ISO task max realtime use......: 70 % -> 25 %
+ Ondemand coarse upscaling limit: 80 % -> 45 %
+ Ondemand fine upscaling limit..: 95 % -> 45 %
+
config BROKEN
bool
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -37,8 +37,13 @@
*
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_latency = 3000000ULL;
+static unsigned int normalized_sysctl_sched_latency = 3000000ULL;
+#else
unsigned int sysctl_sched_latency = 6000000ULL;
static unsigned int normalized_sysctl_sched_latency = 6000000ULL;
+#endif
/*
* The initial- and re-scaling of tunables is configurable
@@ -58,21 +63,34 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
*
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_min_granularity = 300000ULL;
+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_min_granularity = 750000ULL;
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
+#endif
/*
* Minimal preemption granularity for CPU-bound SCHED_IDLE tasks.
* Applies only when SCHED_IDLE tasks compete with normal tasks.
*
* (default: 0.75 msec)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_idle_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_idle_min_granularity = 750000ULL;
+#endif
/*
* This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sched_nr_latency = 10;
+#else
static unsigned int sched_nr_latency = 8;
+#endif
/*
* After fork, child runs first. If set to 0 (default) then
@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu)
*
* (default: 5 msec, units: microseconds)
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
+#else
static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
+#endif
#ifdef CONFIG_SYSCTL
static struct ctl_table sched_fair_sysctls[] = {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 28b3e7a67565..01a1aef2b9b1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
/*
* Start background writeback (via writeback threads) at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int dirty_background_ratio = 20;
+#else
static int dirty_background_ratio = 10;
+#endif
/*
* dirty_background_bytes starts at 0 (disabled) so that it is a function of
@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable;
/*
* The generator of dirty data starts writeback at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int vm_dirty_ratio = 50;
+#else
static int vm_dirty_ratio = 20;
+#endif
/*
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of
--
2.28.0
From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001
From: Steven Barrett <damentz@liquorix.net>
Date: Sun, 16 Jan 2011 18:57:32 -0600
Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control
4.4: In my tests YeAH dramatically slowed down transfers over a WLAN,
reducing throughput from ~65Mbps (CUBIC) to ~7Mbps (YeAH) over 10
seconds (netperf TCP_STREAM) including long stalls.
Be careful when choosing this. ~heftig
---
net/ipv4/Kconfig | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e64e59b536d3..bfb55ef7ebbe 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -691,6 +691,9 @@ choice
config DEFAULT_VEGAS
bool "Vegas" if TCP_CONG_VEGAS=y
+ config DEFAULT_YEAH
+ bool "YeAH" if TCP_CONG_YEAH=y
+
config DEFAULT_VENO
bool "Veno" if TCP_CONG_VENO=y
@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG
default "htcp" if DEFAULT_HTCP
default "hybla" if DEFAULT_HYBLA
default "vegas" if DEFAULT_VEGAS
+ default "yeah" if DEFAULT_YEAH
default "westwood" if DEFAULT_WESTWOOD
default "veno" if DEFAULT_VENO
default "reno" if DEFAULT_RENO
--
2.28.0
From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 28 Nov 2018 19:01:27 -0600
Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag
strategy
For some reason, the default strategy to respond to THP fault fallbacks
is still just madvise, meaning stall if the program wants transparent
hugepages, but don't trigger a background reclaim / compaction if THP
begins to fail allocations. This creates a snowball effect where we
still use the THP code paths, but we almost always fail once a system
has been active and busy for a while.
The option "defer" was created for interactive systems where THP can
still improve performance. If we have to fallback to a regular page due
to an allocation failure or anything else, we will trigger a background
reclaim and compaction so future THP attempts succeed and previous
attempts eventually have their smaller pages combined without stalling
running applications.
We still want madvise to stall applications that explicitly want THP,
so defer+madvise _does_ make a ton of sense. Make it the default for
interactive systems, especially if the kernel maintainer left
transparent hugepages on "always".
Reasoning and details in the original patch: https://lwn.net/Articles/711248/
---
mm/huge_memory.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74300e337c3c..9277f22c10a7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly =
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif
+#ifdef CONFIG_ZENIFY
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
+#else
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
+#endif
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
--
2.28.0
From 2b65a1329cb220b43c19c4d0de5833fae9e2b22d Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Wed, 24 Oct 2018 16:58:52 -0300
Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
net/sched/Kconfig | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 84badf00647e..6a922bca9f39 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -471,6 +471,9 @@ choice
config DEFAULT_SFQ
bool "Stochastic Fair Queue" if NET_SCH_SFQ
+ config DEFAULT_CAKE
+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE
+
config DEFAULT_PFIFO_FAST
bool "Priority FIFO Fast"
endchoice
@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH
default "fq" if DEFAULT_FQ
default "fq_codel" if DEFAULT_FQ_CODEL
default "sfq" if DEFAULT_SFQ
+ default "cake" if DEFAULT_CAKE
default "pfifo_fast"
endif
--
2.28.0
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Fri, 19 Apr 2019 12:33:38 +0200
Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default
The value is still pretty low, and AMD64-ABI and ELF extended numbering
supports that, so we should be fine on modern x86 systems.
This fixes crashes in some applications using more than 65535 vmas (also
affects some windows games running in wine, such as Star Citizen).
---
include/linux/mm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc05c3588aa3..b0cefe94920d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define MAPCOUNT_ELF_CORE_MARGIN (5)
-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+#define DEFAULT_MAX_MAP_COUNT (262144)
extern int sysctl_max_map_count;
--
2.28.0
From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Mon, 27 Jul 2020 00:19:18 +0200
Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT
Some games such as Detroit: Become Human tend to be very crash prone with
lower values.
---
include/linux/mm.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b0cefe94920d..890165099b07 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define DEFAULT_MAX_MAP_COUNT (262144)
+#define DEFAULT_MAX_MAP_COUNT (16777216)
extern int sysctl_max_map_count;
--
2.28.0
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 25 Nov 2019 15:13:06 -0300
Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
block/elevator.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/block/elevator.c b/block/elevator.c
index 4eab3d70e880..79669aa39d79 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -623,19 +623,19 @@ static inline bool elv_support_iosched(struct request_queue *q)
}
/*
- * For single queue devices, default to using mq-deadline. If we have multiple
- * queues or mq-deadline is not available, default to "none".
+ * For single queue devices, default to using bfq. If we have multiple
+ * queues or bfq is not available, default to "none".
*/
static struct elevator_type *elevator_get_default(struct request_queue *q)
{
if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
return NULL;
if (q->nr_hw_queues != 1 &&
!blk_mq_is_shared_tags(q->tag_set->flags))
return NULL;
- return elevator_find_get(q, "mq-deadline");
+ return elevator_find_get(q, "bfq");
}
/*
--
2.28.0
From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 3 Aug 2020 17:05:04 +0000
Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file
read-ahead pages size
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/pagemap.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cf2468da68e9..007dea784451 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);
-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE)
void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
struct file *, pgoff_t index, unsigned long req_count);
--
2.28.0
From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 15 Jan 2020 20:43:56 -0600
Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter
If intel-pstate is compiled into the kernel, it will preempt the loading
of acpi-cpufreq so you can take advantage of hardware p-states without
any friction.
However, intel-pstate is not completely superior to cpufreq's ondemand
for one reason. There's no concept of an up_threshold property.
In ondemand, up_threshold essentially reduces the maximum utilization to
compare against, allowing you to hit max frequencies and turbo boost
from a much lower core utilization.
With intel-pstate, you have the concept of minimum and maximum
performance, but no tunable that lets you define, maximum frequency
means 50% core utilization. For just this oversight, there's reasons
you may want ondemand.
Let's support setting "enable" in kernel boot parameters. This lets
kernel maintainers include "intel_pstate=disable" statically in the
static boot parameters, but let users of the kernel override this
selection.
---
Documentation/admin-guide/kernel-parameters.txt | 3 +++
drivers/cpufreq/intel_pstate.c | 2 ++
2 files changed, 5 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb95fad81c79..3e92fee81e33 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1857,6 +1857,9 @@
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
+ enable
+ Enable intel_pstate in-case "disable" was passed
+ previously in the kernel boot parameters
passive
Use intel_pstate as a scaling driver, but configure it
to work with generic cpufreq governors (instead of
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 36a469150ff9..aee891c9b78a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str)
if (!strcmp(str, "no_hwp"))
no_hwp = 1;
+ if (!strcmp(str, "enable"))
+ no_load = 0;
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))
--
2.28.0
From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001
From: Kenny Levinsen <kl@kl.wtf>
Date: Sun, 27 Dec 2020 14:43:13 +0000
Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client
Significant time was spent on synchronize_rcu in evdev_detach_client
when applications closed evdev devices. Switching VT away from a
graphical environment commonly leads to mass input device closures,
which could lead to noticeable delays on systems with many input devices.
Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
client struct till after the RCU grace period instead of blocking the
calling application.
While this does not solve all slow evdev fd closures, it takes care of a
good portion of them, including this simple test:
#include <fcntl.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
int idx, fd;
const char *path = "/dev/input/event0";
for (idx = 0; idx < 1000; idx++) {
if ((fd = open(path, O_RDWR)) == -1) {
return -1;
}
close(fd);
}
return 0;
}
Time to completion of above test when run locally:
Before: 0m27.111s
After: 0m0.018s
Signed-off-by: Kenny Levinsen <kl@kl.wtf>
---
drivers/input/evdev.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 95f90699d2b17b..2b10fe29d2c8d9 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -46,6 +46,7 @@ struct evdev_client {
struct fasync_struct *fasync;
struct evdev *evdev;
struct list_head node;
+ struct rcu_head rcu;
enum input_clock_type clk_type;
bool revoked;
unsigned long *evmasks[EV_CNT];
@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
spin_unlock(&evdev->client_lock);
}
+static void evdev_reclaim_client(struct rcu_head *rp)
+{
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
+ unsigned int i;
+ for (i = 0; i < EV_CNT; ++i)
+ bitmap_free(client->evmasks[i]);
+ kvfree(client);
+}
+
static void evdev_detach_client(struct evdev *evdev,
struct evdev_client *client)
{
spin_lock(&evdev->client_lock);
list_del_rcu(&client->node);
spin_unlock(&evdev->client_lock);
- synchronize_rcu();
+ call_rcu(&client->rcu, evdev_reclaim_client);
}
static int evdev_open_device(struct evdev *evdev)
@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
- unsigned int i;
mutex_lock(&evdev->mutex);
@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
evdev_detach_client(evdev, client);
- for (i = 0; i < EV_CNT; ++i)
- bitmap_free(client->evmasks[i]);
-
- kvfree(client);
-
evdev_close_device(evdev);
return 0;
@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
err_free_client:
evdev_detach_client(evdev, client);
- kvfree(client);
return error;
}
From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:11 -0700
Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
scheduling delays
The page allocator processes free pages in groups of pageblocks, where
the size of a pageblock is typically quite large (1024 pages without
hugetlbpage support). Pageblocks are processed atomically with the zone
lock held, which can cause severe scheduling delays on both the CPU
going through the pageblock and any other CPUs waiting to acquire the
zone lock. A frequent offender is move_freepages_block(), which is used
by rmqueue() for page allocation.
As it turns out, there's no requirement for pageblocks to be so large,
so the pageblock order can simply be reduced to ease the scheduling
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
reasonable setting to ensure non-costly page allocation requests can
still be serviced without always needing to free up more than one
pageblock's worth of pages at a time.
This has a noticeable effect on overall system latency when memory
pressure is elevated. The various mm functions which operate on
pageblocks no longer appear in the preemptoff tracer, where previously
they would spend up to 100 ms on a mobile arm64 CPU processing a
pageblock with preemption disabled and the zone lock held.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
include/linux/pageblock-flags.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 5f1ae07d724b88..97cda629c9e909 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;
#else /* CONFIG_HUGETLB_PAGE */
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
-#define pageblock_order MAX_ORDER
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER
#endif /* CONFIG_HUGETLB_PAGE */
From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:32 -0700
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()
There is noticeable scheduling latency and heavy zone lock contention
stemming from rmqueue_bulk's single hold of the zone lock while doing
its work, as seen with the preemptoff tracer. There's no actual need for
rmqueue_bulk() to hold the zone lock the entire time; it only does so
for supposed efficiency. As such, we can relax the zone lock and even
reschedule when IRQs are enabled in order to keep the scheduling delays
and zone lock contention at bay. Forward progress is still guaranteed,
as the zone lock can only be relaxed after page removal.
With this change, rmqueue_bulk() no longer appears as a serious offender
in the preemptoff tracer, and system latency is noticeably improved.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
mm/page_alloc.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0b0397e29ee4c..87a983a356530c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3118,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
}
/*
- * Obtain a specified number of elements from the buddy allocator, all under
- * a single hold of the lock, for efficiency. Add them to the supplied list.
- * Returns the number of new pages which were placed at *list.
+ * Obtain a specified number of elements from the buddy allocator, and relax the
+ * zone lock when needed. Add them to the supplied list. Returns the number of
+ * new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
unsigned long flags;
- int i;
+ const bool can_resched = !preempt_count() && !irqs_disabled();
+ int i, allocated = 0, last_mod = 0;
/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
spin_lock(&zone->lock);
@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
if (unlikely(page == NULL))
break;
+ /* Reschedule and ease the contention on the lock if needed */
+ if (i + 1 < count && ((can_resched && need_resched()) ||
+ spin_needbreak(&zone->lock))) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
+ -((i + 1 - last_mod) << order));
+ last_mod = i + 1;
+ spin_unlock(&zone->lock);
+ if (can_resched)
+ cond_resched();
+ spin_lock(&zone->lock);
+ }
+
if (unlikely(check_pcp_refill(page, order)))
continue;
@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* on i. Do not confuse with 'allocated' which is the number of
* pages added to the pcp list.
*/
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
spin_unlock(&zone->lock);
return allocated;
}
From 6329525a0fa10cd13f39b76948b1296150f75c95 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 29 Aug 2022 16:47:26 +0000
Subject: [PATCH 14/16] XANMOD: Makefile: Disable GCC vectorization on trees
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
Makefile | 3 +++
1 file changed, 3 insertions(+)
diff --git a/Makefile b/Makefile
index 3f6628780eb2..35a5ae1ede42 100644
--- a/Makefile
+++ b/Makefile
@@ -1069,6 +1069,9 @@ endif
KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
+# disable GCC vectorization on trees
+KBUILD_CFLAGS += $(call cc-option, -fno-tree-vectorize)
+
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += -fno-strict-overflow
--
2.39.1
From f997578464b2c4c63e7bd1afbfef56212ee44f2d Mon Sep 17 00:00:00 2001
From: Etienne JUVIGNY <ti3nou@gmail.com>
Date: Mon, 6 Mar 2023 13:54:09 +0100
Subject: Don't add -dirty versioning on unclean trees
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index ca5795e16..ad0d94477 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -85,12 +85,12 @@ scm_version()
# git-diff-index does not refresh the index, so it may give misleading
# results.
# See git-update-index(1), git-diff-index(1), and git-status(1).
- if {
- git --no-optional-locks status -uno --porcelain 2>/dev/null ||
- git diff-index --name-only HEAD
- } | read dummy; then
- printf '%s' -dirty
- fi
+ #if {
+ # git --no-optional-locks status -uno --porcelain 2>/dev/null ||
+ # git diff-index --name-only HEAD
+ #} | read dummy; then
+ # printf '%s' -dirty
+ #fi
}
collect_files()

View File

@@ -0,0 +1,36 @@
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
*
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_wakeup_granularity = 500000UL;
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL;
+
+const_debug unsigned int sysctl_sched_migration_cost = 50000UL;
+#else
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#endif
int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 051aaf65c..705df5511 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -208,7 +208,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
DEFINE_STATIC_KEY_FALSE(sched_energy_present);
-static unsigned int sysctl_sched_energy_aware = 1;
+static unsigned int sysctl_sched_energy_aware = 0;
static DEFINE_MUTEX(sched_energy_mutex);
static bool sched_energy_update;

View File

@@ -0,0 +1,118 @@
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
From cba31b19f8c38696b13ba48e0e8b6dbe747d6bae Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:31:25 +0000
Subject: [PATCH 10/16] XANMOD: mm/vmscan: vm_swappiness = 30 decreases the
amount of swapping
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
mm/vmscan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5b7b8d4f5297..549684b29418 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -190,7 +190,7 @@ struct scan_control {
/*
* From 0 .. 200. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 30;
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)
--
2.39.1

View File

@@ -0,0 +1,193 @@
From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001
From: Mark Weiman <mark.weiman@markzz.com>
Date: Sun, 12 Aug 2018 11:36:21 -0400
Subject: [PATCH] pci: Enable overrides for missing ACS capabilities
This is an updated version of Alex Williamson's patch from:
https://lkml.org/lkml/2013/5/30/513
Original commit message follows:
PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that
allows us to control whether transactions are allowed to be redirected
in various subnodes of a PCIe topology. For instance, if two
endpoints are below a root port or downstream switch port, the
downstream port may optionally redirect transactions between the
devices, bypassing upstream devices. The same can happen internally
on multifunction devices. The transaction may never be visible to the
upstream devices.
One upstream device that we particularly care about is the IOMMU. If
a redirection occurs in the topology below the IOMMU, then the IOMMU
cannot provide isolation between devices. This is why the PCIe spec
encourages topologies to include ACS support. Without it, we have to
assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation.
Unfortunately, far too many topologies do not support ACS to make this
a steadfast requirement. Even the latest chipsets from Intel are only
sporadically supporting ACS. We have trouble getting interconnect
vendors to include the PCIe spec required PCIe capability, let alone
suggested features.
Therefore, we need to add some flexibility. The pcie_acs_override=
boot option lets users opt-in specific devices or sets of devices to
assume ACS support. The "downstream" option assumes full ACS support
on root ports and downstream switch ports. The "multifunction"
option assumes the subset of ACS features available on multifunction
endpoints and upstream switch ports are supported. The "id:nnnn:nnnn"
option enables ACS support on devices matching the provided vendor
and device IDs, allowing more strategic ACS overrides. These options
may be combined in any order. A maximum of 16 id specific overrides
are available. It's suggested to use the most limited set of options
necessary to avoid completely disabling ACS across the topology.
Note to hardware vendors, we have facilities to permanently quirk
specific devices which enforce isolation but not provide an ACS
capability. Please contact me to have your devices added and save
your customers the hassle of this boot option.
Signed-off-by: Mark Weiman <mark.weiman@markzz.com>
---
.../admin-guide/kernel-parameters.txt | 9 ++
drivers/pci/quirks.c | 101 ++++++++++++++++++
2 files changed, 110 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aefd358a5ca3..173b3596fd9e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3190,6 +3190,15 @@
nomsi [MSI] If the PCI_MSI kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of MSI interrupts system-wide.
+ pcie_acs_override =
+ [PCIE] Override missing PCIe ACS support for:
+ downstream
+ All downstream ports - full ACS capabilities
+ multifunction
+ All multifunction devices - multifunction ACS subset
+ id:nnnn:nnnn
+ Specific device - full ACS capabilities
+ Specified as vid:did (vendor/device ID) in hex
noioapicquirk [APIC] Disable all boot interrupt quirks.
Safety option to keep boot IRQs enabled. This
should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 4700d24e5d55..8f7a3d7fd9c1 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
}
+static bool acs_on_downstream;
+static bool acs_on_multifunction;
+
+#define NUM_ACS_IDS 16
+struct acs_on_id {
+ unsigned short vendor;
+ unsigned short device;
+};
+static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
+static u8 max_acs_id;
+
+static __init int pcie_acs_override_setup(char *p)
+{
+	if (!p)
+		return -EINVAL;
+
+	while (*p) {	/* walk the comma-separated option list */
+		if (!strncmp(p, "downstream", 10))
+			acs_on_downstream = true;
+		if (!strncmp(p, "multifunction", 13))
+			acs_on_multifunction = true;
+		if (!strncmp(p, "id:", 3)) {
+			char opt[5];
+			int ret;
+			long val;
+
+			if (max_acs_id >= NUM_ACS_IDS) { /* table full */
+				pr_warn("Out of PCIe ACS override slots (%d)\n",
+					NUM_ACS_IDS);
+				goto next;
+			}
+
+			p += 3;	/* skip "id:" */
+			snprintf(opt, 5, "%s", p); /* at most 4 chars: vendor ID */
+			ret = kstrtol(opt, 16, &val);
+			if (ret) {
+				pr_warn("PCIe ACS ID parse error %d\n", ret);
+				goto next;
+			}
+			acs_on_ids[max_acs_id].vendor = val;
+
+			p += strcspn(p, ":");
+			if (*p != ':') {
+				pr_warn("PCIe ACS invalid ID\n");
+				goto next;
+			}
+
+			p++;
+			snprintf(opt, 5, "%s", p); /* at most 4 chars: device ID */
+			ret = kstrtol(opt, 16, &val);
+			if (ret) {
+				pr_warn("PCIe ACS ID parse error %d\n", ret);
+				goto next;
+			}
+			acs_on_ids[max_acs_id].device = val;
+			max_acs_id++;	/* slot is claimed only once both IDs parsed */
+		}
+next:
+		p += strcspn(p, ",");	/* advance to the next option, if any */
+		if (*p == ',')
+			p++;
+	}
+
+	if (acs_on_downstream || acs_on_multifunction || max_acs_id)
+		pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n");
+
+	return 0;
+}
+early_param("pcie_acs_override", pcie_acs_override_setup);
+
+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags)
+{
+ int i;
+
+ /* Never override ACS for legacy devices or devices with ACS caps */
+ if (!pci_is_pcie(dev) ||
+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS))
+ return -ENOTTY;
+
+ for (i = 0; i < max_acs_id; i++)
+ if (acs_on_ids[i].vendor == dev->vendor &&
+ acs_on_ids[i].device == dev->device)
+ return 1;
+
+ switch (pci_pcie_type(dev)) {
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ case PCI_EXP_TYPE_ROOT_PORT:
+ if (acs_on_downstream)
+ return 1;
+ break;
+ case PCI_EXP_TYPE_ENDPOINT:
+ case PCI_EXP_TYPE_UPSTREAM:
+ case PCI_EXP_TYPE_LEG_END:
+ case PCI_EXP_TYPE_RC_END:
+ if (acs_on_multifunction && dev->multifunction)
+ return 1;
+ }
+
+ return -ENOTTY;
+}
/*
* Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
* The device will throw a Link Down error on AER-capable systems and
@@ -5102,6 +5102,7 @@
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
/* Wangxun nics */
{ PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs },
+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
{ 0 }
};

View File

@@ -13,12 +13,12 @@ of Proton to still use fsync in new kernel releases.
Signed-off-by: André Almeida <andrealmeid@collabora.com>
---
include/uapi/linux/futex.h | 12 ++++++
kernel/futex/core.c | 75 +++++++++++++++++++++++++++++++++++++-
2 files changed, 86 insertions(+), 1 deletion(-)
include/uapi/linux/futex.h | 13 +++++++
kernel/futex/syscalls.c | 75 +++++++++++++++++++++++++++++++++++++-
2 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 2a06b99f9803..417c5d89b745 100644
index 71a5df8d2689..d375ab21cbf8 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -22,6 +22,7 @@
@@ -29,7 +29,7 @@ index 2a06b99f9803..417c5d89b745 100644
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
@@ -68,6 +69,17 @@ struct futex_waitv {
@@ -68,6 +69,18 @@ struct futex_waitv {
__u32 __reserved;
};
@@ -44,14 +44,15 @@ index 2a06b99f9803..417c5d89b745 100644
+ __u32 val;
+ __u32 bitset;
+};
+
/*
* Support for robust futexes: the kernel cleans up held futexes at
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 4a9e7ce3714a..c3f2e65afab8 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -4012,6 +4012,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
* thread exit time.
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index 6f91a07a6a83..2f4d4c04ede2 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -158,6 +158,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
case FUTEX_LOCK_PI2:
case FUTEX_WAIT_BITSET:
case FUTEX_WAIT_REQUEUE_PI:
@@ -59,7 +60,7 @@ index 4a9e7ce3714a..c3f2e65afab8 100644
return true;
}
return false;
@@ -4024,13 +4025,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
@@ -170,13 +171,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
return -EINVAL;
*t = timespec64_to_ktime(*ts);
@@ -140,7 +141,7 @@ index 4a9e7ce3714a..c3f2e65afab8 100644
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
const struct __kernel_timespec __user *, utime,
u32 __user *, uaddr2, u32, val3)
@@ -4050,6 +4117,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
@@ -196,6 +263,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
tp = &t;
}
@@ -150,7 +151,7 @@ index 4a9e7ce3714a..c3f2e65afab8 100644
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
@@ -4551,6 +4621,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
@@ -392,6 +462,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
tp = &t;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,66 @@
From e5e77ad2223f662e1615266d8ef39a8db7e65a70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20H=C3=A4dicke?= <felixhaedicke@web.de>
Date: Thu, 19 Nov 2020 09:22:32 +0100
Subject: HID: quirks: Add Apple Magic Trackpad 2 to hid_have_special_driver
list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The Apple Magic Trackpad 2 is handled by the magicmouse driver. And
there were severe stability issues when both drivers (hid-generic and
hid-magicmouse) were loaded for this device.
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=210241
Signed-off-by: Felix Hädicke <felixhaedicke@web.de>
---
drivers/hid/hid-quirks.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index bf7ecab5d9e5..142e9dae2837 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -478,6 +478,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
+ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
#endif
#if IS_ENABLED(CONFIG_HID_MAYFLASH)
{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
--
cgit v1.2.3-1-gf6bb5
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 3 Feb 2021 11:20:12 +0200
Subject: Revert "cpufreq: Avoid configuring old governors as default with intel_pstate"
This is an undesirable behavior for us since our aggressive ondemand performs
better than schedutil for gaming when using intel_pstate in passive mode.
Also it interferes with the option to select the desired default governor we have.
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 2c7171e0b0010..85de313ddec29 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -71,7 +71,6 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE
config CPU_FREQ_DEFAULT_GOV_ONDEMAND
bool "ondemand"
- depends on !(X86_INTEL_PSTATE && SMP)
select CPU_FREQ_GOV_ONDEMAND
select CPU_FREQ_GOV_PERFORMANCE
help
@@ -83,7 +84,6 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND
config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
bool "conservative"
- depends on !(X86_INTEL_PSTATE && SMP)
select CPU_FREQ_GOV_CONSERVATIVE
select CPU_FREQ_GOV_PERFORMANCE
help

View File

@@ -0,0 +1,35 @@
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 70392fd2f..34f98648f 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -63,4 +66,4 @@ $S Source: kernel-$__KERNELRELEASE.tar.gz
Group: Development/System
- Obsoletes: kernel-headers
Provides: kernel-headers = %{version}
+ Provides: installonlypkg(kernel) = %{version}
%description headers
@@ -75,2 +78,5 @@ $S$M Summary: Development package for building kernel modules to match the $__KE
$S$M Group: System Environment/Kernel
+$S$M Provides: kernel-devel = %{version}
+$S$M Provides: kernel-devel-uname-r = %{version}
+$S$M Provides: installonlypkg(kernel) = %{version}
$S$M AutoReqProv: no
@@ -80,2 +86,18 @@ $S$M against the $__KERNELRELEASE kernel package.
$S$M
+$S # Opt out of a lot of Fedora hardening flags etc...
+$S # See https://src.fedoraproject.org/rpms/redhat-rpm-config//blob/rawhide/f/buildflags.md
+$S %undefine _package_note_file
+$S %undefine _auto_set_build_flags
+$S %undefine _include_frame_pointers
+$S %define _build_id_flags -Wl,--build-id=none
+$S %undefine _annotated_build
+$S %undefine _fortify_level
+$S %undefine _hardened_build
+$S %global _lto_cflags %{nil}
+$S %global _configure_gnuconfig_hack 0
+$S %global _configure_libtool_hardening_hack 0
+$S # Nearly had to go to the deep web to find documentation on this one... Gosh
+$S # See https://github.com/rpm-software-management/rpm/blob/master/macros.in#L471
+$S %define _build_id_links none
+$S
$S %prep

View File

@@ -0,0 +1,46 @@
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -442,7 +442,7 @@ endif
HOSTPKG_CONFIG = pkg-config
KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
- -O2 -fomit-frame-pointer -std=gnu11 \
+ -O3 -fomit-frame-pointer -std=gnu11 \
-Wdeclaration-after-statement
KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS)
KBUILD_USERLDFLAGS := $(USERLDFLAGS)
@@ -474,7 +474,7 @@ endif
-Wclippy::dbg_macro
KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
-KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
+KBUILD_HOSTCXXFLAGS := -Wall -O3 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
KBUILD_HOSTRUSTFLAGS := $(rust_common_flags) -O -Cstrip=debuginfo \
-Zallow-features= $(HOSTRUSTFLAGS)
KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS)
@@ -757,7 +757,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
-KBUILD_CFLAGS += -O2
+KBUILD_CFLAGS += -O3
KBUILD_RUSTFLAGS += -Copt-level=2
else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += -Os
diff --git a/init/Kconfig b/init/Kconfig
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1401,10 +1401,10 @@ choice
default CC_OPTIMIZE_FOR_PERFORMANCE
config CC_OPTIMIZE_FOR_PERFORMANCE
- bool "Optimize for performance (-O2)"
+ bool "Optimize for performance (-O3)"
help
This is the default optimization level for the kernel, building
- with the "-O2" compiler flag for best performance and most
+ with the "-O3" compiler flag for best performance and most
helpful compile-time warnings.
config CC_OPTIMIZE_FOR_SIZE