Compare commits


1 Commit

Author SHA1 Message Date
Sravan Balaji
e8a95fd951 PDS Kernel Configuration 2022-11-24 12:52:33 -05:00
33 changed files with 1070 additions and 127607 deletions

View File

@@ -57,7 +57,7 @@ else
fi fi
pkgname=("${pkgbase}" "${pkgbase}-headers") pkgname=("${pkgbase}" "${pkgbase}-headers")
pkgver="${_basekernel}"."${_sub}" pkgver="${_basekernel}"."${_sub}"
pkgrel=273 pkgrel=272
pkgdesc='Linux-tkg' pkgdesc='Linux-tkg'
arch=('x86_64') # no i686 in here arch=('x86_64') # no i686 in here
url="https://www.kernel.org/" url="https://www.kernel.org/"
@@ -262,7 +262,7 @@ hackheaders() {
msg2 "Stripping build tools..." msg2 "Stripping build tools..."
local file local file
while read -rd '' file; do while read -rd '' file; do
case "$(file -Sib "$file")" in case "$(file -bi "$file")" in
application/x-sharedlib\;*) # Libraries (.so) application/x-sharedlib\;*) # Libraries (.so)
strip -v $STRIP_SHARED "$file" ;; strip -v $STRIP_SHARED "$file" ;;
application/x-archive\;*) # Libraries (.a) application/x-archive\;*) # Libraries (.a)
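
For context, this hackheaders() step classifies each file by MIME type before picking strip flags; the two sides of the hunk differ only in the file(1) flags used. A minimal standalone sketch of the same idiom follows - the paths, find options and STRIP_* values are illustrative, not taken from the PKGBUILD:

#!/bin/bash
# Illustrative sketch of the MIME-based stripping loop shown above.
STRIP_SHARED="--strip-unneeded"   # assumed makepkg.conf-style defaults
STRIP_STATIC="--strip-debug"
STRIP_BINARIES="--strip-all"
builddir="${1:-.}"
while IFS= read -rd '' file; do
  case "$(file -bi "$file")" in
    application/x-sharedlib\;*)  strip -v $STRIP_SHARED "$file" ;;   # libraries (.so)
    application/x-archive\;*)    strip -v $STRIP_STATIC "$file" ;;   # libraries (.a)
    application/x-executable\;*) strip -v $STRIP_BINARIES "$file" ;; # binaries
  esac
done < <(find "$builddir" -type f -perm -u+x -print0)
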

View File

@@ -57,10 +57,6 @@ _diffconfig_name=""
# [install.sh specific] Use tmpfs as a work directory, recommended when RAM >= 32GB to reduce HDD/SSD usage. For more information, see https://wiki.archlinux.org/title/Tmpfs # [install.sh specific] Use tmpfs as a work directory, recommended when RAM >= 32GB to reduce HDD/SSD usage. For more information, see https://wiki.archlinux.org/title/Tmpfs
_use_tmpfs="false" _use_tmpfs="false"
# Always make a fresh clone of the source in tmpfs to speed up compilation times
# ! This will take ~20GB of RAM by itself, so don't use on <32GB RAM systems !
_source_in_tmpfs="false"
# [install.sh specific] tmpfs folder path, only used when _use_tmpfs="true". # [install.sh specific] tmpfs folder path, only used when _use_tmpfs="true".
# Creates a linux-tkg work folder within that path - make sure to have nothing important in "$_tmpfs_path/linux-tkg" # Creates a linux-tkg work folder within that path - make sure to have nothing important in "$_tmpfs_path/linux-tkg"
_tmpfs_path="/tmp" _tmpfs_path="/tmp"
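
As a usage note, the tmpfs work directory is enabled purely through the two options above; an illustrative customization.cfg fragment (example values, not part of this commit):

_use_tmpfs="true"    # build inside a tmpfs to spare the disk - only sensible with >= 32GB RAM
_tmpfs_path="/tmp"   # a linux-tkg work folder is created under this path, so keep nothing important there
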
@@ -192,8 +188,8 @@ _zenify="true"
_compileroptlevel="2" _compileroptlevel="2"
# CPU compiler optimizations - Defaults to prompt at kernel config if left empty # CPU compiler optimizations - Defaults to prompt at kernel config if left empty
# AMD CPUs : "k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver" "steamroller" "excavator" "zen" "zen2" "zen3" "zen4" (zen3 opt support depends on GCC11) (zen4 opt support depends on GCC13) # AMD CPUs : "k8" "k8sse3" "k10" "barcelona" "bobcat" "jaguar" "bulldozer" "piledriver" "steamroller" "excavator" "zen" "zen2" "zen3" (zen3 opt support depends on GCC11)
# Intel CPUs : "mpsc"(P4 & older Netburst based Xeon) "atom" "core2" "nehalem" "westmere" "silvermont" "sandybridge" "ivybridge" "haswell" "broadwell" "skylake" "skylakex" "cannonlake" "icelake" "goldmont" "goldmontplus" "cascadelake" "cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake" "raptorlake" "meteorlake" (raptorlake and meteorlake opt support require GCC13) # Intel CPUs : "mpsc"(P4 & older Netburst based Xeon) "atom" "core2" "nehalem" "westmere" "silvermont" "sandybridge" "ivybridge" "haswell" "broadwell" "skylake" "skylakex" "cannonlake" "icelake" "goldmont" "goldmontplus" "cascadelake" "cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake"
# Other options : # Other options :
# - "native_amd" (use compiler autodetection - Selecting your arch manually in the list above is recommended instead of this option) # - "native_amd" (use compiler autodetection - Selecting your arch manually in the list above is recommended instead of this option)
# - "native_intel" (use compiler autodetection and will prompt for P6_NOPS - Selecting your arch manually in the list above is recommended instead of this option) # - "native_intel" (use compiler autodetection and will prompt for P6_NOPS - Selecting your arch manually in the list above is recommended instead of this option)
@@ -245,7 +241,7 @@ _aggressive_ondemand="true"
_tcp_cong_alg="" _tcp_cong_alg=""
# You can pass a default set of kernel command line options here - example: "intel_pstate=passive nowatchdog amdgpu.ppfeaturemask=0xfffd7fff mitigations=off" # You can pass a default set of kernel command line options here - example: "intel_pstate=passive nowatchdog amdgpu.ppfeaturemask=0xfffd7fff mitigations=off"
_custom_commandline="intel_pstate=passive split_lock_detect=off" _custom_commandline="intel_pstate=passive"
# Selection of Clearlinux patches # Selection of Clearlinux patches
_clear_patches="true" _clear_patches="true"
@@ -264,10 +260,10 @@ _kernel_localversion=""
# Set to "true" to add back missing symbol for AES-NI/AVX support on ZFS - This is a legacy option that can be ignored on 5.10+ kernels - https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions.patch # Set to "true" to add back missing symbol for AES-NI/AVX support on ZFS - This is a legacy option that can be ignored on 5.10+ kernels - https://github.com/NixOS/nixpkgs/blob/master/pkgs/os-specific/linux/kernel/export_kernel_fpu_functions.patch
_zfsfix="true" _zfsfix="true"
# Set to your maximum number of CPUs (physical + logical cores) - Lower means less overhead - You can set it to "$(nproc)" to use the current host's CPU(s) core count, or leave empty to use default # Set to your maximum number of CPUs (physical + logical cores) - Lower means less overhead - You can set it to "$(nproc)" to use the current host's CPU(s) core count, or leave empty to get a prompt
# If you set this to a lower value than you have cores, some cores will be disabled # If you set this to a lower value than you have cores, some cores will be disabled
# Default Arch kernel value is 320 # Default Arch kernel value is 320
_NR_CPUS_value="" _NR_CPUS_value="128"
#### USER PATCHES #### #### USER PATCHES ####

View File

@@ -1,10 +1,10 @@
# #
# Automatically generated file; DO NOT EDIT. # Automatically generated file; DO NOT EDIT.
# Linux/x86 6.1.0-arch1 Kernel Configuration # Linux/x86 6.1.0-rc1 Kernel Configuration
# #
CONFIG_CC_VERSION_TEXT="gcc (GCC) 12.2.0" CONFIG_CC_VERSION_TEXT="gcc (TkG-mostlyportable) 12.2.1 20220822"
CONFIG_CC_IS_GCC=y CONFIG_CC_IS_GCC=y
CONFIG_GCC_VERSION=120200 CONFIG_GCC_VERSION=120201
CONFIG_CLANG_VERSION=0 CONFIG_CLANG_VERSION=0
CONFIG_AS_IS_GNU=y CONFIG_AS_IS_GNU=y
CONFIG_AS_VERSION=23900 CONFIG_AS_VERSION=23900
@@ -17,7 +17,7 @@ CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_ASM_INLINE=y
CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y
CONFIG_PAHOLE_VERSION=124 CONFIG_PAHOLE_VERSION=123
CONFIG_IRQ_WORK=y CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_BUILDTIME_TABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y CONFIG_THREAD_INFO_IN_TASK=y
@@ -659,7 +659,7 @@ CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
# #
CONFIG_X86_INTEL_PSTATE=y CONFIG_X86_INTEL_PSTATE=y
CONFIG_X86_PCC_CPUFREQ=m CONFIG_X86_PCC_CPUFREQ=m
CONFIG_X86_AMD_PSTATE=y CONFIG_X86_AMD_PSTATE=m
CONFIG_X86_AMD_PSTATE_UT=m CONFIG_X86_AMD_PSTATE_UT=m
CONFIG_X86_ACPI_CPUFREQ=m CONFIG_X86_ACPI_CPUFREQ=m
CONFIG_X86_ACPI_CPUFREQ_CPB=y CONFIG_X86_ACPI_CPUFREQ_CPB=y
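
Individual toggles in .config hunks like this one (for example X86_AMD_PSTATE moving between built-in and module) can be inspected or reproduced with the kernel's own scripts/config helper; a brief illustrative sketch, not part of this commit, run from any kernel source tree holding the .config:

./scripts/config --file .config --state  X86_AMD_PSTATE   # prints y, m, n or undef
./scripts/config --file .config --enable X86_AMD_PSTATE   # set =y (built-in)
./scripts/config --file .config --module X86_AMD_PSTATE   # set =m (module)
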
@@ -1217,7 +1217,6 @@ CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_ESPINTCP=y CONFIG_INET_ESPINTCP=y
CONFIG_INET_IPCOMP=m CONFIG_INET_IPCOMP=m
CONFIG_INET_TABLE_PERTURB_ORDER=16
CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_XFRM_TUNNEL=m
CONFIG_INET_TUNNEL=m CONFIG_INET_TUNNEL=m
CONFIG_INET_DIAG=m CONFIG_INET_DIAG=m
@@ -2639,7 +2638,7 @@ CONFIG_UACCE=m
CONFIG_PVPANIC=y CONFIG_PVPANIC=y
CONFIG_PVPANIC_MMIO=m CONFIG_PVPANIC_MMIO=m
CONFIG_PVPANIC_PCI=m CONFIG_PVPANIC_PCI=m
CONFIG_GP_PCI1XXXX=m # CONFIG_GP_PCI1XXXX is not set
# end of Misc devices # end of Misc devices
# #
@@ -3242,7 +3241,7 @@ CONFIG_NGBE=m
CONFIG_TXGBE=m CONFIG_TXGBE=m
CONFIG_JME=m CONFIG_JME=m
CONFIG_NET_VENDOR_ADI=y CONFIG_NET_VENDOR_ADI=y
CONFIG_ADIN1110=m # CONFIG_ADIN1110 is not set
CONFIG_NET_VENDOR_LITEX=y CONFIG_NET_VENDOR_LITEX=y
CONFIG_NET_VENDOR_MARVELL=y CONFIG_NET_VENDOR_MARVELL=y
CONFIG_MVMDIO=m CONFIG_MVMDIO=m
@@ -3273,7 +3272,7 @@ CONFIG_MLX5_TC_CT=y
CONFIG_MLX5_TC_SAMPLE=y CONFIG_MLX5_TC_SAMPLE=y
CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN_DCB=y
CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MLX5_EN_MACSEC=y # CONFIG_MLX5_EN_MACSEC is not set
CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_IPSEC=y
CONFIG_MLX5_EN_TLS=y CONFIG_MLX5_EN_TLS=y
CONFIG_MLX5_SW_STEERING=y CONFIG_MLX5_SW_STEERING=y
@@ -3496,8 +3495,7 @@ CONFIG_DP83TD510_PHY=m
CONFIG_VITESSE_PHY=m CONFIG_VITESSE_PHY=m
CONFIG_XILINX_GMII2RGMII=m CONFIG_XILINX_GMII2RGMII=m
CONFIG_MICREL_KS8995MA=m CONFIG_MICREL_KS8995MA=m
CONFIG_PSE_CONTROLLER=y # CONFIG_PSE_CONTROLLER is not set
CONFIG_PSE_REGULATOR=m
CONFIG_CAN_DEV=m CONFIG_CAN_DEV=m
CONFIG_CAN_VCAN=m CONFIG_CAN_VCAN=m
CONFIG_CAN_VXCAN=m CONFIG_CAN_VXCAN=m
@@ -4092,7 +4090,7 @@ CONFIG_KEYBOARD_MCS=m
CONFIG_KEYBOARD_MPR121=m CONFIG_KEYBOARD_MPR121=m
CONFIG_KEYBOARD_NEWTON=m CONFIG_KEYBOARD_NEWTON=m
CONFIG_KEYBOARD_OPENCORES=m CONFIG_KEYBOARD_OPENCORES=m
CONFIG_KEYBOARD_PINEPHONE=m # CONFIG_KEYBOARD_PINEPHONE is not set
CONFIG_KEYBOARD_SAMSUNG=m CONFIG_KEYBOARD_SAMSUNG=m
CONFIG_KEYBOARD_STOWAWAY=m CONFIG_KEYBOARD_STOWAWAY=m
CONFIG_KEYBOARD_SUNKBD=m CONFIG_KEYBOARD_SUNKBD=m
@@ -4332,7 +4330,7 @@ CONFIG_INPUT_PCAP=m
CONFIG_INPUT_ADXL34X=m CONFIG_INPUT_ADXL34X=m
CONFIG_INPUT_ADXL34X_I2C=m CONFIG_INPUT_ADXL34X_I2C=m
CONFIG_INPUT_ADXL34X_SPI=m CONFIG_INPUT_ADXL34X_SPI=m
CONFIG_INPUT_IBM_PANEL=m # CONFIG_INPUT_IBM_PANEL is not set
CONFIG_INPUT_IMS_PCU=m CONFIG_INPUT_IMS_PCU=m
CONFIG_INPUT_IQS269A=m CONFIG_INPUT_IQS269A=m
CONFIG_INPUT_IQS626A=m CONFIG_INPUT_IQS626A=m
@@ -4346,7 +4344,7 @@ CONFIG_INPUT_DRV260X_HAPTICS=m
CONFIG_INPUT_DRV2665_HAPTICS=m CONFIG_INPUT_DRV2665_HAPTICS=m
CONFIG_INPUT_DRV2667_HAPTICS=m CONFIG_INPUT_DRV2667_HAPTICS=m
CONFIG_INPUT_RAVE_SP_PWRBUTTON=m CONFIG_INPUT_RAVE_SP_PWRBUTTON=m
CONFIG_INPUT_RT5120_PWRKEY=m # CONFIG_INPUT_RT5120_PWRKEY is not set
CONFIG_RMI4_CORE=m CONFIG_RMI4_CORE=m
CONFIG_RMI4_I2C=m CONFIG_RMI4_I2C=m
CONFIG_RMI4_SPI=m CONFIG_RMI4_SPI=m
@@ -4456,7 +4454,7 @@ CONFIG_SERIAL_ARC_NR_PORTS=1
CONFIG_SERIAL_RP2=m CONFIG_SERIAL_RP2=m
CONFIG_SERIAL_RP2_NR_UARTS=32 CONFIG_SERIAL_RP2_NR_UARTS=32
CONFIG_SERIAL_FSL_LPUART=m CONFIG_SERIAL_FSL_LPUART=m
CONFIG_SERIAL_FSL_LPUART_CONSOLE=y # CONFIG_SERIAL_FSL_LPUART_CONSOLE is not set
CONFIG_SERIAL_FSL_LINFLEXUART=m CONFIG_SERIAL_FSL_LINFLEXUART=m
CONFIG_SERIAL_MEN_Z135=m CONFIG_SERIAL_MEN_Z135=m
CONFIG_SERIAL_SPRD=m CONFIG_SERIAL_SPRD=m
@@ -4493,7 +4491,7 @@ CONFIG_IPMI_IPMB=m
CONFIG_IPMI_WATCHDOG=m CONFIG_IPMI_WATCHDOG=m
CONFIG_IPMI_POWEROFF=m CONFIG_IPMI_POWEROFF=m
CONFIG_IPMB_DEVICE_INTERFACE=m CONFIG_IPMB_DEVICE_INTERFACE=m
CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM=m
CONFIG_HW_RANDOM_TIMERIOMEM=m CONFIG_HW_RANDOM_TIMERIOMEM=m
CONFIG_HW_RANDOM_INTEL=m CONFIG_HW_RANDOM_INTEL=m
CONFIG_HW_RANDOM_AMD=m CONFIG_HW_RANDOM_AMD=m
@@ -4520,10 +4518,10 @@ CONFIG_DEVPORT=y
CONFIG_HPET=y CONFIG_HPET=y
# CONFIG_HPET_MMAP is not set # CONFIG_HPET_MMAP is not set
CONFIG_HANGCHECK_TIMER=m CONFIG_HANGCHECK_TIMER=m
CONFIG_TCG_TPM=y CONFIG_TCG_TPM=m
CONFIG_HW_RANDOM_TPM=y CONFIG_HW_RANDOM_TPM=y
CONFIG_TCG_TIS_CORE=y CONFIG_TCG_TIS_CORE=m
CONFIG_TCG_TIS=y CONFIG_TCG_TIS=m
CONFIG_TCG_TIS_SPI=m CONFIG_TCG_TIS_SPI=m
CONFIG_TCG_TIS_SPI_CR50=y CONFIG_TCG_TIS_SPI_CR50=y
CONFIG_TCG_TIS_I2C=m CONFIG_TCG_TIS_I2C=m
@@ -4535,7 +4533,7 @@ CONFIG_TCG_NSC=m
CONFIG_TCG_ATMEL=m CONFIG_TCG_ATMEL=m
CONFIG_TCG_INFINEON=m CONFIG_TCG_INFINEON=m
CONFIG_TCG_XEN=m CONFIG_TCG_XEN=m
CONFIG_TCG_CRB=y CONFIG_TCG_CRB=m
CONFIG_TCG_VTPM_PROXY=m CONFIG_TCG_VTPM_PROXY=m
CONFIG_TCG_TIS_ST33ZP24=m CONFIG_TCG_TIS_ST33ZP24=m
CONFIG_TCG_TIS_ST33ZP24_I2C=m CONFIG_TCG_TIS_ST33ZP24_I2C=m
@@ -4635,7 +4633,7 @@ CONFIG_I2C_DIOLAN_U2C=m
CONFIG_I2C_DLN2=m CONFIG_I2C_DLN2=m
CONFIG_I2C_CP2615=m CONFIG_I2C_CP2615=m
CONFIG_I2C_PARPORT=m CONFIG_I2C_PARPORT=m
CONFIG_I2C_PCI1XXXX=m # CONFIG_I2C_PCI1XXXX is not set
CONFIG_I2C_ROBOTFUZZ_OSIF=m CONFIG_I2C_ROBOTFUZZ_OSIF=m
CONFIG_I2C_TAOS_EVM=m CONFIG_I2C_TAOS_EVM=m
CONFIG_I2C_TINY_USB=m CONFIG_I2C_TINY_USB=m
@@ -4686,7 +4684,7 @@ CONFIG_SPI_INTEL_PCI=m
CONFIG_SPI_INTEL_PLATFORM=m CONFIG_SPI_INTEL_PLATFORM=m
CONFIG_SPI_LM70_LLP=m CONFIG_SPI_LM70_LLP=m
CONFIG_SPI_MICROCHIP_CORE=m CONFIG_SPI_MICROCHIP_CORE=m
CONFIG_SPI_MICROCHIP_CORE_QSPI=m # CONFIG_SPI_MICROCHIP_CORE_QSPI is not set
# CONFIG_SPI_LANTIQ_SSC is not set # CONFIG_SPI_LANTIQ_SSC is not set
CONFIG_SPI_OC_TINY=m CONFIG_SPI_OC_TINY=m
CONFIG_SPI_PXA2XX=m CONFIG_SPI_PXA2XX=m
@@ -4752,7 +4750,7 @@ CONFIG_PINCONF=y
CONFIG_GENERIC_PINCONF=y CONFIG_GENERIC_PINCONF=y
# CONFIG_DEBUG_PINCTRL is not set # CONFIG_DEBUG_PINCTRL is not set
CONFIG_PINCTRL_AMD=y CONFIG_PINCTRL_AMD=y
CONFIG_PINCTRL_CY8C95X0=m # CONFIG_PINCTRL_CY8C95X0 is not set
CONFIG_PINCTRL_DA9062=m CONFIG_PINCTRL_DA9062=m
CONFIG_PINCTRL_MCP23S08_I2C=m CONFIG_PINCTRL_MCP23S08_I2C=m
CONFIG_PINCTRL_MCP23S08_SPI=m CONFIG_PINCTRL_MCP23S08_SPI=m
@@ -5010,7 +5008,6 @@ CONFIG_CHARGER_MAX8997=m
CONFIG_CHARGER_MAX8998=m CONFIG_CHARGER_MAX8998=m
CONFIG_CHARGER_MP2629=m CONFIG_CHARGER_MP2629=m
CONFIG_CHARGER_MT6360=m CONFIG_CHARGER_MT6360=m
CONFIG_CHARGER_MT6370=m
CONFIG_CHARGER_BQ2415X=m CONFIG_CHARGER_BQ2415X=m
CONFIG_CHARGER_BQ24190=m CONFIG_CHARGER_BQ24190=m
CONFIG_CHARGER_BQ24257=m CONFIG_CHARGER_BQ24257=m
@@ -5367,7 +5364,7 @@ CONFIG_ADVANTECH_WDT=m
CONFIG_ALIM1535_WDT=m CONFIG_ALIM1535_WDT=m
CONFIG_ALIM7101_WDT=m CONFIG_ALIM7101_WDT=m
CONFIG_EBC_C384_WDT=m CONFIG_EBC_C384_WDT=m
CONFIG_EXAR_WDT=m # CONFIG_EXAR_WDT is not set
CONFIG_F71808E_WDT=m CONFIG_F71808E_WDT=m
CONFIG_SP5100_TCO=m CONFIG_SP5100_TCO=m
CONFIG_SBC_FITPC2_WATCHDOG=m CONFIG_SBC_FITPC2_WATCHDOG=m
@@ -5501,10 +5498,10 @@ CONFIG_MFD_MAX8925=y
CONFIG_MFD_MAX8997=y CONFIG_MFD_MAX8997=y
CONFIG_MFD_MAX8998=y CONFIG_MFD_MAX8998=y
CONFIG_MFD_MT6360=m CONFIG_MFD_MT6360=m
CONFIG_MFD_MT6370=m # CONFIG_MFD_MT6370 is not set
CONFIG_MFD_MT6397=m CONFIG_MFD_MT6397=m
CONFIG_MFD_MENF21BMC=m CONFIG_MFD_MENF21BMC=m
CONFIG_MFD_OCELOT=m # CONFIG_MFD_OCELOT is not set
CONFIG_EZX_PCAP=y CONFIG_EZX_PCAP=y
CONFIG_MFD_VIPERBOARD=m CONFIG_MFD_VIPERBOARD=m
CONFIG_MFD_RETU=m CONFIG_MFD_RETU=m
@@ -5629,7 +5626,6 @@ CONFIG_REGULATOR_MT6332=m
CONFIG_REGULATOR_MT6358=m CONFIG_REGULATOR_MT6358=m
CONFIG_REGULATOR_MT6359=m CONFIG_REGULATOR_MT6359=m
CONFIG_REGULATOR_MT6360=m CONFIG_REGULATOR_MT6360=m
CONFIG_REGULATOR_MT6370=m
CONFIG_REGULATOR_MT6397=m CONFIG_REGULATOR_MT6397=m
CONFIG_REGULATOR_PALMAS=m CONFIG_REGULATOR_PALMAS=m
CONFIG_REGULATOR_PCA9450=m CONFIG_REGULATOR_PCA9450=m
@@ -6760,7 +6756,6 @@ CONFIG_BACKLIGHT_PWM=m
CONFIG_BACKLIGHT_DA903X=m CONFIG_BACKLIGHT_DA903X=m
CONFIG_BACKLIGHT_DA9052=m CONFIG_BACKLIGHT_DA9052=m
CONFIG_BACKLIGHT_MAX8925=m CONFIG_BACKLIGHT_MAX8925=m
CONFIG_BACKLIGHT_MT6370=m
CONFIG_BACKLIGHT_APPLE=m CONFIG_BACKLIGHT_APPLE=m
CONFIG_BACKLIGHT_QCOM_WLED=m CONFIG_BACKLIGHT_QCOM_WLED=m
CONFIG_BACKLIGHT_RT4831=m CONFIG_BACKLIGHT_RT4831=m
@@ -7198,8 +7193,9 @@ CONFIG_SND_SOC_SOF_INTEL_COMMON=m
CONFIG_SND_SOC_SOF_BAYTRAIL=m CONFIG_SND_SOC_SOF_BAYTRAIL=m
# CONFIG_SND_SOC_SOF_BROADWELL is not set # CONFIG_SND_SOC_SOF_BROADWELL is not set
CONFIG_SND_SOC_SOF_MERRIFIELD=m CONFIG_SND_SOC_SOF_MERRIFIELD=m
# CONFIG_SND_SOC_SOF_SKYLAKE is not set CONFIG_SND_SOC_SOF_INTEL_SKL=m
# CONFIG_SND_SOC_SOF_KABYLAKE is not set CONFIG_SND_SOC_SOF_SKYLAKE=m
CONFIG_SND_SOC_SOF_KABYLAKE=m
CONFIG_SND_SOC_SOF_INTEL_APL=m CONFIG_SND_SOC_SOF_INTEL_APL=m
CONFIG_SND_SOC_SOF_APOLLOLAKE=m CONFIG_SND_SOC_SOF_APOLLOLAKE=m
CONFIG_SND_SOC_SOF_GEMINILAKE=m CONFIG_SND_SOC_SOF_GEMINILAKE=m
@@ -7402,8 +7398,7 @@ CONFIG_SND_SOC_SIGMADSP_REGMAP=m
CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m
CONFIG_SND_SOC_SIMPLE_MUX=m CONFIG_SND_SOC_SIMPLE_MUX=m
CONFIG_SND_SOC_SPDIF=m CONFIG_SND_SOC_SPDIF=m
CONFIG_SND_SOC_SRC4XXX_I2C=m # CONFIG_SND_SOC_SRC4XXX_I2C is not set
CONFIG_SND_SOC_SRC4XXX=m
CONFIG_SND_SOC_SSM2305=m CONFIG_SND_SOC_SSM2305=m
CONFIG_SND_SOC_SSM2518=m CONFIG_SND_SOC_SSM2518=m
CONFIG_SND_SOC_SSM2602=m CONFIG_SND_SOC_SSM2602=m
@@ -7561,7 +7556,7 @@ CONFIG_HID_KYE=m
CONFIG_HID_UCLOGIC=m CONFIG_HID_UCLOGIC=m
CONFIG_HID_WALTOP=m CONFIG_HID_WALTOP=m
CONFIG_HID_VIEWSONIC=m CONFIG_HID_VIEWSONIC=m
CONFIG_HID_VRC2=m # CONFIG_HID_VRC2 is not set
CONFIG_HID_XIAOMI=m CONFIG_HID_XIAOMI=m
CONFIG_HID_GYRATION=m CONFIG_HID_GYRATION=m
CONFIG_HID_ICADE=m CONFIG_HID_ICADE=m
@@ -7606,7 +7601,7 @@ CONFIG_HID_PICOLCD_CIR=y
CONFIG_HID_PLANTRONICS=m CONFIG_HID_PLANTRONICS=m
CONFIG_HID_PLAYSTATION=m CONFIG_HID_PLAYSTATION=m
CONFIG_PLAYSTATION_FF=y CONFIG_PLAYSTATION_FF=y
CONFIG_HID_PXRC=m # CONFIG_HID_PXRC is not set
CONFIG_HID_RAZER=m CONFIG_HID_RAZER=m
CONFIG_HID_PRIMAX=m CONFIG_HID_PRIMAX=m
CONFIG_HID_RETRODE=m CONFIG_HID_RETRODE=m
@@ -8067,7 +8062,6 @@ CONFIG_TYPEC_TCPM=m
CONFIG_TYPEC_TCPCI=m CONFIG_TYPEC_TCPCI=m
CONFIG_TYPEC_RT1711H=m CONFIG_TYPEC_RT1711H=m
CONFIG_TYPEC_MT6360=m CONFIG_TYPEC_MT6360=m
CONFIG_TYPEC_TCPCI_MT6370=m
CONFIG_TYPEC_TCPCI_MAXIM=m CONFIG_TYPEC_TCPCI_MAXIM=m
CONFIG_TYPEC_FUSB302=m CONFIG_TYPEC_FUSB302=m
CONFIG_TYPEC_WCOVE=m CONFIG_TYPEC_WCOVE=m
@@ -9239,7 +9233,7 @@ CONFIG_MEN_Z188_ADC=m
CONFIG_MP2629_ADC=m CONFIG_MP2629_ADC=m
CONFIG_NAU7802=m CONFIG_NAU7802=m
CONFIG_PALMAS_GPADC=m CONFIG_PALMAS_GPADC=m
CONFIG_RICHTEK_RTQ6056=m # CONFIG_RICHTEK_RTQ6056 is not set
CONFIG_SD_ADC_MODULATOR=m CONFIG_SD_ADC_MODULATOR=m
CONFIG_TI_ADC081C=m CONFIG_TI_ADC081C=m
CONFIG_TI_ADC0832=m CONFIG_TI_ADC0832=m
@@ -9487,9 +9481,8 @@ CONFIG_ADIS16480=m
CONFIG_BMI160=m CONFIG_BMI160=m
CONFIG_BMI160_I2C=m CONFIG_BMI160_I2C=m
CONFIG_BMI160_SPI=m CONFIG_BMI160_SPI=m
CONFIG_BOSCH_BNO055=m # CONFIG_BOSCH_BNO055_SERIAL is not set
CONFIG_BOSCH_BNO055_SERIAL=m # CONFIG_BOSCH_BNO055_I2C is not set
CONFIG_BOSCH_BNO055_I2C=m
CONFIG_FXOS8700=m CONFIG_FXOS8700=m
CONFIG_FXOS8700_I2C=m CONFIG_FXOS8700_I2C=m
CONFIG_FXOS8700_SPI=m CONFIG_FXOS8700_SPI=m
@@ -9834,7 +9827,7 @@ CONFIG_DEV_DAX_HMEM_DEVICES=y
CONFIG_DEV_DAX_KMEM=m CONFIG_DEV_DAX_KMEM=m
CONFIG_NVMEM=y CONFIG_NVMEM=y
CONFIG_NVMEM_SYSFS=y CONFIG_NVMEM_SYSFS=y
CONFIG_NVMEM_RAVE_SP_EEPROM=m # CONFIG_NVMEM_RAVE_SP_EEPROM is not set
CONFIG_NVMEM_RMEM=m CONFIG_NVMEM_RMEM=m
# #
@@ -11230,7 +11223,6 @@ CONFIG_ASYNC_RAID6_TEST=m
# CONFIG_TEST_BITMAP is not set # CONFIG_TEST_BITMAP is not set
# CONFIG_TEST_UUID is not set # CONFIG_TEST_UUID is not set
# CONFIG_TEST_XARRAY is not set # CONFIG_TEST_XARRAY is not set
# CONFIG_TEST_MAPLE_TREE is not set
# CONFIG_TEST_RHASHTABLE is not set # CONFIG_TEST_RHASHTABLE is not set
# CONFIG_TEST_SIPHASH is not set # CONFIG_TEST_SIPHASH is not set
# CONFIG_TEST_IDA is not set # CONFIG_TEST_IDA is not set

View File

@@ -1,14 +0,0 @@
[Trigger]
Type = File
Operation = Install
Operation = Upgrade
Operation = Remove
Target = usr/lib/modules/*/
Target = !usr/lib/modules/*/?*
[Action]
Description = Cleaning up...
When = PostTransaction
Exec = /usr/share/libalpm/scripts/cleanup
NeedsTargets

View File

@@ -1,10 +0,0 @@
#!/bin/bash
for _f in /usr/lib/modules/*tkg*; do
if [[ ! -e ${_f}/vmlinuz ]]; then
rm -rf "$_f"
fi
done
# vim:set ft=sh sw=2 et:

File diff suppressed because it is too large

View File

@@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
# List of kernels that are maintained upstream # List of kernels that are maintained upstream
_current_kernels=("6.2" "6.1" "6.0" "5.15" "5.10" "5.4") _current_kernels=("6.1" "6.0" "5.15" "5.10" "5.4")
# List of kernels that are no longer maintained upstream # List of kernels that are no longer maintained upstream
_eol_kernels=("5.19" "5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7") _eol_kernels=("5.19" "5.18" "5.17" "5.16" "5.14" "5.13" "5.12" "5.11" "5.9" "5.8" "5.7")
@@ -39,32 +39,32 @@ done
# PREEMPT_RT's supported kernel subversion # PREEMPT_RT's supported kernel subversion
typeset -Ag _rt_subver_map typeset -Ag _rt_subver_map
_rt_subver_map=( _rt_subver_map=(
["5.4"]="221" ["5.4"]="209"
["5.9"]="1" ["5.9"]="1"
["5.10"]="153" ["5.10"]="153"
["5.11"]="4" ["5.11"]="4"
["5.14"]="2" ["5.14"]="2"
["5.15"]="79" ["5.15"]="76"
["5.16"]="2" ["5.16"]="2"
["5.17"]="1" ["5.17"]="1"
["6.0"]="5" ["6.0"]="5"
["6.1"]="rc7" ["6.1"]="rc3"
) )
# PREEMPT_RT's patch revision for the kernel # PREEMPT_RT's patch revision for the kernel
# We separated this to allow for forcing the application of the patch when _preempt_rt_force=1 on version mismatch # We separated this to allow for forcing the application of the patch when _preempt_rt_force=1 on version mismatch
typeset -Ag _rt_rev_map typeset -Ag _rt_rev_map
_rt_rev_map=( _rt_rev_map=(
["5.4"]="79" ["5.4"]="77"
["5.9"]="20" ["5.9"]="20"
["5.10"]="76" ["5.10"]="76"
["5.11"]="11" ["5.11"]="11"
["5.14"]="21" ["5.14"]="21"
["5.15"]="54" ["5.15"]="53"
["5.16"]="19" ["5.16"]="19"
["5.17"]="17" ["5.17"]="17"
["6.0"]="14" ["6.0"]="14"
["6.1"]="5" ["6.1"]="2"
) )
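
To make the two maps concrete: together they pin one PREEMPT_RT release per kernel series, and the upstream -rt patches are named from exactly these two numbers. An illustrative, self-contained sketch (the actual download logic in prepare is not shown here):

#!/bin/bash
declare -A _rt_subver_map=( ["5.15"]="79" )
declare -A _rt_rev_map=( ["5.15"]="54" )
_basever="5.15"
echo "patch-${_basever}.${_rt_subver_map[$_basever]}-rt${_rt_rev_map[$_basever]}.patch.xz"
# -> patch-5.15.79-rt54.patch.xz (values taken from one side of the hunk above)
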
_undefine() { _undefine() {
@@ -281,11 +281,9 @@ _set_cpu_scheduler() {
_avail_cpu_scheds=("cfs" "pds" "bmq" "cacule" "tt" "bore") _avail_cpu_scheds=("cfs" "pds" "bmq" "cacule" "tt" "bore")
elif [ "$_kver" = "600" ]; then elif [ "$_kver" = "600" ]; then
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore") _avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
_projectc_unoff="1"
elif [ "$_kver" = "601" ]; then elif [ "$_kver" = "601" ]; then
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore") _avail_cpu_scheds=("cfs" "bore")
_projectc_unoff=1
elif [ "$_kver" = "602" ]; then
_avail_cpu_scheds=("cfs" "tt" "bore")
else else
_avail_cpu_scheds=("cfs") _avail_cpu_scheds=("cfs")
fi fi
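
The hunk above only narrows which schedulers each kernel version offers; the scheduler itself is selected in customization.cfg. A hedged example matching this commit's PDS focus (the _cpusched name is assumed from linux-tkg's customization.cfg):

_cpusched="pds"   # must be one of the entries listed in _avail_cpu_scheds for the chosen kernel version above
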
@@ -392,15 +390,9 @@ _linux_git_branch_checkout() {
cd "$_where" cd "$_where"
if ! [ -d linux-src-git ] || ( [ "$_source_in_tmpfs" = "true" ] && ! [ -d /tmp/linux-src-git ] ); then if ! [ -d linux-src-git ]; then
msg2 "First initialization of the linux source code git folder" msg2 "First initialization of the linux source code git folder"
if [ "$_source_in_tmpfs" = "true" ]; then
rm -rf "${_where}/linux-src-git"
mkdir "/tmp/linux-src-git"
ln -s "/tmp/linux-src-git" "${_where}"
else
mkdir linux-src-git mkdir linux-src-git
fi
cd linux-src-git cd linux-src-git
git init git init
@@ -408,11 +400,6 @@ _linux_git_branch_checkout() {
git remote add "$remote" "${_kernel_git_remotes[$remote]}" git remote add "$remote" "${_kernel_git_remotes[$remote]}"
done done
else else
if [ "$_source_in_tmpfs" = "true" ]; then
rm -rf "${_where}/linux-src-git"
ln -s "/tmp/linux-src-git" "${_where}"
fi
cd linux-src-git cd linux-src-git
# Remove "origin" remote if present # Remove "origin" remote if present
@@ -897,19 +884,13 @@ _tkg_srcprep() {
if [ "$_kver" = "504" ] || [ "$_kver" = "509" ]; then if [ "$_kver" = "504" ] || [ "$_kver" = "509" ]; then
scripts/config --set-val "RCU_BOOST_DELAY" "0" scripts/config --set-val "RCU_BOOST_DELAY" "0"
fi fi
_disable "NTP_PPS" "ZSWAP_COMPRESSOR_DEFAULT_LZO" "PROFILE_ALL_BRANCHES" _disable "NTP_PPS" "CPU_FREQ_DEFAULT_GOV_PERFORMANCE_NODEF" "ZSWAP_COMPRESSOR_DEFAULT_LZO" "PROFILE_ALL_BRANCHES"
_enable "CRYPTO_LZ4" "CRYPTO_LZ4HC" "LZ4_COMPRESS" "LZ4HC_COMPRESS" "ZSWAP_COMPRESSOR_DEFAULT_LZ4" "X86_AMD_PSTATE" "AMD_PINCTRL" _enable "CRYPTO_LZ4" "CRYPTO_LZ4HC" "LZ4_COMPRESS" "LZ4HC_COMPRESS" "ZSWAP_COMPRESSOR_DEFAULT_LZ4" "CMDLINE_BOOL" "BLK_DEV_LOOP" "X86_AMD_PSTATE" "AMD_PINCTRL" "CONTEXT_TRACKING_FORCE"
_disable "DEBUG_FORCE_FUNCTION_ALIGN_64B" "X86_P6_NOP" _disable "DEBUG_FORCE_FUNCTION_ALIGN_64B"
scripts/config --set-str "ZSWAP_COMPRESSOR_DEFAULT" "lz4" scripts/config --set-str "ZSWAP_COMPRESSOR_DEFAULT" "lz4"
_enable "CPU_FREQ_DEFAULT_GOV_SCHEDUTIL"
_disable "CPU_FREQ_DEFAULT_GOV_ONDEMAND" "CPU_FREQ_DEFAULT_GOV_CONSERVATIVE" "CPU_FREQ_DEFAULT_GOV_PERFORMANCE" "CPU_FREQ_DEFAULT_GOV_PERFORMANCE_NODEF"
_module "BLK_DEV_LOOP"
if [ -n "$_custom_commandline" ]; then
_enable "CMDLINE_BOOL"
_disable "CMDLINE_OVERRIDE"
scripts/config --set-str "CMDLINE" "${_custom_commandline}" scripts/config --set-str "CMDLINE" "${_custom_commandline}"
fi _disable "CMDLINE_OVERRIDE" "X86_P6_NOP" "CPU_FREQ_DEFAULT_GOV_ONDEMAND" "CPU_FREQ_DEFAULT_GOV_CONSERVATIVE"
#echo "# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set" >> ./.config
# openrgb # openrgb
_module "I2C_NCT6775" _module "I2C_NCT6775"
@@ -970,7 +951,7 @@ _tkg_srcprep() {
_cpu_marchs+=("steamroller" "excavator" "zen" "zen2" "zen3" "zen4" "mpsc" "atom" "core2" "nehalem" "westmere") _cpu_marchs+=("steamroller" "excavator" "zen" "zen2" "zen3" "zen4" "mpsc" "atom" "core2" "nehalem" "westmere")
_cpu_marchs+=("bonnell" "silvermont" "sandybridge" "ivybridge" "haswell" "broadwell" "skylake") _cpu_marchs+=("bonnell" "silvermont" "sandybridge" "ivybridge" "haswell" "broadwell" "skylake")
_cpu_marchs+=("skylakex" "cannonlake" "icelake" "goldmont" "goldmontplus" "cascadelake") _cpu_marchs+=("skylakex" "cannonlake" "icelake" "goldmont" "goldmontplus" "cascadelake")
_cpu_marchs+=("cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake" "raptorlake" "meteorlake") _cpu_marchs+=("cooperlake" "tigerlake" "sapphirerapids" "rocketlake" "alderlake" "meteorlake")
typeset -A _generic_march_map typeset -A _generic_march_map
_generic_march_map=( _generic_march_map=(
@@ -1362,14 +1343,14 @@ _tkg_srcprep() {
_tickless="${_selected_index}" _tickless="${_selected_index}"
fi fi
if [ "$_tickless" = "0" ]; then if [ "$_tickless" = "0" ]; then
_disable "NO_HZ_FULL_NODEF" "NO_HZ_IDLE" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "VIRT_CPU_ACCOUNTING" "VIRT_CPU_ACCOUNTING_GEN" _disable "NO_HZ_FULL_NODEF" "NO_HZ_IDLE" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "VIRT_CPU_ACCOUNTING_GEN"
_enable "HZ_PERIODIC" "TICK_CPU_ACCOUNTING" _enable "HZ_PERIODIC" "TICK_CPU_ACCOUNTING"
elif [ "$_tickless" = "1" ]; then elif [ "$_tickless" = "1" ]; then
_disable "HZ_PERIODIC" "NO_HZ_IDLE" "TICK_CPU_ACCOUNTING" "CONTEXT_TRACKING_FORCE" _disable "HZ_PERIODIC" "NO_HZ_IDLE" "TICK_CPU_ACCOUNTING"
_enable "NO_HZ_FULL_NODEF" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "CONTEXT_TRACKING" "VIRT_CPU_ACCOUNTING" "VIRT_CPU_ACCOUNTING_GEN" _enable "NO_HZ_FULL_NODEF" "NO_HZ_FULL" "NO_HZ" "NO_HZ_COMMON" "CONTEXT_TRACKING" "VIRT_CPU_ACCOUNTING_GEN"
else else
_disable "NO_HZ_FULL_NODEF" "HZ_PERIODIC" "NO_HZ_FULL" "TICK_CPU_ACCOUNTING" "CONTEXT_TRACKING_FORCE" _disable "NO_HZ_FULL_NODEF" "HZ_PERIODIC" "NO_HZ_FULL" "TICK_CPU_ACCOUNTING"
_enable "NO_HZ_IDLE" "NO_HZ" "NO_HZ_COMMON" "CONTEXT_TRACKING" "VIRT_CPU_ACCOUNTING" "VIRT_CPU_ACCOUNTING_GEN" _enable "NO_HZ_IDLE" "NO_HZ" "NO_HZ_COMMON" "VIRT_CPU_ACCOUNTING_GEN"
fi fi
# acs override # acs override
@@ -1613,9 +1594,22 @@ CONFIG_DEBUG_INFO_BTF_MODULES=y\r
fi fi
# NR_CPUS # NR_CPUS
if [ -z "$_NR_CPUS_value" ]; then
plain ""
plain "Set NR_CPUS value to the current host's threads count?"
plain "For best results, it should be equal to the maximum number of threads the target machine has."
plain "If you want to use the resulting kernel on a machine with more threads, you can hit enter or answer N to use a default of 128."
read -rp "`echo $' > N/y : '`" CONDITION_nrcpus;
fi
if [[ "$CONDITION_nrcpus" =~ [yY] ]]; then
_NR_CPUS_value="$(nproc)"
fi
if [ -n "$_NR_CPUS_value" ]; then if [ -n "$_NR_CPUS_value" ]; then
scripts/config --set-val "NR_CPUS" "$_NR_CPUS_value" scripts/config --set-val "NR_CPUS" "$_NR_CPUS_value"
_enable "FORCE_NR_CPUS" _enable "FORCE_NR_CPUS"
else
scripts/config --set-val "NR_CPUS" "128"
_disable "FORCE_NR_CPUS"
fi fi
fi fi

View File

@@ -709,7 +709,7 @@ index 000000000000..8b0ddbdd24e4
+#include <asm/switch_to.h> +#include <asm/switch_to.h>
+ +
+#include "../workqueue_internal.h" +#include "../workqueue_internal.h"
+#include "../../io_uring/io-wq.h" +#include "../../fs/io-wq.h"
+#include "../smpboot.h" +#include "../smpboot.h"
+ +
+#include "pelt.h" +#include "pelt.h"

View File

@@ -1,3 +1,56 @@
From 711a56e8f6314d77141b0f661e6c13c8a2c4dddf Mon Sep 17 00:00:00 2001
From: Tor Vic <torvic9@mailbox.org>
Date: Wed, 16 Nov 2022 11:29:00 +0100
Subject: [PATCH] Project-C 6.0-rc0-vd
---
.../admin-guide/kernel-parameters.txt | 6 +
Documentation/admin-guide/sysctl/kernel.rst | 10 +
Documentation/scheduler/sched-BMQ.txt | 110 +
fs/proc/base.c | 2 +-
include/asm-generic/resource.h | 2 +-
include/linux/sched.h | 33 +-
include/linux/sched/deadline.h | 20 +
include/linux/sched/prio.h | 26 +
include/linux/sched/rt.h | 2 +
include/linux/sched/topology.h | 3 +-
init/Kconfig | 34 +
init/init_task.c | 18 +
kernel/Kconfig.preempt | 2 +-
kernel/cgroup/cpuset.c | 4 +-
kernel/delayacct.c | 2 +-
kernel/exit.c | 4 +-
kernel/locking/rtmutex.c | 16 +-
kernel/sched/Makefile | 5 +
kernel/sched/alt_core.c | 7959 +++++++++++++++++
kernel/sched/alt_debug.c | 31 +
kernel/sched/alt_sched.h | 645 ++
kernel/sched/bmq.h | 110 +
kernel/sched/build_policy.c | 8 +-
kernel/sched/build_utility.c | 3 +-
kernel/sched/cpufreq_schedutil.c | 10 +
kernel/sched/cputime.c | 10 +-
kernel/sched/debug.c | 10 +
kernel/sched/idle.c | 2 +
kernel/sched/pds.h | 127 +
kernel/sched/pelt.c | 4 +-
kernel/sched/pelt.h | 8 +-
kernel/sched/sched.h | 9 +
kernel/sched/stats.c | 4 +
kernel/sched/stats.h | 2 +
kernel/sched/topology.c | 17 +
kernel/sysctl.c | 15 +
kernel/time/hrtimer.c | 2 +
kernel/time/posix-cpu-timers.c | 10 +-
kernel/trace/trace_selftest.c | 5 +
39 files changed, 9267 insertions(+), 23 deletions(-)
create mode 100644 Documentation/scheduler/sched-BMQ.txt
create mode 100644 kernel/sched/alt_core.c
create mode 100644 kernel/sched/alt_debug.c
create mode 100644 kernel/sched/alt_sched.h
create mode 100644 kernel/sched/bmq.h
create mode 100644 kernel/sched/pds.h
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 426fa892d311..43b06e44128c 100644 index 426fa892d311..43b06e44128c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt --- a/Documentation/admin-guide/kernel-parameters.txt
@@ -352,7 +405,7 @@ index 816df6cc444e..c8da08e18c91 100644
#else #else
static inline void rebuild_sched_domains_energy(void) static inline void rebuild_sched_domains_energy(void)
diff --git a/init/Kconfig b/init/Kconfig diff --git a/init/Kconfig b/init/Kconfig
index 532362fcfe31..2bf9e67b73c9 100644 index 532362fcfe31..d9ccd98f2856 100644
--- a/init/Kconfig --- a/init/Kconfig
+++ b/init/Kconfig +++ b/init/Kconfig
@@ -808,6 +808,7 @@ menu "Scheduler features" @@ -808,6 +808,7 @@ menu "Scheduler features"
@@ -369,15 +422,15 @@ index 532362fcfe31..2bf9e67b73c9 100644
+menuconfig SCHED_ALT +menuconfig SCHED_ALT
+ bool "Alternative CPU Schedulers" + bool "Alternative CPU Schedulers"
+ default y + default n
+ help + help
+ This feature enable alternative CPU scheduler" + This feature enables the ProjectC alternative CPU schedulers."
+ +
+if SCHED_ALT +if SCHED_ALT
+ +
+choice +choice
+ prompt "Alternative CPU Scheduler" + prompt "Alternative CPU schedulers"
+ default SCHED_BMQ + default SCHED_PDS
+ +
+config SCHED_BMQ +config SCHED_BMQ
+ bool "BMQ CPU scheduler" + bool "BMQ CPU scheduler"
@@ -632,10 +685,10 @@ index 976092b7bd45..31d587c16ec1 100644
obj-y += build_utility.o obj-y += build_utility.o
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
new file mode 100644 new file mode 100644
index 000000000000..03e3956194f7 index 000000000000..18dfee354f9b
--- /dev/null --- /dev/null
+++ b/kernel/sched/alt_core.c +++ b/kernel/sched/alt_core.c
@@ -0,0 +1,7890 @@ @@ -0,0 +1,7959 @@
+/* +/*
+ * kernel/sched/alt_core.c + * kernel/sched/alt_core.c
+ * + *
@@ -705,7 +758,7 @@ index 000000000000..03e3956194f7
+#define sched_feat(x) (0) +#define sched_feat(x) (0)
+#endif /* CONFIG_SCHED_DEBUG */ +#endif /* CONFIG_SCHED_DEBUG */
+ +
+#define ALT_SCHED_VERSION "v6.0-r0" +#define ALT_SCHED_VERSION "v6.0-r0-vd"
+ +
+/* rt_prio(prio) defined in include/linux/sched/rt.h */ +/* rt_prio(prio) defined in include/linux/sched/rt.h */
+#define rt_task(p) rt_prio((p)->prio) +#define rt_task(p) rt_prio((p)->prio)
@@ -785,7 +838,91 @@ index 000000000000..03e3956194f7
+#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_SCHED_SMT
+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; +static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp;
+#endif +#endif
+static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp; +
+#define BITS_PER_ATOMIC_LONG_T BITS_PER_LONG
+typedef struct sched_bitmask {
+ atomic_long_t bits[DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T)];
+} sched_bitmask_t;
+static sched_bitmask_t sched_rq_watermark[NR_CPUS] ____cacheline_aligned_in_smp;
+
+#define x(p, set, mask) \
+ do { \
+ smp_mb__before_atomic(); \
+ if (set) \
+ atomic_long_or((mask), (p)); \
+ else \
+ atomic_long_and(~(mask), (p)); \
+ smp_mb__after_atomic(); \
+ } while (0)
+
+static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned int end,
+ unsigned int start, bool set)
+{
+ unsigned int start_idx, start_bit;
+ unsigned int end_idx, end_bit;
+ atomic_long_t *p;
+
+ if (end == start) {
+ return;
+ }
+
+ start_idx = start / BITS_PER_ATOMIC_LONG_T;
+ start_bit = start % BITS_PER_ATOMIC_LONG_T;
+ end_idx = (end - 1) / BITS_PER_ATOMIC_LONG_T;
+ end_bit = (end - 1) % BITS_PER_ATOMIC_LONG_T;
+ p = &sched_rq_watermark[cpu].bits[end_idx];
+
+ if (end_idx == start_idx) {
+ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)) & (~0UL << start_bit));
+ return;
+ }
+
+ if (end_bit != BITS_PER_ATOMIC_LONG_T - 1) {
+ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)));
+ p -= 1;
+ end_idx -= 1;
+ }
+
+ while (end_idx != start_idx) {
+ smp_mb__before_atomic();
+ atomic_long_set(p, set ? ~0UL : 0);
+ smp_mb__after_atomic();
+ p -= 1;
+ end_idx -= 1;
+ }
+
+ x(p, set, ~0UL << start_bit);
+}
+
+#undef x
+
+static __always_inline bool sched_rq_watermark_and(cpumask_t *dstp, const cpumask_t *cpus, int prio, bool not)
+{
+ int cpu;
+ bool ret = false;
+ int idx = prio / BITS_PER_ATOMIC_LONG_T;
+ int bit = prio % BITS_PER_ATOMIC_LONG_T;
+
+ cpumask_clear(dstp);
+ for_each_cpu(cpu, cpus)
+ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not) {
+ __cpumask_set_cpu(cpu, dstp);
+ ret = true;
+ }
+ return ret;
+}
+
+static __always_inline bool sched_rq_watermark_test(const cpumask_t *cpus, int prio, bool not)
+{
+ int cpu;
+ int idx = prio / BITS_PER_ATOMIC_LONG_T;
+ int bit = prio % BITS_PER_ATOMIC_LONG_T;
+
+ for_each_cpu(cpu, cpus)
+ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not)
+ return true;
+ return false;
+}
+ +
+/* sched_queue related functions */ +/* sched_queue related functions */
+static inline void sched_queue_init(struct sched_queue *q) +static inline void sched_queue_init(struct sched_queue *q)
@@ -814,7 +951,6 @@ index 000000000000..03e3956194f7
+{ +{
+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); + unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS);
+ unsigned long last_wm = rq->watermark; + unsigned long last_wm = rq->watermark;
+ unsigned long i;
+ int cpu; + int cpu;
+ +
+ if (watermark == last_wm) + if (watermark == last_wm)
@@ -823,28 +959,25 @@ index 000000000000..03e3956194f7
+ rq->watermark = watermark; + rq->watermark = watermark;
+ cpu = cpu_of(rq); + cpu = cpu_of(rq);
+ if (watermark < last_wm) { + if (watermark < last_wm) {
+ for (i = last_wm; i > watermark; i--) + sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - watermark, SCHED_QUEUE_BITS - last_wm, false);
+ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
+#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_SCHED_SMT
+ if (static_branch_likely(&sched_smt_present) && + if (static_branch_likely(&sched_smt_present) &&
+ IDLE_TASK_SCHED_PRIO == last_wm) + unlikely(IDLE_TASK_SCHED_PRIO == last_wm))
+ cpumask_andnot(&sched_sg_idle_mask, + cpumask_andnot(&sched_sg_idle_mask,
+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); + &sched_sg_idle_mask, cpu_smt_mask(cpu));
+#endif +#endif
+ return; + return;
+ } + }
+ /* last_wm < watermark */ + /* last_wm < watermark */
+ for (i = watermark; i > last_wm; i--) + sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - last_wm, SCHED_QUEUE_BITS - watermark, true);
+ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
+#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_SCHED_SMT
+ if (static_branch_likely(&sched_smt_present) && + if (static_branch_likely(&sched_smt_present) &&
+ IDLE_TASK_SCHED_PRIO == watermark) { + unlikely(IDLE_TASK_SCHED_PRIO == watermark)) {
+ cpumask_t tmp; + const cpumask_t *smt_mask = cpu_smt_mask(cpu);
+ +
+ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark); + if (!sched_rq_watermark_test(smt_mask, 0, true))
+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu)))
+ cpumask_or(&sched_sg_idle_mask, + cpumask_or(&sched_sg_idle_mask,
+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); + &sched_sg_idle_mask, smt_mask);
+ } + }
+#endif +#endif
+} +}
@@ -1261,21 +1394,15 @@ index 000000000000..03e3956194f7
+ rq->load_stamp = time; + rq->load_stamp = time;
+} +}
+ +
+unsigned long rq_load_util(int cpu) +unsigned long rq_load_util(struct rq *rq, int cpu)
+{ +{
+ struct rq *rq; + return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (arch_scale_cpu_capacity(cpu) >> RQ_UTIL_SHIFT);
+ unsigned long max;
+
+ rq = cpu_rq(cpu);
+ max = arch_scale_cpu_capacity(cpu);
+
+ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (max >> RQ_UTIL_SHIFT);
+} +}
+ +
+#ifdef CONFIG_SMP +#ifdef CONFIG_SMP
+unsigned long sched_cpu_util(int cpu) +unsigned long sched_cpu_util(int cpu)
+{ +{
+ return rq_load_util(cpu); + return rq_load_util(cpu_rq(cpu), cpu);
+} +}
+#endif /* CONFIG_SMP */ +#endif /* CONFIG_SMP */
+ +
@@ -1452,15 +1579,11 @@ index 000000000000..03e3956194f7
+ ({ \ + ({ \
+ typeof(ptr) _ptr = (ptr); \ + typeof(ptr) _ptr = (ptr); \
+ typeof(mask) _mask = (mask); \ + typeof(mask) _mask = (mask); \
+ typeof(*_ptr) _old, _val = *_ptr; \ + typeof(*_ptr) _val = *_ptr; \
+ \ + \
+ for (;;) { \ + do { \
+ _old = cmpxchg(_ptr, _val, _val | _mask); \ + } while (!try_cmpxchg(_ptr, &_val, _val | _mask)); \
+ if (_old == _val) \ + _val; \
+ break; \
+ _val = _old; \
+ } \
+ _old; \
+}) +})
+ +
+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) +#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
@@ -1469,7 +1592,7 @@ index 000000000000..03e3956194f7
+ * this avoids any races wrt polling state changes and thereby avoids + * this avoids any races wrt polling state changes and thereby avoids
+ * spurious IPIs. + * spurious IPIs.
+ */ + */
+static bool set_nr_and_not_polling(struct task_struct *p) +static inline bool set_nr_and_not_polling(struct task_struct *p)
+{ +{
+ struct thread_info *ti = task_thread_info(p); + struct thread_info *ti = task_thread_info(p);
+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); + return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
@@ -1484,30 +1607,28 @@ index 000000000000..03e3956194f7
+static bool set_nr_if_polling(struct task_struct *p) +static bool set_nr_if_polling(struct task_struct *p)
+{ +{
+ struct thread_info *ti = task_thread_info(p); + struct thread_info *ti = task_thread_info(p);
+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); + typeof(ti->flags) val = READ_ONCE(ti->flags);
+ +
+ for (;;) { + for (;;) {
+ if (!(val & _TIF_POLLING_NRFLAG)) + if (!(val & _TIF_POLLING_NRFLAG))
+ return false; + return false;
+ if (val & _TIF_NEED_RESCHED) + if (val & _TIF_NEED_RESCHED)
+ return true; + return true;
+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); + if (try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED))
+ if (old == val)
+ break; + break;
+ val = old;
+ } + }
+ return true; + return true;
+} +}
+ +
+#else +#else
+static bool set_nr_and_not_polling(struct task_struct *p) +static inline bool set_nr_and_not_polling(struct task_struct *p)
+{ +{
+ set_tsk_need_resched(p); + set_tsk_need_resched(p);
+ return true; + return true;
+} +}
+ +
+#ifdef CONFIG_SMP +#ifdef CONFIG_SMP
+static bool set_nr_if_polling(struct task_struct *p) +static inline bool set_nr_if_polling(struct task_struct *p)
+{ +{
+ return false; + return false;
+} +}
@@ -2181,7 +2302,7 @@ index 000000000000..03e3956194f7
+ rq = cpu_rq(new_cpu); + rq = cpu_rq(new_cpu);
+ +
+ raw_spin_lock(&rq->lock); + raw_spin_lock(&rq->lock);
+ BUG_ON(task_cpu(p) != new_cpu); + WARN_ON_ONCE(task_cpu(p) != new_cpu);
+ sched_task_sanity_check(p, rq); + sched_task_sanity_check(p, rq);
+ enqueue_task(p, rq, 0); + enqueue_task(p, rq, 0);
+ p->on_rq = TASK_ON_RQ_QUEUED; + p->on_rq = TASK_ON_RQ_QUEUED;
@@ -2547,9 +2668,9 @@ index 000000000000..03e3956194f7
+#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_SCHED_SMT
+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || + cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) ||
+#endif +#endif
+ cpumask_and(&tmp, &chk_mask, sched_rq_watermark) || + sched_rq_watermark_and(&tmp, &chk_mask, 0, false) ||
+ cpumask_and(&tmp, &chk_mask, + sched_rq_watermark_and(&tmp, &chk_mask,
+ sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p))) + SCHED_QUEUE_BITS - 1 - task_sched_prio(p), false))
+ return best_mask_cpu(task_cpu(p), &tmp); + return best_mask_cpu(task_cpu(p), &tmp);
+ +
+ return best_mask_cpu(task_cpu(p), &chk_mask); + return best_mask_cpu(task_cpu(p), &chk_mask);
@@ -2990,13 +3111,6 @@ index 000000000000..03e3956194f7
+ if (!llist) + if (!llist)
+ return; + return;
+ +
+ /*
+ * rq::ttwu_pending racy indication of out-standing wakeups.
+ * Races such that false-negatives are possible, since they
+ * are shorter lived that false-positives would be.
+ */
+ WRITE_ONCE(rq->ttwu_pending, 0);
+
+ rq_lock_irqsave(rq, &rf); + rq_lock_irqsave(rq, &rf);
+ update_rq_clock(rq); + update_rq_clock(rq);
+ +
@@ -3010,6 +3124,17 @@ index 000000000000..03e3956194f7
+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); + ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0);
+ } + }
+ +
+ /*
+ * Must be after enqueueing at least once task such that
+ * idle_cpu() does not observe a false-negative -- if it does,
+ * it is possible for select_idle_siblings() to stack a number
+ * of tasks on this CPU during that window.
+ *
+ * It is ok to clear ttwu_pending when another task pending.
+ * We will receive IPI after local irq enabled and then enqueue it.
+ * Since now nr_running > 0, idle_cpu() will always get correct result.
+ */
+ WRITE_ONCE(rq->ttwu_pending, 0);
+ rq_unlock_irqrestore(rq, &rf); + rq_unlock_irqrestore(rq, &rf);
+} +}
+ +
@@ -3480,6 +3605,40 @@ index 000000000000..03e3956194f7
+ return success; + return success;
+} +}
+ +
+static bool __task_needs_rq_lock(struct task_struct *p)
+{
+ unsigned int state = READ_ONCE(p->__state);
+
+ /*
+ * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when
+ * the task is blocked. Make sure to check @state since ttwu() can drop
+ * locks at the end, see ttwu_queue_wakelist().
+ */
+ if (state == TASK_RUNNING || state == TASK_WAKING)
+ return true;
+
+ /*
+ * Ensure we load p->on_rq after p->__state, otherwise it would be
+ * possible to, falsely, observe p->on_rq == 0.
+ *
+ * See try_to_wake_up() for a longer comment.
+ */
+ smp_rmb();
+ if (p->on_rq)
+ return true;
+
+#ifdef CONFIG_SMP
+ /*
+ * Ensure the task has finished __schedule() and will not be referenced
+ * anymore. Again, see try_to_wake_up() for a longer comment.
+ */
+ smp_rmb();
+ smp_cond_load_acquire(&p->on_cpu, !VAL);
+#endif
+
+ return false;
+}
+
+/** +/**
+ * task_call_func - Invoke a function on task in fixed state + * task_call_func - Invoke a function on task in fixed state
+ * @p: Process for which the function is to be invoked, can be @current. + * @p: Process for which the function is to be invoked, can be @current.
@@ -3497,28 +3656,12 @@ index 000000000000..03e3956194f7
+int task_call_func(struct task_struct *p, task_call_f func, void *arg) +int task_call_func(struct task_struct *p, task_call_f func, void *arg)
+{ +{
+ struct rq *rq = NULL; + struct rq *rq = NULL;
+ unsigned int state;
+ struct rq_flags rf; + struct rq_flags rf;
+ int ret; + int ret;
+ +
+ raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
+ +
+ state = READ_ONCE(p->__state); + if (__task_needs_rq_lock(p))
+
+ /*
+ * Ensure we load p->on_rq after p->__state, otherwise it would be
+ * possible to, falsely, observe p->on_rq == 0.
+ *
+ * See try_to_wake_up() for a longer comment.
+ */
+ smp_rmb();
+
+ /*
+ * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when
+ * the task is blocked. Make sure to check @state since ttwu() can drop
+ * locks at the end, see ttwu_queue_wakelist().
+ */
+ if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq)
+ rq = __task_rq_lock(p, &rf); + rq = __task_rq_lock(p, &rf);
+ +
+ /* + /*
@@ -3931,8 +4074,7 @@ index 000000000000..03e3956194f7
+ * Claim the task as running, we do this before switching to it + * Claim the task as running, we do this before switching to it
+ * such that any running task will have this set. + * such that any running task will have this set.
+ * + *
+ * See the smp_load_acquire(&p->on_cpu) case in ttwu() and + * See the ttwu() WF_ON_CPU case and its ordering comment.
+ * its ordering comment.
+ */ + */
+ WRITE_ONCE(next->on_cpu, 1); + WRITE_ONCE(next->on_cpu, 1);
+} +}
@@ -4002,7 +4144,7 @@ index 000000000000..03e3956194f7
+ if (likely(!head)) + if (likely(!head))
+ return NULL; + return NULL;
+ +
+ lockdep_assert_rq_held(rq); + lockdep_assert_held(&rq->lock);
+ /* + /*
+ * Must not take balance_push_callback off the list when + * Must not take balance_push_callback off the list when
+ * splice_balance_callbacks() and balance_callbacks() are not + * splice_balance_callbacks() and balance_callbacks() are not
@@ -4673,7 +4815,7 @@ index 000000000000..03e3956194f7
+ * find potential cpus which can migrate the current running task + * find potential cpus which can migrate the current running task
+ */ + */
+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && + if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) &&
+ cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) && + sched_rq_watermark_and(&chk, cpu_online_mask, 0, true) &&
+ cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) { + cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) {
+ int i; + int i;
+ +
@@ -4815,7 +4957,7 @@ index 000000000000..03e3956194f7
+int __init sched_tick_offload_init(void) +int __init sched_tick_offload_init(void)
+{ +{
+ tick_work_cpu = alloc_percpu(struct tick_work); + tick_work_cpu = alloc_percpu(struct tick_work);
+ BUG_ON(!tick_work_cpu); + WARN_ON_ONCE(!tick_work_cpu);
+ return 0; + return 0;
+} +}
+ +
@@ -4981,9 +5123,8 @@ index 000000000000..03e3956194f7
+#ifdef ALT_SCHED_DEBUG +#ifdef ALT_SCHED_DEBUG
+void alt_sched_debug(void) +void alt_sched_debug(void)
+{ +{
+ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", + printk(KERN_INFO "sched: pending: 0x%04lx, sg_idle: 0x%04lx\n",
+ sched_rq_pending_mask.bits[0], + sched_rq_pending_mask.bits[0],
+ sched_rq_watermark[0].bits[0],
+ sched_sg_idle_mask.bits[0]); + sched_sg_idle_mask.bits[0]);
+} +}
+#else +#else
@@ -5621,7 +5762,7 @@ index 000000000000..03e3956194f7
+ enum ctx_state prev_state; + enum ctx_state prev_state;
+ +
+ /* Catch callers which need to be fixed */ + /* Catch callers which need to be fixed */
+ BUG_ON(preempt_count() || !irqs_disabled()); + WARN_ON_ONCE(preempt_count() || !irqs_disabled());
+ +
+ prev_state = exception_enter(); + prev_state = exception_enter();
+ +
@@ -5796,29 +5937,17 @@ index 000000000000..03e3956194f7
+EXPORT_SYMBOL(set_user_nice); +EXPORT_SYMBOL(set_user_nice);
+ +
+/* +/*
+ * is_nice_reduction - check if nice value is an actual reduction
+ *
+ * Similar to can_nice() but does not perform a capability check.
+ *
+ * @p: task
+ * @nice: nice value
+ */
+static bool is_nice_reduction(const struct task_struct *p, const int nice)
+{
+ /* Convert nice value [19,-20] to rlimit style value [1,40]: */
+ int nice_rlim = nice_to_rlimit(nice);
+
+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE));
+}
+
+/*
+ * can_nice - check if a task can reduce its nice value + * can_nice - check if a task can reduce its nice value
+ * @p: task + * @p: task
+ * @nice: nice value + * @nice: nice value
+ */ + */
+int can_nice(const struct task_struct *p, const int nice) +int can_nice(const struct task_struct *p, const int nice)
+{ +{
+ return is_nice_reduction(p, nice) || capable(CAP_SYS_NICE); + /* Convert nice value [19,-20] to rlimit style value [1,40] */
+ int nice_rlim = nice_to_rlimit(nice);
+
+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
+ capable(CAP_SYS_NICE));
+} +}
+ +
+#ifdef __ARCH_WANT_SYS_NICE +#ifdef __ARCH_WANT_SYS_NICE
@@ -5969,45 +6098,6 @@ index 000000000000..03e3956194f7
+ return match; + return match;
+} +}
+ +
+/*
+ * Allow unprivileged RT tasks to decrease priority.
+ * Only issue a capable test if needed and only once to avoid an audit
+ * event on permitted non-privileged operations:
+ */
+static int user_check_sched_setscheduler(struct task_struct *p,
+ const struct sched_attr *attr,
+ int policy, int reset_on_fork)
+{
+ if (rt_policy(policy)) {
+ unsigned long rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
+
+ /* Can't set/change the rt policy: */
+ if (policy != p->policy && !rlim_rtprio)
+ goto req_priv;
+
+ /* Can't increase priority: */
+ if (attr->sched_priority > p->rt_priority &&
+ attr->sched_priority > rlim_rtprio)
+ goto req_priv;
+ }
+
+ /* Can't change other user's priorities: */
+ if (!check_same_owner(p))
+ goto req_priv;
+
+ /* Normal users shall not reset the sched_reset_on_fork flag: */
+ if (p->sched_reset_on_fork && !reset_on_fork)
+ goto req_priv;
+
+ return 0;
+
+req_priv:
+ if (!capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ return 0;
+}
+
+static int __sched_setscheduler(struct task_struct *p, +static int __sched_setscheduler(struct task_struct *p,
+ const struct sched_attr *attr, + const struct sched_attr *attr,
+ bool user, bool pi) + bool user, bool pi)
@@ -6027,7 +6117,7 @@ index 000000000000..03e3956194f7
+ raw_spinlock_t *lock; + raw_spinlock_t *lock;
+ +
+ /* The pi code expects interrupts enabled */ + /* The pi code expects interrupts enabled */
+ BUG_ON(pi && in_interrupt()); + WARN_ON_ONCE(pi && in_interrupt());
+ +
+ /* + /*
+ * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO + * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO
@@ -6064,11 +6154,34 @@ index 000000000000..03e3956194f7
+ (attr->sched_priority != 0)) + (attr->sched_priority != 0))
+ return -EINVAL; + return -EINVAL;
+ +
+ if (user) { + /*
+ retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork); + * Allow unprivileged RT tasks to decrease priority:
+ if (retval) + */
+ return retval; + if (user && !capable(CAP_SYS_NICE)) {
+ if (SCHED_FIFO == policy || SCHED_RR == policy) {
+ unsigned long rlim_rtprio =
+ task_rlimit(p, RLIMIT_RTPRIO);
+ +
+ /* Can't set/change the rt policy */
+ if (policy != p->policy && !rlim_rtprio)
+ return -EPERM;
+
+ /* Can't increase priority */
+ if (attr->sched_priority > p->rt_priority &&
+ attr->sched_priority > rlim_rtprio)
+ return -EPERM;
+ }
+
+ /* Can't change other user's priorities */
+ if (!check_same_owner(p))
+ return -EPERM;
+
+ /* Normal users shall not reset the sched_reset_on_fork flag */
+ if (p->sched_reset_on_fork && !reset_on_fork)
+ return -EPERM;
+ }
+
+ if (user) {
+ retval = security_task_setscheduler(p); + retval = security_task_setscheduler(p);
+ if (retval) + if (retval)
+ return retval; + return retval;
@@ -7492,7 +7605,7 @@ index 000000000000..03e3956194f7
+{ +{
+ struct mm_struct *mm = current->active_mm; + struct mm_struct *mm = current->active_mm;
+ +
+ BUG_ON(current != this_rq()->idle); + WARN_ON_ONCE(current != this_rq()->idle);
+ +
+ if (mm != &init_mm) { + if (mm != &init_mm) {
+ switch_mm(mm, &init_mm, current); + switch_mm(mm, &init_mm, current);
@@ -8006,8 +8119,17 @@ index 000000000000..03e3956194f7
+ wait_bit_init(); + wait_bit_init();
+ +
+#ifdef CONFIG_SMP +#ifdef CONFIG_SMP
+ for (i = 0; i < SCHED_QUEUE_BITS; i++) + for (i = 0; i < nr_cpu_ids; i++) {
+ cpumask_copy(sched_rq_watermark + i, cpu_present_mask); + long val = cpumask_test_cpu(i, cpu_present_mask) ? -1L : 0;
+ int j;
+ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++)
+ atomic_long_set(&sched_rq_watermark[i].bits[j], val);
+ }
+ for (i = nr_cpu_ids; i < NR_CPUS; i++) {
+ int j;
+ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++)
+ atomic_long_set(&sched_rq_watermark[i].bits[j], 0);
+ }
+#endif +#endif
+ +
+#ifdef CONFIG_CGROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED
@@ -8565,7 +8687,7 @@ index 000000000000..1212a031700e
+{} +{}
diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h
new file mode 100644 new file mode 100644
index 000000000000..bec061f2ae10 index 000000000000..318431c553ca
--- /dev/null --- /dev/null
+++ b/kernel/sched/alt_sched.h +++ b/kernel/sched/alt_sched.h
@@ -0,0 +1,645 @@ @@ -0,0 +1,645 @@
@@ -8801,7 +8923,7 @@ index 000000000000..bec061f2ae10
+#endif /* CONFIG_NO_HZ_COMMON */ +#endif /* CONFIG_NO_HZ_COMMON */
+}; +};
+ +
+extern unsigned long rq_load_util(int cpu); +extern unsigned long rq_load_util(struct rq *rq, int cpu);
+ +
+extern unsigned long calc_load_update; +extern unsigned long calc_load_update;
+extern atomic_long_t calc_load_tasks; +extern atomic_long_t calc_load_tasks;
@@ -9356,10 +9478,18 @@ index d9dc9ab3773f..71a25540d65e 100644
+#include "deadline.c" +#include "deadline.c"
+#endif +#endif
diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c
index 99bdd96f454f..23f80a86d2d7 100644 index 99bdd96f454f..bc17d5a6fc41 100644
--- a/kernel/sched/build_utility.c --- a/kernel/sched/build_utility.c
+++ b/kernel/sched/build_utility.c +++ b/kernel/sched/build_utility.c
@@ -85,7 +85,9 @@ @@ -34,7 +34,6 @@
#include <linux/nospec.h>
#include <linux/proc_fs.h>
#include <linux/psi.h>
-#include <linux/psi.h>
#include <linux/ptrace_api.h>
#include <linux/sched_clock.h>
#include <linux/security.h>
@@ -85,7 +84,9 @@
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
# include "cpupri.c" # include "cpupri.c"
@@ -9370,7 +9500,7 @@ index 99bdd96f454f..23f80a86d2d7 100644
#endif #endif
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 1207c78f85c1..3ed06d7ef4f8 100644 index 1207c78f85c1..f66b715e4287 100644
--- a/kernel/sched/cpufreq_schedutil.c --- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c
@@ -159,9 +159,14 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) @@ -159,9 +159,14 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
@@ -9383,7 +9513,7 @@ index 1207c78f85c1..3ed06d7ef4f8 100644
FREQUENCY_UTIL, NULL); FREQUENCY_UTIL, NULL);
+#else +#else
+ sg_cpu->bw_dl = 0; + sg_cpu->bw_dl = 0;
+ sg_cpu->util = rq_load_util(cpu_of(rq)); + sg_cpu->util = rq_load_util(rq, sg_cpu->cpu);
+#endif /* CONFIG_SCHED_ALT */ +#endif /* CONFIG_SCHED_ALT */
} }
@@ -9894,7 +10024,7 @@ index 8739c2a5a54e..d8dd6c15eb47 100644
+#endif /* CONFIG_NUMA */ +#endif /* CONFIG_NUMA */
+#endif +#endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 205d605cacc5..7dd950601cca 100644 index 205d605cacc5..c1dac3a542b8 100644
--- a/kernel/sysctl.c --- a/kernel/sysctl.c
+++ b/kernel/sysctl.c +++ b/kernel/sysctl.c
@@ -86,6 +86,10 @@ @@ -86,6 +86,10 @@
@@ -9908,23 +10038,7 @@ index 205d605cacc5..7dd950601cca 100644
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
static const int six_hundred_forty_kb = 640 * 1024; static const int six_hundred_forty_kb = 640 * 1024;
#endif #endif
@@ -1631,6 +1635,7 @@ int proc_do_static_key(struct ctl_table *table, int write, @@ -1943,6 +1947,17 @@ static struct ctl_table kern_table[] = {
}
static struct ctl_table kern_table[] = {
+#ifndef CONFIG_SCHED_ALT
#ifdef CONFIG_NUMA_BALANCING
{
.procname = "numa_balancing",
@@ -1642,6 +1647,7 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_FOUR,
},
#endif /* CONFIG_NUMA_BALANCING */
+#endif /* !CONFIG_SCHED_ALT */
{
.procname = "panic",
.data = &panic_timeout,
@@ -1943,6 +1949,17 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
}, },
#endif #endif
@@ -10037,3 +10151,56 @@ index a2d301f58ced..2ccdede8585c 100644
}; };
struct wakeup_test_data *x = data; struct wakeup_test_data *x = data;
--
2.38.1
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
index f3bac14124c3..27eafbccf23d 100644
--- a/kernel/sched/alt_core.c
+++ b/kernel/sched/alt_core.c
@@ -1448,11 +1448,13 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
WARN_ON_ONCE(is_migration_disabled(p));
#endif
- if (task_cpu(p) == new_cpu)
- return;
+
trace_sched_migrate_task(p, new_cpu);
- rseq_migrate(p);
- perf_event_task_migrate(p);
+
+ if (task_cpu(p) != new_cpu) {
+ rseq_migrate(p);
+ perf_event_task_migrate(p);
+ }
__set_task_cpu(p, new_cpu);
}
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
index f3bac14124c3..5678c247c0ab 100644
--- a/kernel/sched/alt_core.c
+++ b/kernel/sched/alt_core.c
@@ -810,8 +810,8 @@ unsigned long get_wchan(struct task_struct *p)
* Context: rq->lock
*/
#define __SCHED_DEQUEUE_TASK(p, rq, flags) \
- psi_dequeue(p, flags & DEQUEUE_SLEEP); \
sched_info_dequeue(rq, p); \
+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \
\
list_del(&p->sq_node); \
if (list_empty(&rq->queue.heads[p->sq_idx])) \
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
index f3bac14124c3..349a2c92d534 100644
--- a/kernel/sched/alt_core.c
+++ b/kernel/sched/alt_core.c
@@ -4404,8 +4404,8 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
/*
* Compile time debug macro
- * #define ALT_SCHED_DEBUG
*/
+#define ALT_SCHED_DEBUG
#ifdef ALT_SCHED_DEBUG
void alt_sched_debug(void)
View File
@@ -64,6 +64,140 @@ index 2c7171e0b0010..85de313ddec29 100644
select CPU_FREQ_GOV_PERFORMANCE select CPU_FREQ_GOV_PERFORMANCE
help help
From 2535fbde890f14c78b750139fcf87d1143850626 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 2 Aug 2022 12:28:11 -0400
Subject: [PATCH] mm: vmscan: fix extreme overreclaim and swap floods
During proactive reclaim, we sometimes observe severe overreclaim, with
several thousand times more pages reclaimed than requested.
This trace was obtained from shrink_lruvec() during such an instance:
prio:0 anon_cost:1141521 file_cost:7767
nr_reclaimed:4387406 nr_to_reclaim:1047 (or_factor:4190)
nr=[7161123 345 578 1111]
While the reclaimer requested 4M, vmscan reclaimed close to 16G, most of it
by swapping. These requests take over a minute, during which the write()
to memory.reclaim is unkillably stuck inside the kernel.
Digging into the source, this is caused by the proportional reclaim
bailout logic. This code tries to resolve a fundamental conflict: to
reclaim roughly what was requested, while also aging all LRUs fairly and
in accordance to their size, swappiness, refault rates etc. The way it
attempts fairness is that once the reclaim goal has been reached, it stops
scanning the LRUs with the smaller remaining scan targets, and adjusts the
remainder of the bigger LRUs according to how much of the smaller LRUs was
scanned. It then finishes scanning that remainder regardless of the
reclaim goal.
This works fine if priority levels are low and the LRU lists are
comparable in size. However, in this instance, the cgroup that is
targeted by proactive reclaim has almost no files left - they've already
been squeezed out by proactive reclaim earlier - and the remaining anon
pages are hot. Anon rotations cause the priority level to drop to 0,
which results in reclaim targeting all of anon (a lot) and all of file
(almost nothing). By the time reclaim decides to bail, it has scanned
most or all of the file target, and therefore must also scan most or all of
the enormous anon target. This target is thousands of times larger than
the reclaim goal, thus causing the overreclaim.
The bailout code hasn't changed in years, why is this failing now? The
most likely explanations are two other recent changes in anon reclaim:
1. Before the series starting with commit 5df741963d52 ("mm: fix LRU
balancing effect of new transparent huge pages"), the VM was
overall relatively reluctant to swap at all, even if swap was
configured. This means the LRU balancing code didn't come into play
as often as it does now, and mostly in high pressure situations
where pronounced swap activity wouldn't be as surprising.
2. For historic reasons, shrink_lruvec() loops on the scan targets of
all LRU lists except the active anon one, meaning it would bail if
the only remaining pages to scan were active anon - even if there
were a lot of them.
Before the series starting with commit ccc5dc67340c ("mm/vmscan:
make active/inactive ratio as 1:1 for anon lru"), most anon pages
would live on the active LRU; the inactive one would contain only a
handful of preselected reclaim candidates. After the series, anon
gets aged similarly to file, and the inactive list is the default
for new anon pages as well, making it often the much bigger list.
As a result, the VM is now more likely to actually finish large
anon targets than before.
Change the code such that only one SWAP_CLUSTER_MAX-sized nudge toward the
larger LRU lists is made before bailing out on a met reclaim goal.
This fixes the extreme overreclaim problem.
Fairness is more subtle and harder to evaluate. No obvious misbehavior
was observed on the test workload, in any case. Conceptually, fairness
should primarily be a cumulative effect from regular, lower priority
scans. Once the VM is in trouble and needs to escalate scan targets to
make forward progress, fairness needs to take a backseat. This is also
acknowledged by the myriad exceptions in get_scan_count(). This patch
makes fairness decrease gradually, as it keeps fairness work static over
increasing priority levels with growing scan targets. This should make
more sense - although we may have to re-visit the exact values.
Link: https://lkml.kernel.org/r/20220802162811.39216-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/vmscan.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 382dbe97329f33..266eb8cfe93a67 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2955,8 +2955,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
enum lru_list lru;
unsigned long nr_reclaimed = 0;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+ bool proportional_reclaim;
struct blk_plug plug;
- bool scan_adjusted;
get_scan_count(lruvec, sc, nr);
@@ -2974,8 +2974,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
* abort proportional reclaim if either the file or anon lru has already
* dropped to zero at the first pass.
*/
- scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
- sc->priority == DEF_PRIORITY);
+ proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+ sc->priority == DEF_PRIORITY);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2995,7 +2995,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
cond_resched();
- if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
+ if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
continue;
/*
@@ -3046,8 +3046,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
-
- scan_adjusted = true;
}
blk_finish_plug(&plug);
sc->nr_reclaimed += nr_reclaimed;
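For context, the proactive reclaim described above is driven from user space through the cgroup v2 memory.reclaim file; the write() the message mentions is exactly such a request. A minimal shell sketch, assuming cgroup v2 is mounted at /sys/fs/cgroup and an existing group named "workload" (the group name is only illustrative):
# ask the kernel to proactively reclaim roughly 64M from the group;
# before this fix such a write could get stuck reclaiming far more than requested
echo "64M" > /sys/fs/cgroup/workload/memory.reclaim
# check how much memory the group holds afterwards
cat /sys/fs/cgroup/workload/memory.current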
From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001 From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com> From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Mon, 4 Oct 2021 14:07:34 -0400 Date: Mon, 4 Oct 2021 14:07:34 -0400
@@ -388,3 +522,38 @@ index 1fd3cbca20a2..c7bf189d50de 100644
-- --
2.25.1 2.25.1
From fb23dad87a0bfb6fdfde3dc1d18104da631d050a Mon Sep 17 00:00:00 2001
From: Sjoerd Simons <sjoerd@collabora.com>
Date: Sat, 8 Oct 2022 21:57:51 +0200
Subject: [PATCH] soundwire: intel: Initialize clock stop timeout
The bus->clk_stop_timeout member is only initialized to a non-zero value
during the codec driver probe. This can lead to corner cases where this
value remains pegged at zero when the bus suspends, which results in an
endless loop in sdw_bus_wait_for_clk_prep_deprep().
Corner cases include configurations with no codecs described in the
firmware, or delays in probing codec drivers.
Initializing the default timeout to the smallest non-zero value avoids this
problem and allows for the existing logic to be preserved: the
bus->clk_stop_timeout is set as the maximum required by all codecs
connected on the bus.
Signed-off-by: Sjoerd Simons <sjoerd@collabora.com>
---
drivers/soundwire/intel.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index af6c1a93372d90..002bc26b525e87 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -1307,6 +1307,7 @@ static int intel_link_probe(struct auxiliary_device *auxdev,
cdns->msg_count = 0;
bus->link_id = auxdev->id;
+ bus->clk_stop_timeout = 1;
sdw_cdns_probe(cdns);
View File
@@ -1,45 +1,121 @@
From d50977b164e708bf523a35ef53315355528c3ca6 Mon Sep 17 00:00:00 2001 From 5ec2dd3a095442ec1a21d86042a4994f2ba24e63 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com> Message-Id: <5ec2dd3a095442ec1a21d86042a4994f2ba24e63.1512651251.git.jan.steffens@gmail.com>
Date: Mon, 16 Sep 2019 04:53:20 +0200 From: Serge Hallyn <serge.hallyn@canonical.com>
Subject: [PATCH] ZEN: Add sysctl and CONFIG to disallow unprivileged Date: Fri, 31 May 2013 19:12:12 +0100
CLONE_NEWUSER Subject: [PATCH] add sysctl to disallow unprivileged CLONE_NEWUSER by default
Our default behavior continues to match the vanilla kernel. Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
[bwh: Remove unneeded binary sysctl bits]
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
--- ---
include/linux/user_namespace.h | 4 ++++ kernel/fork.c | 15 +++++++++++++++
init/Kconfig | 16 ++++++++++++++++
kernel/fork.c | 14 ++++++++++++++
kernel/sysctl.c | 12 ++++++++++++ kernel/sysctl.c | 12 ++++++++++++
kernel/user_namespace.c | 7 +++++++ kernel/user_namespace.c | 3 +++
5 files changed, 53 insertions(+) 3 files changed, 30 insertions(+)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h diff --git a/kernel/fork.c b/kernel/fork.c
index 45f09bec02c485..87b20e2ee27445 100644 index 07cc743698d3668e..4011d68a8ff9305c 100644
--- a/include/linux/user_namespace.h --- a/kernel/fork.c
+++ b/include/linux/user_namespace.h +++ b/kernel/fork.c
@@ -148,6 +148,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, @@ -102,6 +102,11 @@
#ifdef CONFIG_USER_NS
#define CREATE_TRACE_POINTS
#include <trace/events/task.h>
+#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone; +extern int unprivileged_userns_clone;
+ +#else
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
@@ -181,6 +183,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
struct ns_common *ns_get_owner(struct ns_common *ns);
#else
+#define unprivileged_userns_clone 0 +#define unprivileged_userns_clone 0
+#endif
/*
* Minimum number of threads to boot the kernel
@@ -1555,6 +1560,10 @@ static __latent_entropy struct task_struct *copy_process(
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+ +
static inline struct user_namespace *get_user_ns(struct user_namespace *ns) /*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -2348,6 +2357,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto bad_unshare_out;
+ }
+
err = check_unshare_flags(unshare_flags);
if (err)
goto bad_unshare_out;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b86520ed3fb60fbf..f7dab3760839f1a1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -105,6 +105,9 @@ extern int core_uses_pid;
#if defined(CONFIG_SYSCTL)
+#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+#endif
/* Constants used for minimum and maximum */
#ifdef CONFIG_PERF_EVENTS
@@ -513,6 +516,15 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
+#ifdef CONFIG_USER_NS
+ {
+ .procname = "unprivileged_userns_clone",
+ .data = &unprivileged_userns_clone,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
#ifdef CONFIG_PROC_SYSCTL
{ {
return &init_user_ns; .procname = "tainted",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index c490f1e4313b998a..dd03bd39d7bf194d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -24,6 +24,9 @@
#include <linux/projid.h>
#include <linux/fs_struct.h>
+/* sysctl */
+int unprivileged_userns_clone;
+
static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);
--
2.15.1
From b5202296055dd333db4425120d3f93ef4e6a0573 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Thu, 7 Dec 2017 13:50:48 +0100
Subject: ZEN: Add CONFIG for unprivileged_userns_clone
This way our default behavior continues to match the vanilla kernel.
---
init/Kconfig | 16 ++++++++++++++++
kernel/user_namespace.c | 4 ++++
2 files changed, 20 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig diff --git a/init/Kconfig b/init/Kconfig
index 94125d3b6893c7..9f7139b536f638 100644 index 4592bf7997c0..f3df02990aff 100644
--- a/init/Kconfig --- a/init/Kconfig
+++ b/init/Kconfig +++ b/init/Kconfig
@@ -1247,6 +1247,22 @@ config USER_NS @@ -1004,6 +1004,22 @@ config USER_NS
If unsure, say N. If unsure, say N.
@@ -62,90 +138,19 @@ index 94125d3b6893c7..9f7139b536f638 100644
config PID_NS config PID_NS
bool "PID Namespaces" bool "PID Namespaces"
default y default y
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d59..ff601cb7a1fae0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -98,6 +98,10 @@
#include <linux/io_uring.h>
#include <linux/bpf.h>
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
+
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -2008,6 +2012,10 @@ static __latent_entropy struct task_struct *copy_process(
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -3166,6 +3174,12 @@ int ksys_unshare(unsigned long unshare_flags)
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto bad_unshare_out;
+ }
+
err = check_unshare_flags(unshare_flags);
if (err)
goto bad_unshare_out;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c6d9dec11b749d..9a4514ad481b21 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -81,6 +81,9 @@
#ifdef CONFIG_RT_MUTEXES
#include <linux/rtmutex.h>
#endif
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
/* shared constants to be used in various sysctls */
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
@@ -1659,6 +1662,15 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_USER_NS
+ {
+ .procname = "unprivileged_userns_clone",
+ .data = &unprivileged_userns_clone,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 54211dbd516c57..16ca0c1516298d 100644 index 6b9dbc257e34..107b17f0d528 100644
--- a/kernel/user_namespace.c --- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c +++ b/kernel/user_namespace.c
@@ -22,6 +22,13 @@ @@ -27,7 +27,11 @@
#include <linux/bsearch.h>
#include <linux/sort.h> #include <linux/sort.h>
+/* sysctl */ /* sysctl */
+#ifdef CONFIG_USER_NS_UNPRIVILEGED +#ifdef CONFIG_USER_NS_UNPRIVILEGED
+int unprivileged_userns_clone = 1; +int unprivileged_userns_clone = 1;
+#else +#else
+int unprivileged_userns_clone; int unprivileged_userns_clone;
+#endif +#endif
+
static struct kmem_cache *user_ns_cachep __read_mostly; static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex); static DEFINE_MUTEX(userns_state_mutex);
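For reference, once a kernel carrying the patch above is booted, the new knob behaves like any other sysctl. A usage sketch (the sysctl.d file name is arbitrary):
# 1 = unprivileged user namespace creation allowed, 0 = CAP_SYS_ADMIN required
cat /proc/sys/kernel/unprivileged_userns_clone
# flip it for the current boot
sudo sysctl -w kernel.unprivileged_userns_clone=1
# make the setting persistent
echo 'kernel.unprivileged_userns_clone = 1' | sudo tee /etc/sysctl.d/99-userns.conf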
View File
@@ -403,6 +403,34 @@ index 84badf00647e..6a922bca9f39 100644
2.28.0 2.28.0
From 816ee502759e954304693813bd03d94986b28dba Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Mon, 18 Feb 2019 17:40:57 +0100
Subject: [PATCH 11/17] mm: Set watermark_scale_factor to 200 (from 10)
Multiple users have reported that it helps reduce or eliminate stuttering
with DXVK.
---
mm/page_alloc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 898ff44f2c7b..e72074034793 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -330,7 +330,7 @@ int watermark_boost_factor __read_mostly;
int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
int watermark_boost_factor __read_mostly = 15000;
-int watermark_scale_factor = 10;
+int watermark_scale_factor = 200;
static unsigned long nr_kernel_pages __initdata;
static unsigned long nr_all_pages __initdata;
--
2.28.0
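The same knob is also exposed at runtime, so the patched default can be inspected or reproduced on an unpatched kernel without rebuilding. A sketch (the value is in units of 1/10000th of zone size):
# read the current value (10 on a vanilla kernel)
sysctl vm.watermark_scale_factor
# match the patched default at runtime
sudo sysctl -w vm.watermark_scale_factor=200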
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001 From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com> From: Tk-Glitch <ti3nou@gmail.com>
Date: Fri, 19 Apr 2019 12:33:38 +0200 Date: Fri, 19 Apr 2019 12:33:38 +0200
@@ -462,6 +490,7 @@ index b0cefe94920d..890165099b07 100644
-- --
2.28.0 2.28.0
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001 From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com> From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 25 Nov 2019 15:13:06 -0300 Date: Mon, 25 Nov 2019 15:13:06 -0300
@@ -590,232 +619,3 @@ index 36a469150ff9..aee891c9b78a 100644
-- --
2.28.0 2.28.0
From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001
From: Kenny Levinsen <kl@kl.wtf>
Date: Sun, 27 Dec 2020 14:43:13 +0000
Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client
Significant time was spent on synchronize_rcu in evdev_detach_client
when applications closed evdev devices. Switching VT away from a
graphical environment commonly leads to mass input device closures,
which could lead to noticeable delays on systems with many input devices.
Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
client struct till after the RCU grace period instead of blocking the
calling application.
While this does not solve all slow evdev fd closures, it takes care of a
good portion of them, including this simple test:
#include <fcntl.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
int idx, fd;
const char *path = "/dev/input/event0";
for (idx = 0; idx < 1000; idx++) {
if ((fd = open(path, O_RDWR)) == -1) {
return -1;
}
close(fd);
}
return 0;
}
Time to completion of above test when run locally:
Before: 0m27.111s
After: 0m0.018s
Signed-off-by: Kenny Levinsen <kl@kl.wtf>
---
drivers/input/evdev.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 95f90699d2b17b..2b10fe29d2c8d9 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -46,6 +46,7 @@ struct evdev_client {
struct fasync_struct *fasync;
struct evdev *evdev;
struct list_head node;
+ struct rcu_head rcu;
enum input_clock_type clk_type;
bool revoked;
unsigned long *evmasks[EV_CNT];
@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
spin_unlock(&evdev->client_lock);
}
+static void evdev_reclaim_client(struct rcu_head *rp)
+{
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
+ unsigned int i;
+ for (i = 0; i < EV_CNT; ++i)
+ bitmap_free(client->evmasks[i]);
+ kvfree(client);
+}
+
static void evdev_detach_client(struct evdev *evdev,
struct evdev_client *client)
{
spin_lock(&evdev->client_lock);
list_del_rcu(&client->node);
spin_unlock(&evdev->client_lock);
- synchronize_rcu();
+ call_rcu(&client->rcu, evdev_reclaim_client);
}
static int evdev_open_device(struct evdev *evdev)
@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
- unsigned int i;
mutex_lock(&evdev->mutex);
@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
evdev_detach_client(evdev, client);
- for (i = 0; i < EV_CNT; ++i)
- bitmap_free(client->evmasks[i]);
-
- kvfree(client);
-
evdev_close_device(evdev);
return 0;
@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
err_free_client:
evdev_detach_client(evdev, client);
- kvfree(client);
return error;
}
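The test program quoted in the message is enough to reproduce the before/after numbers. A rough sketch, assuming the snippet is saved as evdev-close.c and /dev/input/event0 exists (opening it normally requires root):
gcc -O2 -o evdev-close evdev-close.c
sudo time ./evdev-close   # ~27s before the patch, ~0.02s after, per the figures above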
From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:11 -0700
Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
scheduling delays
The page allocator processes free pages in groups of pageblocks, where
the size of a pageblock is typically quite large (1024 pages without
hugetlbpage support). Pageblocks are processed atomically with the zone
lock held, which can cause severe scheduling delays on both the CPU
going through the pageblock and any other CPUs waiting to acquire the
zone lock. A frequent offender is move_freepages_block(), which is used
by rmqueue() for page allocation.
As it turns out, there's no requirement for pageblocks to be so large,
so the pageblock order can simply be reduced to ease the scheduling
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
reasonable setting to ensure non-costly page allocation requests can
still be serviced without always needing to free up more than one
pageblock's worth of pages at a time.
This has a noticeable effect on overall system latency when memory
pressure is elevated. The various mm functions which operate on
pageblocks no longer appear in the preemptoff tracer, where previously
they would spend up to 100 ms on a mobile arm64 CPU processing a
pageblock with preemption disabled and the zone lock held.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
include/linux/pageblock-flags.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 5f1ae07d724b88..97cda629c9e909 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;
#else /* CONFIG_HUGETLB_PAGE */
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
-#define pageblock_order (MAX_ORDER-1)
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER
#endif /* CONFIG_HUGETLB_PAGE */
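The effective pageblock order is visible on a running kernel, which makes it easy to confirm whether the override is active. A sketch (values depend on the configuration: PAGE_ALLOC_COSTLY_ORDER is 3, while a kernel without hugetlbpage support and without this patch would report MAX_ORDER-1):
# the header of pagetypeinfo reports the pageblock order in use
sudo head -n 2 /proc/pagetypeinfo
# Page block order: 3
# Pages per block:  8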
From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:32 -0700
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()
There is noticeable scheduling latency and heavy zone lock contention
stemming from rmqueue_bulk's single hold of the zone lock while doing
its work, as seen with the preemptoff tracer. There's no actual need for
rmqueue_bulk() to hold the zone lock the entire time; it only does so
for supposed efficiency. As such, we can relax the zone lock and even
reschedule when IRQs are enabled in order to keep the scheduling delays
and zone lock contention at bay. Forward progress is still guaranteed,
as the zone lock can only be relaxed after page removal.
With this change, rmqueue_bulk() no longer appears as a serious offender
in the preemptoff tracer, and system latency is noticeably improved.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
mm/page_alloc.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0b0397e29ee4c..87a983a356530c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3119,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
}
/*
- * Obtain a specified number of elements from the buddy allocator, all under
- * a single hold of the lock, for efficiency. Add them to the supplied list.
- * Returns the number of new pages which were placed at *list.
+ * Obtain a specified number of elements from the buddy allocator, and relax the
+ * zone lock when needed. Add them to the supplied list. Returns the number of
+ * new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
- int i, allocated = 0;
+ const bool can_resched = !preempt_count() && !irqs_disabled();
+ int i, allocated = 0, last_mod = 0;
/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
spin_lock(&zone->lock);
@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
if (unlikely(page == NULL))
break;
+ /* Reschedule and ease the contention on the lock if needed */
+ if (i + 1 < count && ((can_resched && need_resched()) ||
+ spin_needbreak(&zone->lock))) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
+ -((i + 1 - last_mod) << order));
+ last_mod = i + 1;
+ spin_unlock(&zone->lock);
+ if (can_resched)
+ cond_resched();
+ spin_lock(&zone->lock);
+ }
+
if (unlikely(check_pcp_refill(page, order)))
continue;
@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* on i. Do not confuse with 'allocated' which is the number of
* pages added to the pcp list.
*/
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
spin_unlock(&zone->lock);
return allocated;
}
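The preemptoff tracer mentioned in the message is the simplest way to check whether rmqueue_bulk() still shows up as a worst-case non-preemptible section. A sketch, assuming CONFIG_PREEMPT_TRACER is enabled and tracefs is mounted:
cd /sys/kernel/tracing        # /sys/kernel/debug/tracing on older setups
echo preemptoff > current_tracer
echo 0 > tracing_max_latency  # reset the recorded worst case
echo 1 > tracing_on
# generate memory pressure, then look at the longest preempt-off section
cat tracing_max_latency
head -n 40 trace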
View File
@@ -1,90 +0,0 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: glitched - PDS
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9270a4370d54..30d01e647417 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -169,7 +169,7 @@
/*
* From 0 .. 200. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 20;
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)
File diff suppressed because it is too large
View File
@@ -1,90 +0,0 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: glitched - BMQ
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9270a4370d54..30d01e647417 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -169,7 +169,7 @@
/*
* From 0 .. 200. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 20;
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)
View File
@@ -1,18 +0,0 @@
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
File diff suppressed because it is too large
View File
@@ -64,349 +64,365 @@ index 2c7171e0b0010..85de313ddec29 100644
select CPU_FREQ_GOV_PERFORMANCE select CPU_FREQ_GOV_PERFORMANCE
help help
From 0c079d3f88df5f8286cd5c91b54bdac7c819be85 Mon Sep 17 00:00:00 2001 From 2535fbde890f14c78b750139fcf87d1143850626 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com> From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 6 Dec 2022 16:11:41 +0000 Date: Tue, 2 Aug 2022 12:28:11 -0400
Subject: [PATCH] drm/i915: improve the catch-all evict to handle lock Subject: [PATCH] mm: vmscan: fix extreme overreclaim and swap floods
contention
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The catch-all evict can fail due to object lock contention, since it During proactive reclaim, we sometimes observe severe overreclaim, with
only goes as far as trylocking the object, due to us already holding the several thousand times more pages reclaimed than requested.
vm->mutex. Doing a full object lock here can deadlock, since the
vm->mutex is always our inner lock. Add another execbuf pass which drops
the vm->mutex and then tries to grab the object with the full lock,
before then retrying the eviction. This should be good enough for now to
fix the immediate regression with userspace seeing -ENOSPC from execbuf
due to contended object locks during GTT eviction.
Testcase: igt@gem_ppgtt@shrink-vs-evict-* This trace was obtained from shrink_lruvec() during such an instance:
Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.")
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570
References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
Cc: Mani Milani <mani@chromium.org>
Cc: <stable@vger.kernel.org> # v5.18+
Revision 1 of https://patchwork.freedesktop.org/series/111686/ prio:0 anon_cost:1141521 file_cost:7767
nr_reclaimed:4387406 nr_to_reclaim:1047 (or_factor:4190)
nr=[7161123 345 578 1111]
While the reclaimer requested 4M, vmscan reclaimed close to 16G, most of it
by swapping. These requests take over a minute, during which the write()
to memory.reclaim is unkillably stuck inside the kernel.
Digging into the source, this is caused by the proportional reclaim
bailout logic. This code tries to resolve a fundamental conflict: to
reclaim roughly what was requested, while also aging all LRUs fairly and
in accordance to their size, swappiness, refault rates etc. The way it
attempts fairness is that once the reclaim goal has been reached, it stops
scanning the LRUs with the smaller remaining scan targets, and adjusts the
remainder of the bigger LRUs according to how much of the smaller LRUs was
scanned. It then finishes scanning that remainder regardless of the
reclaim goal.
This works fine if priority levels are low and the LRU lists are
comparable in size. However, in this instance, the cgroup that is
targeted by proactive reclaim has almost no files left - they've already
been squeezed out by proactive reclaim earlier - and the remaining anon
pages are hot. Anon rotations cause the priority level to drop to 0,
which results in reclaim targeting all of anon (a lot) and all of file
(almost nothing). By the time reclaim decides to bail, it has scanned
most or all of the file target, and therefore must also scan most or all of
the enormous anon target. This target is thousands of times larger than
the reclaim goal, thus causing the overreclaim.
The bailout code hasn't changed in years, why is this failing now? The
most likely explanations are two other recent changes in anon reclaim:
1. Before the series starting with commit 5df741963d52 ("mm: fix LRU
balancing effect of new transparent huge pages"), the VM was
overall relatively reluctant to swap at all, even if swap was
configured. This means the LRU balancing code didn't come into play
as often as it does now, and mostly in high pressure situations
where pronounced swap activity wouldn't be as surprising.
2. For historic reasons, shrink_lruvec() loops on the scan targets of
all LRU lists except the active anon one, meaning it would bail if
the only remaining pages to scan were active anon - even if there
were a lot of them.
Before the series starting with commit ccc5dc67340c ("mm/vmscan:
make active/inactive ratio as 1:1 for anon lru"), most anon pages
would live on the active LRU; the inactive one would contain only a
handful of preselected reclaim candidates. After the series, anon
gets aged similarly to file, and the inactive list is the default
for new anon pages as well, making it often the much bigger list.
As a result, the VM is now more likely to actually finish large
anon targets than before.
Change the code such that only one SWAP_CLUSTER_MAX-sized nudge toward the
larger LRU lists is made before bailing out on a met reclaim goal.
This fixes the extreme overreclaim problem.
Fairness is more subtle and harder to evaluate. No obvious misbehavior
was observed on the test workload, in any case. Conceptually, fairness
should primarily be a cumulative effect from regular, lower priority
scans. Once the VM is in trouble and needs to escalate scan targets to
make forward progress, fairness needs to take a backseat. This is also
acknowledged by the myriad exceptions in get_scan_count(). This patch
makes fairness decrease gradually, as it keeps fairness work static over
increasing priority levels with growing scan targets. This should make
more sense - although we may have to re-visit the exact values.
Link: https://lkml.kernel.org/r/20220802162811.39216-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
--- ---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 25 +++++++++++-- mm/vmscan.c | 10 ++++------
drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- 1 file changed, 4 insertions(+), 6 deletions(-)
drivers/gpu/drm/i915/i915_gem_evict.c | 37 ++++++++++++++-----
drivers/gpu/drm/i915/i915_gem_evict.h | 4 +-
drivers/gpu/drm/i915/i915_vma.c | 2 +-
.../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-
6 files changed, 56 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c diff --git a/mm/vmscan.c b/mm/vmscan.c
index 845023c14eb36f..094e92ed28db4f 100644 index 382dbe97329f33..266eb8cfe93a67 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c --- a/mm/vmscan.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/mm/vmscan.c
@@ -741,25 +741,44 @@ static int eb_reserve(struct i915_execbuffer *eb) @@ -2955,8 +2955,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
* enum lru_list lru;
* Defragmenting is skipped if all objects are pinned at a fixed location. unsigned long nr_reclaimed = 0;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+ bool proportional_reclaim;
struct blk_plug plug;
- bool scan_adjusted;
get_scan_count(lruvec, sc, nr);
@@ -2974,8 +2974,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
* abort proportional reclaim if either the file or anon lru has already
* dropped to zero at the first pass.
*/ */
- for (pass = 0; pass <= 2; pass++) { - scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+ for (pass = 0; pass <= 3; pass++) { - sc->priority == DEF_PRIORITY);
int pin_flags = PIN_USER | PIN_VALIDATE; + proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+ sc->priority == DEF_PRIORITY);
if (pass == 0) blk_start_plug(&plug);
pin_flags |= PIN_NONBLOCK; while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2995,7 +2995,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
if (pass >= 1) cond_resched();
- unpinned = eb_unbind(eb, pass == 2);
+ unpinned = eb_unbind(eb, pass >= 2);
if (pass == 2) { - if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
err = mutex_lock_interruptible(&eb->context->vm->mutex); + if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
if (!err) {
- err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
mutex_unlock(&eb->context->vm->mutex);
}
if (err)
return err;
}
+ if (pass == 3) {
+retry:
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
+ if (!err) {
+ struct drm_i915_gem_object *busy_bo = NULL;
+
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
+ mutex_unlock(&eb->context->vm->mutex);
+ if (err && busy_bo) {
+ err = i915_gem_object_lock(busy_bo, &eb->ww);
+ i915_gem_object_put(busy_bo);
+ if (!err)
+ goto retry;
+ }
+ }
+ if (err)
+ return err;
+ }
+
list_for_each_entry(ev, &eb->unbound, bind_link) {
err = eb_reserve_vma(eb, ev, pin_flags);
if (err)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 73d9eda1d6b7a6..c83d98e1dc5da0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -369,7 +369,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
if (vma == ERR_PTR(-ENOSPC)) {
ret = mutex_lock_interruptible(&ggtt->vm.mutex);
if (!ret) {
- ret = i915_gem_evict_vm(&ggtt->vm, &ww);
+ ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
mutex_unlock(&ggtt->vm.mutex);
}
if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index f025ee4fa52618..a4b4d9b7d26c7a 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -416,6 +416,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
* @vm: Address space to cleanse
* @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
* will be able to evict vma's locked by the ww as well.
+ * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
+ * in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
+ * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
+ * the vm->mutex, before trying again to acquire the contended lock. The caller
+ * also owns a reference to the object.
*
* This function evicts all vmas from a vm.
*
@@ -425,7 +430,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
* To clarify: This is for freeing up virtual address space, not for freeing
* memory in e.g. the shrinker.
*/
-int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
+int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
+ struct drm_i915_gem_object **busy_bo)
{
int ret = 0;
@@ -457,15 +463,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
* the resv is shared among multiple objects, we still
* need the object ref.
*/
- if (dying_vma(vma) ||
+ if (!i915_gem_object_get_rcu(vma->obj) ||
(ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) {
__i915_vma_pin(vma);
list_add(&vma->evict_link, &locked_eviction_list);
continue; continue;
}
- if (!i915_gem_object_trylock(vma->obj, ww))
+ if (!i915_gem_object_trylock(vma->obj, ww)) {
+ if (busy_bo) {
+ *busy_bo = vma->obj; /* holds ref */
+ ret = -EBUSY;
+ break;
+ }
+ i915_gem_object_put(vma->obj);
continue;
+ }
__i915_vma_pin(vma);
list_add(&vma->evict_link, &eviction_list);
@@ -473,25 +486,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
break;
- ret = 0;
/* Unbind locked objects first, before unlocking the eviction_list */
list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
__i915_vma_unpin(vma);
- if (ret == 0)
+ if (ret == 0) {
ret = __i915_vma_unbind(vma);
- if (ret != -EINTR) /* "Get me out of here!" */
- ret = 0;
+ if (ret != -EINTR) /* "Get me out of here!" */
+ ret = 0;
+ }
+ if (!dying_vma(vma))
+ i915_gem_object_put(vma->obj);
}
list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
__i915_vma_unpin(vma);
- if (ret == 0)
+ if (ret == 0) {
ret = __i915_vma_unbind(vma);
- if (ret != -EINTR) /* "Get me out of here!" */
- ret = 0;
+ if (ret != -EINTR) /* "Get me out of here!" */
+ ret = 0;
+ }
i915_gem_object_unlock(vma->obj);
+ i915_gem_object_put(vma->obj);
}
} while (ret == 0);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h
index e593c530f9bd7a..bf0ee0e4fe6088 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.h
+++ b/drivers/gpu/drm/i915/i915_gem_evict.h
@@ -11,6 +11,7 @@
struct drm_mm_node;
struct i915_address_space;
struct i915_gem_ww_ctx;
+struct drm_i915_gem_object;
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
struct i915_gem_ww_ctx *ww,
@@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
struct drm_mm_node *node,
unsigned int flags);
int i915_gem_evict_vm(struct i915_address_space *vm,
- struct i915_gem_ww_ctx *ww);
+ struct i915_gem_ww_ctx *ww,
+ struct drm_i915_gem_object **busy_bo);
#endif /* __I915_GEM_EVICT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f17c09ead7d778..4d06875de14a14 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1569,7 +1569,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
* locked objects when called from execbuf when pinning
* is removed. This would probably regress badly.
*/
- i915_gem_evict_vm(vm, NULL);
+ i915_gem_evict_vm(vm, NULL, NULL);
mutex_unlock(&vm->mutex);
}
} while (1);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 8c6517d29b8e0c..37068542aafe7f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
/* Everything is pinned, nothing should happen */
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_vm(&ggtt->vm, NULL);
+ err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL);
mutex_unlock(&ggtt->vm.mutex);
if (err) {
pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
@@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
for_i915_gem_ww(&ww, err, false) {
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_vm(&ggtt->vm, &ww);
+ err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
mutex_unlock(&ggtt->vm.mutex);
}
From 189603b802a2bb276b82a0a4f66528ad29156f46 Mon Sep 17 00:00:00 2001
From: Alexey Izbyshev <izbyshev@ispras.ru>
Date: Sat, 12 Nov 2022 00:54:39 +0300
Subject: [PATCH] futex: Resend potentially swallowed owner death notification
Commit ca16d5bee598 ("futex: Prevent robust futex exit race") addressed
two cases when tasks waiting on a robust non-PI futex remained blocked
despite the futex not being owned anymore:
* if the owner died after writing zero to the futex word, but before
waking up a waiter
* if a task waiting on the futex was woken up, but died before updating
the futex word (effectively swallowing the notification without acting
on it)
In the second case, the task could be woken up either by the previous
owner (after the futex word was reset to zero) or by the kernel (after
the OWNER_DIED bit was set and the TID part of the futex word was reset
to zero) if the previous owner died without resetting the futex.
Because the referenced commit wakes up a potential waiter only if the
whole futex word is zero, the latter subcase remains unaddressed.
Fix this by looking only at the TID part of the futex when deciding
whether a wake up is needed.
Fixes: ca16d5bee598 ("futex: Prevent robust futex exit race")
Signed-off-by: Alexey Izbyshev <izbyshev@ispras.ru>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221111215439.248185-1-izbyshev@ispras.ru
---
kernel/futex/core.c | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index b22ef1efe75118..514e4582b86341 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -638,6 +638,7 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
bool pi, bool pending_op)
{
u32 uval, nval, mval;
+ pid_t owner;
int err;
/* Futex address must be 32bit aligned */
@@ -659,6 +660,10 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
* 2. A woken up waiter is killed before it can acquire the
* futex in user space.
*
+ * In the second case, the wake up notification could be generated
+ * by the unlock path in user space after setting the futex value
+ * to zero or by the kernel after setting the OWNER_DIED bit below.
+ *
* In both cases the TID validation below prevents a wakeup of
* potential waiters which can cause these waiters to block
* forever.
@@ -667,24 +672,27 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
*
* 1) task->robust_list->list_op_pending != NULL
* @pending_op == true
- * 2) User space futex value == 0
+ * 2) The owner part of user space futex value == 0
* 3) Regular futex: @pi == false
*
* If these conditions are met, it is safe to attempt waking up a
* potential waiter without touching the user space futex value and
- * trying to set the OWNER_DIED bit. The user space futex value is
- * uncontended and the rest of the user space mutex state is
- * consistent, so a woken waiter will just take over the
- * uncontended futex. Setting the OWNER_DIED bit would create
- * inconsistent state and malfunction of the user space owner died
- * handling.
+ * trying to set the OWNER_DIED bit. If the futex value is zero,
+ * the rest of the user space mutex state is consistent, so a woken
+ * waiter will just take over the uncontended futex. Setting the
+ * OWNER_DIED bit would create inconsistent state and malfunction
+ * of the user space owner died handling. Otherwise, the OWNER_DIED
+ * bit is already set, and the woken waiter is expected to deal with
+ * this.
*/
- if (pending_op && !pi && !uval) {
+ owner = uval & FUTEX_TID_MASK;
+
+ if (pending_op && !pi && !owner) {
futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
return 0;
}
- if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
+ if (owner != task_pid_vnr(curr))
return 0;
/* /*
@@ -3046,8 +3046,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
-
- scan_adjusted = true;
}
blk_finish_plug(&plug);
sc->nr_reclaimed += nr_reclaimed;
From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Mon, 4 Oct 2021 14:07:34 -0400
Subject: [PATCH] Bluetooth: fix deadlock for RFCOMM sk state change
Syzbot reports the following task hang [1]:
INFO: task syz-executor255:8499 blocked for more than 143 seconds.
Not tainted 5.14.0-rc7-syzkaller #0
Call Trace:
context_switch kernel/sched/core.c:4681 [inline]
__schedule+0x93a/0x26f0 kernel/sched/core.c:5938
schedule+0xd3/0x270 kernel/sched/core.c:6017
__lock_sock+0x13d/0x260 net/core/sock.c:2644
lock_sock_nested+0xf6/0x120 net/core/sock.c:3185
lock_sock include/net/sock.h:1612 [inline]
rfcomm_sk_state_change+0xb4/0x390 net/bluetooth/rfcomm/sock.c:73
__rfcomm_dlc_close+0x1b6/0x8a0 net/bluetooth/rfcomm/core.c:489
rfcomm_dlc_close+0x1ea/0x240 net/bluetooth/rfcomm/core.c:520
__rfcomm_sock_close+0xac/0x260 net/bluetooth/rfcomm/sock.c:220
rfcomm_sock_shutdown+0xe9/0x210 net/bluetooth/rfcomm/sock.c:931
rfcomm_sock_release+0x5f/0x140 net/bluetooth/rfcomm/sock.c:951
__sock_release+0xcd/0x280 net/socket.c:649
sock_close+0x18/0x20 net/socket.c:1314
__fput+0x288/0x920 fs/file_table.c:280
task_work_run+0xdd/0x1a0 kernel/task_work.c:164
exit_task_work include/linux/task_work.h:32 [inline]
do_exit+0xbd4/0x2a60 kernel/exit.c:825
do_group_exit+0x125/0x310 kernel/exit.c:922
get_signal+0x47f/0x2160 kernel/signal.c:2808
arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:865
handle_signal_work kernel/entry/common.c:148 [inline]
exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:209
__syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:302
do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
entry_SYSCALL_64_after_hwframe+0x44/0xae
Showing all locks held in the system:
1 lock held by khungtaskd/1653:
#0: ffffffff8b97c280 (rcu_read_lock){....}-{1:2}, at:
debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6446
1 lock held by krfcommd/4781:
#0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
rfcomm_process_sessions net/bluetooth/rfcomm/core.c:1979 [inline]
#0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
rfcomm_run+0x2ed/0x4a20 net/bluetooth/rfcomm/core.c:2086
2 locks held by in:imklog/8206:
#0: ffff8880182ce5f0 (&f->f_pos_lock){+.+.}-{3:3}, at:
__fdget_pos+0xe9/0x100 fs/file.c:974
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
raw_spin_rq_lock_nested kernel/sched/core.c:460 [inline]
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock
kernel/sched/sched.h:1307 [inline]
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: rq_lock
kernel/sched/sched.h:1610 [inline]
#1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
__schedule+0x233/0x26f0 kernel/sched/core.c:5852
4 locks held by syz-executor255/8499:
#0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
inode_lock include/linux/fs.h:774 [inline]
#0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
__sock_release+0x86/0x280 net/socket.c:648
#1:
ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
at: lock_sock include/net/sock.h:1612 [inline]
#1:
ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
at: rfcomm_sock_shutdown+0x54/0x210 net/bluetooth/rfcomm/sock.c:928
#2: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
rfcomm_dlc_close+0x34/0x240 net/bluetooth/rfcomm/core.c:507
#3: ffff888141bd6d28 (&d->lock){+.+.}-{3:3}, at:
__rfcomm_dlc_close+0x162/0x8a0 net/bluetooth/rfcomm/core.c:487
==================================================================
The task hangs because of a deadlock that occurs when lock_sock() is
called in rfcomm_sk_state_change(). One such call stack is:
rfcomm_sock_shutdown():
lock_sock();
__rfcomm_sock_close():
rfcomm_dlc_close():
__rfcomm_dlc_close():
rfcomm_dlc_lock();
rfcomm_sk_state_change():
lock_sock();
lock_sock() has to be called when the sk state is changed because the
lock is not always held when rfcomm_sk_state_change() is
called. However, besides the recursive deadlock, there is also an
issue of a lock hierarchy inversion between rfcomm_dlc_lock() and
lock_sock() if the socket is locked in rfcomm_sk_state_change().
To avoid these issues, we can instead schedule the sk state change in
the global workqueue. This is already the implicit assumption about
how sk state changes happen. For example, in rfcomm_sock_shutdown(),
the call to __rfcomm_sock_close() is followed by
bt_sock_wait_state().
Additionally, the call to rfcomm_sock_kill() inside
rfcomm_sk_state_change() should be removed. The socket shouldn't be
killed here because only rfcomm_sock_release() calls sock_orphan(),
which it already follows up with a call to rfcomm_sock_kill().
Fixes: b7ce436a5d79 ("Bluetooth: switch to lock_sock in RFCOMM")
Link: https://syzkaller.appspot.com/bug?extid=7d51f807c81b190a127d [1]
Reported-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
Tested-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
---
include/net/bluetooth/rfcomm.h | 3 +++
net/bluetooth/rfcomm/core.c | 2 ++
net/bluetooth/rfcomm/sock.c | 34 ++++++++++++++++++++++------------
3 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 99d26879b02a53..a92799fc5e74d0 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -171,6 +171,7 @@ struct rfcomm_dlc {
struct rfcomm_session *session;
struct sk_buff_head tx_queue;
struct timer_list timer;
+ struct work_struct state_change_work;
struct mutex lock;
unsigned long state;
@@ -186,6 +187,7 @@ struct rfcomm_dlc {
u8 sec_level;
u8 role_switch;
u32 defer_setup;
+ int err;
uint mtu;
uint cfc;
@@ -310,6 +312,7 @@ struct rfcomm_pinfo {
u8 role_switch;
};
+void __rfcomm_sk_state_change(struct work_struct *work);
int rfcomm_init_sockets(void);
void rfcomm_cleanup_sockets(void);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 7324764384b677..c6494e85cd68b2 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -289,6 +289,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
d->flags = 0;
d->mscex = 0;
d->sec_level = BT_SECURITY_LOW;
+ d->err = 0;
d->mtu = RFCOMM_DEFAULT_MTU;
d->v24_sig = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
@@ -306,6 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
timer_setup(&d->timer, rfcomm_dlc_timeout, 0);
skb_queue_head_init(&d->tx_queue);
+ INIT_WORK(&d->state_change_work, __rfcomm_sk_state_change);
mutex_init(&d->lock);
refcount_set(&d->refcnt, 1);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4bf4ea6cbb5eee..4850dafbaa05fb 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -61,19 +61,22 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb)
rfcomm_dlc_throttle(d);
}
-static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
+void __rfcomm_sk_state_change(struct work_struct *work)
{
+ struct rfcomm_dlc *d = container_of(work, struct rfcomm_dlc,
+ state_change_work);
struct sock *sk = d->owner, *parent;
if (!sk)
return;
- BT_DBG("dlc %p state %ld err %d", d, d->state, err);
-
lock_sock(sk);
+ rfcomm_dlc_lock(d);
- if (err)
- sk->sk_err = err;
+ BT_DBG("dlc %p state %ld err %d", d, d->state, d->err);
+
+ if (d->err)
+ sk->sk_err = d->err;
sk->sk_state = d->state;
@@ -91,15 +94,22 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
sk->sk_state_change(sk);
}
+ rfcomm_dlc_unlock(d);
release_sock(sk);
+ sock_put(sk);
+}
- if (parent && sock_flag(sk, SOCK_ZAPPED)) {
- /* We have to drop DLC lock here, otherwise
- * rfcomm_sock_destruct() will dead lock. */
- rfcomm_dlc_unlock(d);
- rfcomm_sock_kill(sk);
- rfcomm_dlc_lock(d);
- }
+static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
+{
+ struct sock *sk = d->owner;
+
+ if (!sk)
+ return;
+
+ d->err = err;
+ sock_hold(sk);
+ if (!schedule_work(&d->state_change_work))
+ sock_put(sk);
}
/* ---- Socket functions ---- */

View File

@@ -1,151 +0,0 @@
From d50977b164e708bf523a35ef53315355528c3ca6 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Mon, 16 Sep 2019 04:53:20 +0200
Subject: [PATCH] ZEN: Add sysctl and CONFIG to disallow unprivileged
CLONE_NEWUSER
Our default behavior continues to match the vanilla kernel.
---
include/linux/user_namespace.h | 4 ++++
init/Kconfig | 16 ++++++++++++++++
kernel/fork.c | 14 ++++++++++++++
kernel/sysctl.c | 12 ++++++++++++
kernel/user_namespace.c | 7 +++++++
5 files changed, 53 insertions(+)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 45f09bec02c485..87b20e2ee27445 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -148,6 +148,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
@@ -181,6 +183,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
struct ns_common *ns_get_owner(struct ns_common *ns);
#else
+#define unprivileged_userns_clone 0
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
return &init_user_ns;
diff --git a/init/Kconfig b/init/Kconfig
index 94125d3b6893c7..9f7139b536f638 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1247,6 +1247,22 @@ config USER_NS
If unsure, say N.
+config USER_NS_UNPRIVILEGED
+ bool "Allow unprivileged users to create namespaces"
+ default y
+ depends on USER_NS
+ help
+ When disabled, unprivileged users will not be able to create
+ new namespaces. Allowing users to create their own namespaces
+ has been part of several recent local privilege escalation
+ exploits, so if you need user namespaces but are
+ paranoid^Wsecurity-conscious you want to disable this.
+
+ This setting can be overridden at runtime via the
+ kernel.unprivileged_userns_clone sysctl.
+
+ If unsure, say Y.
+
config PID_NS
bool "PID Namespaces"
default y
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d59..ff601cb7a1fae0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -98,6 +98,10 @@
#include <linux/io_uring.h>
#include <linux/bpf.h>
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
+
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -2008,6 +2012,10 @@ static __latent_entropy struct task_struct *copy_process(
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -3166,6 +3174,12 @@ int ksys_unshare(unsigned long unshare_flags)
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto bad_unshare_out;
+ }
+
err = check_unshare_flags(unshare_flags);
if (err)
goto bad_unshare_out;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c6d9dec11b749d..9a4514ad481b21 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -81,6 +81,9 @@
#ifdef CONFIG_RT_MUTEXES
#include <linux/rtmutex.h>
#endif
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
/* shared constants to be used in various sysctls */
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
@@ -1659,6 +1662,15 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_USER_NS
+ {
+ .procname = "unprivileged_userns_clone",
+ .data = &unprivileged_userns_clone,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 54211dbd516c57..16ca0c1516298d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -22,6 +22,13 @@
#include <linux/bsearch.h>
#include <linux/sort.h>
+/* sysctl */
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
+int unprivileged_userns_clone = 1;
+#else
+int unprivileged_userns_clone;
+#endif
+
static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);

View File

@@ -1,244 +0,0 @@
From 5ae86c8436b83762bc6cf46bea1da6ace2d3f50e Mon Sep 17 00:00:00 2001
From: Paul Gofman <pgofman@codeweavers.com>
Date: Wed, 6 May 2020 14:37:44 +0300
Subject: [PATCH 1/2] mm: Support soft dirty flag reset for VA range.
---
fs/proc/task_mmu.c | 129 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 103 insertions(+), 26 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3cec6fbef725..7c7865028f10 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1032,6 +1032,8 @@ enum clear_refs_types {
struct clear_refs_private {
enum clear_refs_types type;
+ unsigned long start, end;
+ bool clear_range;
};
#ifdef CONFIG_MEM_SOFT_DIRTY
@@ -1125,6 +1127,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
struct page *page;
+ BUG_ON(addr < cp->start || end > cp->end);
+
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
@@ -1181,9 +1185,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
struct clear_refs_private *cp = walk->private;
struct vm_area_struct *vma = walk->vma;
- if (vma->vm_flags & VM_PFNMAP)
+ if (!cp->clear_range && (vma->vm_flags & VM_PFNMAP))
return 1;
+ BUG_ON(start < cp->start || end > cp->end);
+
/*
* Writing 1 to /proc/pid/clear_refs affects all pages.
* Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
@@ -1206,10 +1212,12 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct task_struct *task;
- char buffer[PROC_NUMBUF];
+ char buffer[18];
struct mm_struct *mm;
struct vm_area_struct *vma;
enum clear_refs_types type;
+ unsigned long start, end;
+ bool clear_range;
int itype;
int rv;
@@ -1218,12 +1226,34 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- rv = kstrtoint(strstrip(buffer), 10, &itype);
- if (rv < 0)
- return rv;
- type = (enum clear_refs_types)itype;
- if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
- return -EINVAL;
+
+ if (buffer[0] == '6')
+ {
+ static int once;
+
+ if (!once++)
+ printk(KERN_DEBUG "task_mmu: Using POC clear refs range implementation.\n");
+
+ if (count != 17)
+ return -EINVAL;
+
+ type = CLEAR_REFS_SOFT_DIRTY;
+ start = *(unsigned long *)(buffer + 1);
+ end = *(unsigned long *)(buffer + 1 + 8);
+ }
+ else
+ {
+ rv = kstrtoint(strstrip(buffer), 10, &itype);
+ if (rv < 0)
+ return rv;
+ type = (enum clear_refs_types)itype;
+
+ if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
+ return -EINVAL;
+
+ start = 0;
+ end = -1UL;
+ }
task = get_proc_task(file_inode(file));
if (!task)
@@ -1235,40 +1265,86 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
.type = type,
};
- if (mmap_write_lock_killable(mm)) {
- count = -EINTR;
- goto out_mm;
+ if (start || end != -1UL)
+ {
+ start = min(start, -1) & PAGE_MASK;
+ end = min(end, -1) & PAGE_MASK;
+
+ if (start >= end)
+ {
+ count = -EINVAL;
+ goto out_mm;
+ }
+ clear_range = true;
}
+ else
+ {
+ clear_range = false;
+ }
+
+ cp.start = start;
+ cp.end = end;
+ cp.clear_range = clear_range;
+
if (type == CLEAR_REFS_MM_HIWATER_RSS) {
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+
/*
* Writing 5 to /proc/pid/clear_refs resets the peak
* resident set size to this mm's current rss value.
*/
reset_mm_hiwater_rss(mm);
- goto out_unlock;
+ mmap_write_unlock(mm);
+ goto out_mm;
}
if (type == CLEAR_REFS_SOFT_DIRTY) {
- mas_for_each(&mas, vma, ULONG_MAX) {
- if (!(vma->vm_flags & VM_SOFTDIRTY))
- continue;
- vma->vm_flags &= ~VM_SOFTDIRTY;
- vma_set_page_prot(vma);
+ if (mmap_read_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
}
-
+ if (!clear_range)
+ mas_for_each(&mas, vma, ULONG_MAX) {
+ if (!(vma->vm_flags & VM_SOFTDIRTY))
+ continue;
+ mmap_read_unlock(mm);
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+ mas_for_each(&mas, vma, ULONG_MAX) {
+ vma->vm_flags &= ~VM_SOFTDIRTY;
+ vma_set_page_prot(vma);
+ }
+ mmap_write_downgrade(mm);
+ break;
+ }
inc_tlb_flush_pending(mm);
mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
- 0, NULL, mm, 0, -1UL);
+ 0, NULL, mm, start, end);
mmu_notifier_invalidate_range_start(&range);
}
- walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
+ else
+ {
+ if (mmap_write_lock_killable(mm)) {
+ count = -EINTR;
+ goto out_mm;
+ }
+ }
+ walk_page_range(mm, start, end == -1UL ? -1 : end, &clear_refs_walk_ops, &cp);
if (type == CLEAR_REFS_SOFT_DIRTY) {
mmu_notifier_invalidate_range_end(&range);
flush_tlb_mm(mm);
dec_tlb_flush_pending(mm);
+ mmap_read_unlock(mm);
+ }
+ else
+ {
+ mmap_write_unlock(mm);
}
-out_unlock:
- mmap_write_unlock(mm);
out_mm:
mmput(mm);
}
@@ -1301,6 +1377,7 @@ struct pagemapread {
#define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
+#define PM_SOFT_DIRTY_PAGE BIT_ULL(57)
#define PM_UFFD_WP BIT_ULL(57)
#define PM_FILE BIT_ULL(61)
#define PM_SWAP BIT_ULL(62)
@@ -1373,13 +1450,13 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_PRESENT;
page = vm_normal_page(vma, addr, pte);
if (pte_soft_dirty(pte))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pte_uffd_wp(pte))
flags |= PM_UFFD_WP;
} else if (is_swap_pte(pte)) {
swp_entry_t entry;
if (pte_swp_soft_dirty(pte))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pte_swp_uffd_wp(pte))
flags |= PM_UFFD_WP;
entry = pte_to_swp_entry(pte);
@@ -1500,7 +1500,7 @@
flags |= PM_PRESENT;
if (pmd_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pmd_uffd_wp(pmd))
flags |= PM_UFFD_WP;
if (pm->show_pfn)
@@ -1442,7 +1519,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
}
flags |= PM_SWAP;
if (pmd_swp_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE;
if (pmd_swp_uffd_wp(pmd))
flags |= PM_UFFD_WP;
VM_BUG_ON(!is_pmd_migration_entry(pmd));
--
2.30.2

View File

@@ -1,360 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 14 Mar 2016 11:10:58 -0600
Subject: [PATCH] pci pme wakeups
Reduce wakeups for PME checks, which are a workaround for miswired
boards (sadly, too many of them) in laptops.
---
drivers/pci/pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c9338f9..6974fbf 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -62,7 +62,7 @@ struct pci_pme_device {
struct pci_dev *dev;
};
-#define PME_TIMEOUT 1000 /* How long between PME checks */
+#define PME_TIMEOUT 4000 /* How long between PME checks */
static void pci_dev_d3_sleep(struct pci_dev *dev)
{
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 19 Mar 2016 21:32:19 -0400
Subject: [PATCH] intel_idle: tweak cpuidle cstates
Increase target_residency in cpuidle cstate
Tune intel_idle to be a bit less aggressive;
Clear Linux is cleaner in hygiene (wakeups) than the average Linux,
so we can afford changing these in a way that increases
performance while keeping power efficiency
---
drivers/idle/intel_idle.c | 44 +++++++++++++++++++--------------------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index f449584..c994d24 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -531,7 +531,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -539,7 +539,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 33,
- .target_residency = 100,
+ .target_residency = 900,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -547,7 +547,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
- .target_residency = 400,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -555,7 +555,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
- .target_residency = 500,
+ .target_residency = 1500,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -563,7 +563,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
- .target_residency = 900,
+ .target_residency = 2000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -571,7 +571,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
- .target_residency = 1800,
+ .target_residency = 5000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -579,7 +579,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
- .target_residency = 7700,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -599,7 +599,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -607,7 +607,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 40,
- .target_residency = 100,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -615,7 +615,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
- .target_residency = 400,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -623,7 +623,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
- .target_residency = 500,
+ .target_residency = 2000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -631,7 +631,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
- .target_residency = 900,
+ .target_residency = 4000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -639,7 +639,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
- .target_residency = 1800,
+ .target_residency = 7000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -647,7 +647,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
- .target_residency = 7700,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -668,7 +668,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 120,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -676,7 +676,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 70,
- .target_residency = 100,
+ .target_residency = 1000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -684,7 +684,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 85,
- .target_residency = 200,
+ .target_residency = 600,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -692,7 +692,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x33",
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 124,
- .target_residency = 800,
+ .target_residency = 3000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -700,7 +700,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200,
- .target_residency = 800,
+ .target_residency = 3200,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -708,7 +708,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 480,
- .target_residency = 5000,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -716,7 +716,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 890,
- .target_residency = 5000,
+ .target_residency = 9000,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -737,7 +737,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 10,
- .target_residency = 20,
+ .target_residency = 300,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 6 Jan 2017 15:34:09 +0000
Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little
bigger than default
---
net/ipv4/tcp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 30c1142..4345075 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4201,8 +4201,8 @@ void __init tcp_init(void)
tcp_init_mem();
/* Set per-socket limits to no more than 1/128 the pressure threshold */
limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
- max_wshare = min(4UL*1024*1024, limit);
- max_rshare = min(6UL*1024*1024, limit);
+ max_wshare = min(16UL*1024*1024, limit);
+ max_rshare = min(16UL*1024*1024, limit);
init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 18 Feb 2018 23:35:41 +0000
Subject: [PATCH] locking: rwsem: spin faster
tweak rwsem owner spinning a bit
---
kernel/locking/rwsem.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index f11b9bd..1bbfcc1 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -717,6 +717,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
struct task_struct *new, *owner;
unsigned long flags, new_flags;
enum owner_state state;
+ int i = 0;
owner = rwsem_owner_flags(sem, &flags);
state = rwsem_owner_state(owner, flags, nonspinnable);
@@ -750,7 +751,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
break;
}
- cpu_relax();
+ if (i++ > 1000)
+ cpu_relax();
}
rcu_read_unlock();
--
https://clearlinux.org
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 2 Jun 2016 23:36:32 -0500
Subject: [PATCH] initialize ata before graphics
ATA init is the long pole in the boot process, and it's asynchronous.
Move the graphics init after it so that ATA and graphics initialize
in parallel.
---
drivers/Makefile | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/Makefile b/drivers/Makefile
index c0cd1b9..af1e2fb 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -59,15 +59,8 @@ obj-y += char/
# iommu/ comes before gpu as gpu are using iommu controllers
obj-y += iommu/
-# gpu/ comes after char for AGP vs DRM startup and after iommu
-obj-y += gpu/
-
obj-$(CONFIG_CONNECTOR) += connector/
-# i810fb and intelfb depend on char/agp/
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
-obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
-
obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
@@ -80,6 +73,14 @@ obj-$(CONFIG_IDE) += ide/
obj-y += scsi/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
+
+# gpu/ comes after char for AGP vs DRM startup and after iommu
+obj-y += gpu/
+
+# i810fb and intelfb depend on char/agp/
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
+
obj-$(CONFIG_TARGET_CORE) += target/
obj-$(CONFIG_MTD) += mtd/
obj-$(CONFIG_SPI) += spi/
--
https://clearlinux.org

View File

@@ -1,363 +0,0 @@
From 9c85113cf4019e7b277a44e72bda8b78347aa72f Mon Sep 17 00:00:00 2001
From: Paul Gofman <pgofman@codeweavers.com>
Date: Thu, 7 May 2020 14:05:31 +0300
Subject: [PATCH 2/2] mm: Support soft dirty flag read with reset.
---
fs/proc/base.c | 3 +
fs/proc/internal.h | 1 +
fs/proc/task_mmu.c | 144 +++++++++++++++++++++++++++++++++++++++------
3 files changed, 130 insertions(+), 18 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b3422cda2a91..8199ae2411ca 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3202,6 +3202,9 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ REG("pagemap_reset", S_IRUSR, proc_pagemap_reset_operations),
+#endif
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f60b379dcdc7..36a901cf0e7f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -303,6 +303,7 @@ extern const struct file_operations proc_pid_smaps_operations;
extern const struct file_operations proc_pid_smaps_rollup_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
+extern const struct file_operations proc_pagemap_reset_operations;
extern unsigned long task_vsize(struct mm_struct *);
extern unsigned long task_statm(struct mm_struct *,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7c7865028f10..a21694967915 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1056,8 +1056,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
return page_maybe_dma_pinned(page);
}
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
- unsigned long addr, pte_t *pte)
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *pte)
{
/*
* The soft-dirty tracker uses #PF-s to catch writes
@@ -1066,37 +1066,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
* of how soft-dirty works.
*/
pte_t ptent = *pte;
+ bool ret = false;
if (pte_present(ptent)) {
pte_t old_pte;
if (pte_is_pinned(vma, addr, ptent))
- return;
+ return ret;
old_pte = ptep_modify_prot_start(vma, addr, pte);
+ ret = pte_soft_dirty(old_pte);
ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (is_swap_pte(ptent)) {
+ ret = pte_swp_soft_dirty(ptent);
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
+ return ret;
}
#else
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
+ return false;
}
#endif
#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t old, pmd = *pmdp;
+ bool ret = false;
if (pmd_present(pmd)) {
/* See comment in change_huge_pmd() */
old = pmdp_invalidate(vma, addr, pmdp);
+
+ ret = pmd_soft_dirty(old);
+
if (pmd_dirty(old))
pmd = pmd_mkdirty(pmd);
if (pmd_young(old))
@@ -1107,14 +1116,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ ret = pmd_swp_soft_dirty(pmd);
pmd = pmd_swp_clear_soft_dirty(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
+ return ret;
}
#else
-static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
+ return false;
}
#endif
@@ -1367,6 +1379,7 @@ struct pagemapread {
int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
pagemap_entry_t *buffer;
bool show_pfn;
+ bool reset;
};
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
@@ -1398,6 +1411,14 @@ static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
return 0;
}
+static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm)
+{
+ ((unsigned long *)pm->buffer)[pm->pos++] = addr;
+ if (pm->pos >= pm->len)
+ return PM_END_OF_BUFFER;
+ return 0;
+}
+
static int pagemap_pte_hole(unsigned long start, unsigned long end,
__always_unused int depth, struct mm_walk *walk)
{
@@ -1405,6 +1426,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
unsigned long addr = start;
int err = 0;
+ if (pm->reset)
+ goto out;
+
while (addr < end) {
struct vm_area_struct *vma = find_vma(walk->mm, addr);
pagemap_entry_t pme = make_pme(0, 0);
@@ -1439,8 +1463,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
}
static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
- struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+ struct vm_area_struct *vma, unsigned long addr, pte_t *pte_addr)
{
+ pte_t pte = *pte_addr;
u64 frame = 0, flags = 0;
struct page *page = NULL;
@@ -1493,6 +1518,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
pmd_t pmd = *pmdp;
struct page *page = NULL;
+ if (pm->reset)
+ {
+ if (clear_soft_dirty_pmd(vma, addr, pmdp))
+ {
+ for (; addr != end; addr += PAGE_SIZE)
+ {
+ err = add_addr_to_pagemap(addr, pm);
+ if (err)
+ break;
+ }
+ }
+ goto trans_huge_done;
+ }
+
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1541,6 +1580,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
frame += (1 << MAX_SWAPFILES_SHIFT);
}
}
+trans_huge_done:
spin_unlock(ptl);
return err;
}
@@ -1555,10 +1595,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
*/
orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
for (; addr < end; pte++, addr += PAGE_SIZE) {
- pagemap_entry_t pme;
+ if (pm->reset)
+ {
+ if (clear_soft_dirty(vma, addr, pte))
+ err = add_addr_to_pagemap(addr, pm);
+ }
+ else
+ {
+ pagemap_entry_t pme;
- pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
- err = add_to_pagemap(addr, &pme, pm);
+ pme = pte_to_pagemap_entry(pm, vma, addr, pte);
+ err = add_to_pagemap(addr, &pme, pm);
+ }
if (err)
break;
}
@@ -1650,8 +1698,8 @@ static const struct mm_walk_ops pagemap_ops = {
* determine which areas of memory are actually mapped and llseek to
* skip over unmapped regions.
*/
-static ssize_t pagemap_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t do_pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos, bool reset)
{
struct mm_struct *mm = file->private_data;
struct pagemapread pm;
@@ -1660,6 +1708,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long start_vaddr;
unsigned long end_vaddr;
int ret = 0, copied = 0;
+ struct mmu_notifier_range range;
+ size_t buffer_len;
if (!mm || !mmget_not_zero(mm))
goto out;
@@ -1675,19 +1725,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* do not disclose physical addresses: attack vector */
pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
+ pm.reset = reset;
- pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
- pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
+ buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES);
+
+ pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL);
ret = -ENOMEM;
if (!pm.buffer)
goto out_mm;
src = *ppos;
svpfn = src / PM_ENTRY_BYTES;
- end_vaddr = mm->task_size;
+
+ start_vaddr = svpfn << PAGE_SHIFT;
+
+ if (reset)
+ {
+ if (count < sizeof(end_vaddr))
+ {
+ ret = -EINVAL;
+ goto out_mm;
+ }
+ if (copy_from_user(&end_vaddr, buf, sizeof(end_vaddr)))
+ return -EFAULT;
+ end_vaddr = min(end_vaddr, mm->task_size);
+ }
+ else
+ {
+ end_vaddr = mm->task_size;
+ start_vaddr = end_vaddr;
+ }
/* watch out for wraparound */
- start_vaddr = end_vaddr;
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
@@ -1707,18 +1776,35 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long end;
pm.pos = 0;
- end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
+ pm.len = min(buffer_len, count / PM_ENTRY_BYTES);
+
+ end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT));
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
+
ret = mmap_read_lock_killable(mm);
if (ret)
goto out_free;
+
+ if (reset)
+ {
+ inc_tlb_flush_pending(mm);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
+ 0, NULL, mm, start_vaddr, end);
+ mmu_notifier_invalidate_range_start(&range);
+ }
ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
+ if (reset)
+ {
+ mmu_notifier_invalidate_range_end(&range);
+ flush_tlb_mm(mm);
+ dec_tlb_flush_pending(mm);
+ }
mmap_read_unlock(mm);
- start_vaddr = end;
len = min(count, PM_ENTRY_BYTES * pm.pos);
+ BUG_ON(ret && ret != PM_END_OF_BUFFER);
if (copy_to_user(buf, pm.buffer, len)) {
ret = -EFAULT;
goto out_free;
@@ -1726,6 +1812,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
copied += len;
buf += len;
count -= len;
+
+ start_vaddr = reset && pm.pos == pm.len ? ((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end;
}
*ppos += copied;
if (!ret || ret == PM_END_OF_BUFFER)
@@ -1739,6 +1827,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
return ret;
}
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return do_pagemap_read(file, buf, count, ppos, false);
+}
+
+static ssize_t pagemap_reset_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return do_pagemap_read(file, buf, count, ppos, true);
+}
+
static int pagemap_open(struct inode *inode, struct file *file)
{
struct mm_struct *mm;
@@ -1765,6 +1865,14 @@ const struct file_operations proc_pagemap_operations = {
.open = pagemap_open,
.release = pagemap_release,
};
+
+const struct file_operations proc_pagemap_reset_operations = {
+ .llseek = mem_lseek, /* borrow this */
+ .read = pagemap_reset_read,
+ .open = pagemap_open,
+ .release = pagemap_release,
+};
+
#endif /* CONFIG_PROC_PAGE_MONITOR */
#ifdef CONFIG_NUMA
--
2.30.2

View File

@@ -1,822 +0,0 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: [PATCH 01/17] glitched
---
init/Makefile | 2 +-
1 file changed, 1 insertions(+), 1 deletions(-)
diff --git a/init/Makefile b/init/Makefile
index baf3ab8d9d49..854e32e6aec7 100755
--- a/init/Makefile
+++ b/init/Makefile
@@ -19,7 +19,7 @@ else
# Maximum length of UTS_VERSION is 64 chars
filechk_uts_version = \
- utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "$(build-timestamp)" | cut -b -64); \
+ utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "TKG" "$(build-timestamp)" | cut -b -64); \
echo '$(pound)'define UTS_VERSION \""$${utsver}"\"
#
--
2.28.0
From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 16:59:22 +0000
Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which
VFS caches are reclaimed
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
fs/dcache.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 361ea7ab30ea..0c5cf69b241a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -71,7 +71,7 @@
* If no ancestor relationship:
* arbitrary, since it's serialized on rename_lock
*/
-int sysctl_vfs_cache_pressure __read_mostly = 100;
+int sysctl_vfs_cache_pressure __read_mostly = 50;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
--
2.28.0
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f788cd61df21..2bfbb4213707 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -15,9 +15,9 @@ __read_mostly int scheduler_running;
/*
* part of the period that we allow rt tasks to run in us.
- * default: 0.95s
+ * XanMod default: 0.98s
*/
-int sysctl_sched_rt_runtime = 950000;
+int sysctl_sched_rt_runtime = 980000;
#ifdef CONFIG_SYSCTL
static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
--
2.28.0
From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:41:29 +0000
Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
scripts/setlocalversion | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 20f2efd57b11..0552d8b9f582 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -54,7 +54,7 @@ scm_version()
# If only the short version is requested, don't bother
# running further git commands
if $short; then
- echo "+"
+ # echo "+"
return
fi
# If we are past a tagged commit (like
--
2.28.0
From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Fri, 26 Oct 2018 11:22:33 +0100
Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3
inlining
---
drivers/infiniband/core/addr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 3a98439bba83..6efc4f907f58 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
union {
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
+ struct sockaddr_ib _sockaddr_ib;
} sgid_addr, dgid_addr;
int ret;
--
2.28.0
From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001
From: Etienne Juvigny <Ti3noU@gmail.com>
Date: Mon, 3 Sep 2018 17:36:25 +0200
Subject: [PATCH 07/17] Zenify & stuff
---
init/Kconfig | 32 ++++++++++++++++++++++++++++++++
kernel/sched/fair.c | 25 +++++++++++++++++++++++++
mm/page-writeback.c | 8 ++++++++
3 files changed, 65 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 3ae8678e1145..da708eed0f1e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK
menu "General setup"
+config ZENIFY
+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience"
+ default y
+ help
+ Tunes the kernel for responsiveness at the cost of throughput and power usage.
+
+ --- Virtual Memory Subsystem ---------------------------
+
+ Mem dirty before bg writeback..: 10 % -> 20 %
+ Mem dirty before sync writeback: 20 % -> 50 %
+
+ --- Block Layer ----------------------------------------
+
+ Queue depth...............: 128 -> 512
+ Default MQ scheduler......: mq-deadline -> bfq
+
+ --- CFS CPU Scheduler ----------------------------------
+
+ Scheduling latency.............: 6 -> 3 ms
+ Minimal granularity............: 0.75 -> 0.3 ms
+ Wakeup granularity.............: 1 -> 0.5 ms
+ CPU migration cost.............: 0.5 -> 0.25 ms
+ Bandwidth slice size...........: 5 -> 3 ms
+ Ondemand fine upscaling limit..: 95 % -> 85 %
+
+ --- MuQSS CPU Scheduler --------------------------------
+
+ Scheduling interval............: 6 -> 3 ms
+ ISO task max realtime use......: 70 % -> 25 %
+ Ondemand coarse upscaling limit: 80 % -> 45 %
+ Ondemand fine upscaling limit..: 95 % -> 45 %
+
config BROKEN
bool
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -37,8 +37,13 @@
*
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_latency = 3000000ULL;
+static unsigned int normalized_sysctl_sched_latency = 3000000ULL;
+#else
unsigned int sysctl_sched_latency = 6000000ULL;
static unsigned int normalized_sysctl_sched_latency = 6000000ULL;
+#endif
/*
* The initial- and re-scaling of tunables is configurable
@@ -58,21 +63,34 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
*
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_min_granularity = 300000ULL;
+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_min_granularity = 750000ULL;
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
+#endif
/*
* Minimal preemption granularity for CPU-bound SCHED_IDLE tasks.
* Applies only when SCHED_IDLE tasks compete with normal tasks.
*
* (default: 0.75 msec)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_idle_min_granularity = 300000ULL;
+#else
unsigned int sysctl_sched_idle_min_granularity = 750000ULL;
+#endif
/*
* This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sched_nr_latency = 10;
+#else
static unsigned int sched_nr_latency = 8;
+#endif
/*
* After fork, child runs first. If set to 0 (default) then
@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu)
*
* (default: 5 msec, units: microseconds)
*/
+#ifdef CONFIG_ZENIFY
+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
+#else
static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
+#endif
#ifdef CONFIG_SYSCTL
static struct ctl_table sched_fair_sysctls[] = {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 28b3e7a67565..01a1aef2b9b1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -71,7 +71,11 @@ static long ratelimit_pages = 32;
/*
* Start background writeback (via writeback threads) at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int dirty_background_ratio = 20;
+#else
static int dirty_background_ratio = 10;
+#endif
/*
* dirty_background_bytes starts at 0 (disabled) so that it is a function of
@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable;
/*
* The generator of dirty data starts writeback at this percentage
*/
+#ifdef CONFIG_ZENIFY
+static int vm_dirty_ratio = 50;
+#else
static int vm_dirty_ratio = 20;
+#endif
/*
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of
--
2.28.0
From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001
From: Steven Barrett <damentz@liquorix.net>
Date: Sun, 16 Jan 2011 18:57:32 -0600
Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control
4.4: In my tests YeAH dramatically slowed down transfers over a WLAN,
reducing throughput from ~65Mbps (CUBIC) to ~7MBps (YeAH) over 10
seconds (netperf TCP_STREAM) including long stalls.
Be careful when choosing this. ~heftig
---
net/ipv4/Kconfig | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e64e59b536d3..bfb55ef7ebbe 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -691,6 +691,9 @@ choice
config DEFAULT_VEGAS
bool "Vegas" if TCP_CONG_VEGAS=y
+ config DEFAULT_YEAH
+ bool "YeAH" if TCP_CONG_YEAH=y
+
config DEFAULT_VENO
bool "Veno" if TCP_CONG_VENO=y
@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG
default "htcp" if DEFAULT_HTCP
default "hybla" if DEFAULT_HYBLA
default "vegas" if DEFAULT_VEGAS
+ default "yeah" if DEFAULT_YEAH
default "westwood" if DEFAULT_WESTWOOD
default "veno" if DEFAULT_VENO
default "reno" if DEFAULT_RENO
--
2.28.0
From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 28 Nov 2018 19:01:27 -0600
Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag
strategy
For some reason, the default strategy to respond to THP fault fallbacks
is still just madvise, meaning stall if the program wants transparent
hugepages, but don't trigger a background reclaim / compaction if THP
begins to fail allocations. This creates a snowball effect where we
still use the THP code paths, but we almost always fail once a system
has been active and busy for a while.
The option "defer" was created for interactive systems where THP can
still improve performance. If we have to fall back to a regular page due
to an allocation failure or anything else, we will trigger a background
reclaim and compaction so future THP attempts succeed and previous
attempts eventually have their smaller pages combined without stalling
running applications.
We still want madvise to stall applications that explicitly want THP,
so defer+madvise _does_ make a ton of sense. Make it the default for
interactive systems, especially if the kernel maintainer left
transparent hugepages on "always".
Reasoning and details in the original patch: https://lwn.net/Articles/711248/
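For reference, the same strategy can also be selected at runtime through the standard transparent hugepage sysfs knob; a minimal userspace sketch (run as root; the error handling is kept deliberately simple):

#include <stdio.h>

int main(void)
{
	/* Standard THP defrag knob; valid values include always, defer,
	 * defer+madvise, madvise and never. */
	const char *path = "/sys/kernel/mm/transparent_hugepage/defrag";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	fputs("defer+madvise", f);
	fclose(f);
	return 0;
}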
---
mm/huge_memory.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74300e337c3c..9277f22c10a7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly =
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif
+#ifdef CONFIG_ZENIFY
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
+#else
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
+#endif
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
--
2.28.0
From 2b65a1329cb220b43c19c4d0de5833fae9e2b22d Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Wed, 24 Oct 2018 16:58:52 -0300
Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
net/sched/Kconfig | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 84badf00647e..6a922bca9f39 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -471,6 +471,9 @@ choice
config DEFAULT_SFQ
bool "Stochastic Fair Queue" if NET_SCH_SFQ
+ config DEFAULT_CAKE
+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE
+
config DEFAULT_PFIFO_FAST
bool "Priority FIFO Fast"
endchoice
@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH
default "fq" if DEFAULT_FQ
default "fq_codel" if DEFAULT_FQ_CODEL
default "sfq" if DEFAULT_SFQ
+ default "cake" if DEFAULT_CAKE
default "pfifo_fast"
endif
--
2.28.0
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Fri, 19 Apr 2019 12:33:38 +0200
Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default
The value is still pretty low, and AMD64-ABI and ELF extended numbering
supports that, so we should be fine on modern x86 systems.
This fixes crashes in some applications using more than 65535 vmas (also
affects some windows games running in wine, such as Star Citizen).
---
include/linux/mm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc05c3588aa3..b0cefe94920d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define MAPCOUNT_ELF_CORE_MARGIN (5)
-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+#define DEFAULT_MAX_MAP_COUNT (262144)
extern int sysctl_max_map_count;
--
2.28.0
From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Mon, 27 Jul 2020 00:19:18 +0200
Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT
Some games such as Detroit: Become Human tend to be very crash prone with
lower values.
---
include/linux/mm.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b0cefe94920d..890165099b07 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define DEFAULT_MAX_MAP_COUNT (262144)
+#define DEFAULT_MAX_MAP_COUNT (16777216)
extern int sysctl_max_map_count;
--
2.28.0
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 25 Nov 2019 15:13:06 -0300
Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
block/elevator.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/block/elevator.c b/block/elevator.c
index 4eab3d70e880..79669aa39d79 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -623,19 +623,19 @@ static inline bool elv_support_iosched(struct request_queue *q)
}
/*
- * For single queue devices, default to using mq-deadline. If we have multiple
- * queues or mq-deadline is not available, default to "none".
+ * For single queue devices, default to using bfq. If we have multiple
+ * queues or bfq is not available, default to "none".
*/
static struct elevator_type *elevator_get_default(struct request_queue *q)
{
if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
return NULL;
if (q->nr_hw_queues != 1 &&
!blk_mq_is_shared_tags(q->tag_set->flags))
return NULL;
- return elevator_find_get(q, "mq-deadline");
+ return elevator_find_get(q, "bfq");
}
/*
--
2.28.0
From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 3 Aug 2020 17:05:04 +0000
Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file
read-ahead pages size
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/pagemap.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cf2468da68e9..007dea784451 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);
-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE)
void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
struct file *, pgoff_t index, unsigned long req_count);
--
2.28.0
From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 15 Jan 2020 20:43:56 -0600
Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter
If intel-pstate is compiled into the kernel, it will preempt the loading
of acpi-cpufreq so you can take advantage of hardware p-states without
any friction.
However, intel-pstate is not completely superior to cpufreq's ondemand
for one reason. There's no concept of an up_threshold property.
In ondemand, up_threshold essentially reduces the maximum utilization to
compare against, allowing you to hit max frequencies and turbo boost
from a much lower core utilization.
With intel-pstate, you have the concept of minimum and maximum
performance, but no tunable that lets you define that maximum frequency
means 50% core utilization. For just this oversight, there are reasons
you may want ondemand.
Let's support setting "enable" in kernel boot parameters. This lets
kernel maintainers include "intel_pstate=disable" statically in the
static boot parameters, but lets users of the kernel override this
selection.
---
Documentation/admin-guide/kernel-parameters.txt | 3 +++
drivers/cpufreq/intel_pstate.c | 2 ++
2 files changed, 5 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb95fad81c79..3e92fee81e33 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1857,6 +1857,9 @@
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
+ enable
+ Enable intel_pstate in-case "disable" was passed
+ previously in the kernel boot parameters
passive
Use intel_pstate as a scaling driver, but configure it
to work with generic cpufreq governors (instead of
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 36a469150ff9..aee891c9b78a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str)
if (!strcmp(str, "no_hwp"))
no_hwp = 1;
+ if (!strcmp(str, "enable"))
+ no_load = 0;
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))
--
2.28.0
From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001
From: Kenny Levinsen <kl@kl.wtf>
Date: Sun, 27 Dec 2020 14:43:13 +0000
Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client
Significant time was spent on synchronize_rcu in evdev_detach_client
when applications closed evdev devices. Switching VT away from a
graphical environment commonly leads to mass input device closures,
which could lead to noticeable delays on systems with many input devices.
Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
client struct till after the RCU grace period instead of blocking the
calling application.
While this does not solve all slow evdev fd closures, it takes care of a
good portion of them, including this simple test:
#include <fcntl.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
int idx, fd;
const char *path = "/dev/input/event0";
for (idx = 0; idx < 1000; idx++) {
if ((fd = open(path, O_RDWR)) == -1) {
return -1;
}
close(fd);
}
return 0;
}
Time to completion of above test when run locally:
Before: 0m27.111s
After: 0m0.018s
Signed-off-by: Kenny Levinsen <kl@kl.wtf>
---
drivers/input/evdev.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 95f90699d2b17b..2b10fe29d2c8d9 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -46,6 +46,7 @@ struct evdev_client {
struct fasync_struct *fasync;
struct evdev *evdev;
struct list_head node;
+ struct rcu_head rcu;
enum input_clock_type clk_type;
bool revoked;
unsigned long *evmasks[EV_CNT];
@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
spin_unlock(&evdev->client_lock);
}
+static void evdev_reclaim_client(struct rcu_head *rp)
+{
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
+ unsigned int i;
+ for (i = 0; i < EV_CNT; ++i)
+ bitmap_free(client->evmasks[i]);
+ kvfree(client);
+}
+
static void evdev_detach_client(struct evdev *evdev,
struct evdev_client *client)
{
spin_lock(&evdev->client_lock);
list_del_rcu(&client->node);
spin_unlock(&evdev->client_lock);
- synchronize_rcu();
+ call_rcu(&client->rcu, evdev_reclaim_client);
}
static int evdev_open_device(struct evdev *evdev)
@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
- unsigned int i;
mutex_lock(&evdev->mutex);
@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
evdev_detach_client(evdev, client);
- for (i = 0; i < EV_CNT; ++i)
- bitmap_free(client->evmasks[i]);
-
- kvfree(client);
-
evdev_close_device(evdev);
return 0;
@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
err_free_client:
evdev_detach_client(evdev, client);
- kvfree(client);
return error;
}
From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:11 -0700
Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
scheduling delays
The page allocator processes free pages in groups of pageblocks, where
the size of a pageblock is typically quite large (1024 pages without
hugetlbpage support). Pageblocks are processed atomically with the zone
lock held, which can cause severe scheduling delays on both the CPU
going through the pageblock and any other CPUs waiting to acquire the
zone lock. A frequent offender is move_freepages_block(), which is used
by rmqueue() for page allocation.
As it turns out, there's no requirement for pageblocks to be so large,
so the pageblock order can simply be reduced to ease the scheduling
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
reasonable setting to ensure non-costly page allocation requests can
still be serviced without always needing to free up more than one
pageblock's worth of pages at a time.
This has a noticeable effect on overall system latency when memory
pressure is elevated. The various mm functions which operate on
pageblocks no longer appear in the preemptoff tracer, where previously
they would spend up to 100 ms on a mobile arm64 CPU processing a
pageblock with preemption disabled and the zone lock held.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
include/linux/pageblock-flags.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 5f1ae07d724b88..97cda629c9e909 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;
#else /* CONFIG_HUGETLB_PAGE */
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
-#define pageblock_order (MAX_ORDER-1)
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER
#endif /* CONFIG_HUGETLB_PAGE */
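For scale, assuming 4 KiB base pages and the conventional MAX_ORDER of 11 (consistent with the "1024 pages" figure quoted above): the old pageblock_order of MAX_ORDER-1 = 10 covers 2^10 = 1024 pages (4 MiB) per pageblock, while PAGE_ALLOC_COSTLY_ORDER = 3 covers only 2^3 = 8 pages (32 KiB), so each pass under the zone lock handles roughly 1/128th as much memory.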
From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:32 -0700
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()
There is noticeable scheduling latency and heavy zone lock contention
stemming from rmqueue_bulk's single hold of the zone lock while doing
its work, as seen with the preemptoff tracer. There's no actual need for
rmqueue_bulk() to hold the zone lock the entire time; it only does so
for supposed efficiency. As such, we can relax the zone lock and even
reschedule when IRQs are enabled in order to keep the scheduling delays
and zone lock contention at bay. Forward progress is still guaranteed,
as the zone lock can only be relaxed after page removal.
With this change, rmqueue_bulk() no longer appears as a serious offender
in the preemptoff tracer, and system latency is noticeably improved.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
mm/page_alloc.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0b0397e29ee4c..87a983a356530c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3118,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
}
/*
- * Obtain a specified number of elements from the buddy allocator, all under
- * a single hold of the lock, for efficiency. Add them to the supplied list.
- * Returns the number of new pages which were placed at *list.
+ * Obtain a specified number of elements from the buddy allocator, and relax the
+ * zone lock when needed. Add them to the supplied list. Returns the number of
+ * new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
unsigned long flags;
- int i, allocated = 0;
+ const bool can_resched = !preempt_count() && !irqs_disabled();
+ int i, allocated = 0, last_mod = 0;
/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
spin_lock(&zone->lock);
@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
if (unlikely(page == NULL))
break;
+ /* Reschedule and ease the contention on the lock if needed */
+ if (i + 1 < count && ((can_resched && need_resched()) ||
+ spin_needbreak(&zone->lock))) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
+ -((i + 1 - last_mod) << order));
+ last_mod = i + 1;
+ spin_unlock(&zone->lock);
+ if (can_resched)
+ cond_resched();
+ spin_lock(&zone->lock);
+ }
+
if (unlikely(check_pcp_refill(page, order)))
continue;
@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* on i. Do not confuse with 'allocated' which is the number of
* pages added to the pcp list.
*/
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
spin_unlock(&zone->lock);
return allocated;
}

View File

@@ -1,22 +0,0 @@
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b3b59cc51d6..2a0072192c3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
*
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
+#ifdef CONFIG_ZENIFY
+unsigned int sysctl_sched_wakeup_granularity = 500000UL;
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL;
+
+const_debug unsigned int sysctl_sched_migration_cost = 50000UL;
+#else
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#endif
int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)

View File

@@ -1,90 +0,0 @@
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)

View File

@@ -1,193 +0,0 @@
From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001
From: Mark Weiman <mark.weiman@markzz.com>
Date: Sun, 12 Aug 2018 11:36:21 -0400
Subject: [PATCH] pci: Enable overrides for missing ACS capabilities
This is an updated version of Alex Williamson's patch from:
https://lkml.org/lkml/2013/5/30/513
Original commit message follows:
PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that
allows us to control whether transactions are allowed to be redirected
in various subnodes of a PCIe topology. For instance, if two
endpoints are below a root port or downstream switch port, the
downstream port may optionally redirect transactions between the
devices, bypassing upstream devices. The same can happen internally
on multifunction devices. The transaction may never be visible to the
upstream devices.
One upstream device that we particularly care about is the IOMMU. If
a redirection occurs in the topology below the IOMMU, then the IOMMU
cannot provide isolation between devices. This is why the PCIe spec
encourages topologies to include ACS support. Without it, we have to
assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation.
Unfortunately, far too many topologies do not support ACS to make this
a steadfast requirement. Even the latest chipsets from Intel only
sporadically support ACS. We have trouble getting interconnect
vendors to include the PCIe capability required by the spec, let alone
suggested features.
Therefore, we need to add some flexibility. The pcie_acs_override=
boot option lets users opt-in specific devices or sets of devices to
assume ACS support. The "downstream" option assumes full ACS support
on root ports and downstream switch ports. The "multifunction"
option assumes the subset of ACS features available on multifunction
endpoints and upstream switch ports are supported. The "id:nnnn:nnnn"
option enables ACS support on devices matching the provided vendor
and device IDs, allowing more strategic ACS overrides. These options
may be combined in any order. A maximum of 16 id specific overrides
are available. It's suggested to use the most limited set of options
necessary to avoid completely disabling ACS across the topology.
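As a concrete illustration (the vendor/device IDs below are illustrative only), a system could be booted with either of:

    pcie_acs_override=downstream,multifunction
    pcie_acs_override=id:10de:1b80,id:10de:10f0

The first form assumes ACS on all root/downstream ports and multifunction devices; the second opts in only the two listed devices.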
Note to hardware vendors: we have facilities to permanently quirk
specific devices which enforce isolation but do not provide an ACS
capability. Please contact me to have your devices added and save
your customers the hassle of this boot option.
Signed-off-by: Mark Weiman <mark.weiman@markzz.com>
---
.../admin-guide/kernel-parameters.txt | 9 ++
drivers/pci/quirks.c | 101 ++++++++++++++++++
2 files changed, 110 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aefd358a5ca3..173b3596fd9e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3190,6 +3190,15 @@
nomsi [MSI] If the PCI_MSI kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of MSI interrupts system-wide.
+ pcie_acs_override =
+ [PCIE] Override missing PCIe ACS support for:
+ downstream
+ All downstream ports - full ACS capabilities
+ multifunction
+ All multifunction devices - multifunction ACS subset
+ id:nnnn:nnnn
+ Specific device - full ACS capabilities
+ Specified as vid:did (vendor/device ID) in hex
noioapicquirk [APIC] Disable all boot interrupt quirks.
Safety option to keep boot IRQs enabled. This
should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 4700d24e5d55..8f7a3d7fd9c1 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
}
+static bool acs_on_downstream;
+static bool acs_on_multifunction;
+
+#define NUM_ACS_IDS 16
+struct acs_on_id {
+ unsigned short vendor;
+ unsigned short device;
+};
+static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
+static u8 max_acs_id;
+
+static __init int pcie_acs_override_setup(char *p)
+{
+ if (!p)
+ return -EINVAL;
+
+ while (*p) {
+ if (!strncmp(p, "downstream", 10))
+ acs_on_downstream = true;
+ if (!strncmp(p, "multifunction", 13))
+ acs_on_multifunction = true;
+ if (!strncmp(p, "id:", 3)) {
+ char opt[5];
+ int ret;
+ long val;
+
+ if (max_acs_id >= NUM_ACS_IDS - 1) {
+ pr_warn("Out of PCIe ACS override slots (%d)\n",
+ NUM_ACS_IDS);
+ goto next;
+ }
+
+ p += 3;
+ snprintf(opt, 5, "%s", p);
+ ret = kstrtol(opt, 16, &val);
+ if (ret) {
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
+ goto next;
+ }
+ acs_on_ids[max_acs_id].vendor = val;
+
+ p += strcspn(p, ":");
+ if (*p != ':') {
+ pr_warn("PCIe ACS invalid ID\n");
+ goto next;
+ }
+
+ p++;
+ snprintf(opt, 5, "%s", p);
+ ret = kstrtol(opt, 16, &val);
+ if (ret) {
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
+ goto next;
+ }
+ acs_on_ids[max_acs_id].device = val;
+ max_acs_id++;
+ }
+next:
+ p += strcspn(p, ",");
+ if (*p == ',')
+ p++;
+ }
+
+ if (acs_on_downstream || acs_on_multifunction || max_acs_id)
+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n");
+
+ return 0;
+}
+early_param("pcie_acs_override", pcie_acs_override_setup);
+
+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags)
+{
+ int i;
+
+ /* Never override ACS for legacy devices or devices with ACS caps */
+ if (!pci_is_pcie(dev) ||
+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS))
+ return -ENOTTY;
+
+ for (i = 0; i < max_acs_id; i++)
+ if (acs_on_ids[i].vendor == dev->vendor &&
+ acs_on_ids[i].device == dev->device)
+ return 1;
+
+ switch (pci_pcie_type(dev)) {
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ case PCI_EXP_TYPE_ROOT_PORT:
+ if (acs_on_downstream)
+ return 1;
+ break;
+ case PCI_EXP_TYPE_ENDPOINT:
+ case PCI_EXP_TYPE_UPSTREAM:
+ case PCI_EXP_TYPE_LEG_END:
+ case PCI_EXP_TYPE_RC_END:
+ if (acs_on_multifunction && dev->multifunction)
+ return 1;
+ }
+
+ return -ENOTTY;
+}
/*
* Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
* The device will throw a Link Down error on AER-capable systems and
@@ -4513,6 +4613,7 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs },
/* Zhaoxin Root/Downstream Ports */
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
{ 0 }
};

View File

@@ -1,166 +0,0 @@
From b70e738f08403950aa3053c36b98c6b0eeb0eb90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Mon, 25 Oct 2021 09:49:42 -0300
Subject: [PATCH] futex: Add entry point for FUTEX_WAIT_MULTIPLE (opcode 31)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add an option to wait on multiple futexes using the old interface, which
uses opcode 31 through the futex() syscall. Do that by simply translating the
old interface to use the new code. This allows old and stable versions
of Proton to still use fsync in new kernel releases.
Signed-off-by: André Almeida <andrealmeid@collabora.com>
---
include/uapi/linux/futex.h | 13 +++++++
kernel/futex/syscalls.c | 75 +++++++++++++++++++++++++++++++++++++-
2 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 71a5df8d2689..d375ab21cbf8 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -22,6 +22,7 @@
#define FUTEX_WAIT_REQUEUE_PI 11
#define FUTEX_CMP_REQUEUE_PI 12
#define FUTEX_LOCK_PI2 13
+#define FUTEX_WAIT_MULTIPLE 31
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
@@ -68,6 +69,18 @@ struct futex_waitv {
__u32 __reserved;
};
+/**
+ * struct futex_wait_block - Block of futexes to be waited for
+ * @uaddr: User address of the futex
+ * @val: Futex value expected by userspace
+ * @bitset: Bitset for the optional bitmasked wakeup
+ */
+struct futex_wait_block {
+ __u32 __user *uaddr;
+ __u32 val;
+ __u32 bitset;
+};
+
/*
* Support for robust futexes: the kernel cleans up held futexes at
* thread exit time.
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index 6f91a07a6a83..2f4d4c04ede2 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -158,6 +158,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
case FUTEX_LOCK_PI2:
case FUTEX_WAIT_BITSET:
case FUTEX_WAIT_REQUEUE_PI:
+ case FUTEX_WAIT_MULTIPLE:
return true;
}
return false;
@@ -170,13 +171,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
return -EINVAL;
*t = timespec64_to_ktime(*ts);
- if (cmd == FUTEX_WAIT)
+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
*t = ktime_add_safe(ktime_get(), *t);
else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
return 0;
}
+/**
+ * futex_read_wait_block - Read an array of futex_wait_block from userspace
+ * @uaddr: Userspace address of the block
+ * @count: Number of blocks to be read
+ *
+ * This function allocates an array of futex_q (we zero it to
+ * initialize the fields) and then, for each futex_wait_block element from
+ * userspace, fills in a futex_q element with proper values.
+ */
+inline struct futex_vector *futex_read_wait_block(u32 __user *uaddr, u32 count)
+{
+ unsigned int i;
+ struct futex_vector *futexv;
+ struct futex_wait_block fwb;
+ struct futex_wait_block __user *entry =
+ (struct futex_wait_block __user *)uaddr;
+
+ if (!count || count > FUTEX_WAITV_MAX)
+ return ERR_PTR(-EINVAL);
+
+ futexv = kcalloc(count, sizeof(*futexv), GFP_KERNEL);
+ if (!futexv)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < count; i++) {
+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) {
+ kfree(futexv);
+ return ERR_PTR(-EFAULT);
+ }
+
+ futexv[i].w.flags = FUTEX_32;
+ futexv[i].w.val = fwb.val;
+ futexv[i].w.uaddr = (uintptr_t) (fwb.uaddr);
+ futexv[i].q = futex_q_init;
+ }
+
+ return futexv;
+}
+
+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
+ struct hrtimer_sleeper *to);
+
+int futex_opcode_31(ktime_t *abs_time, u32 __user *uaddr, int count)
+{
+ int ret;
+ struct futex_vector *vs;
+ struct hrtimer_sleeper *to = NULL, timeout;
+
+ to = futex_setup_timer(abs_time, &timeout, 0, 0);
+
+ vs = futex_read_wait_block(uaddr, count);
+
+ if (IS_ERR(vs))
+ return PTR_ERR(vs);
+
+ ret = futex_wait_multiple(vs, count, abs_time ? to : NULL);
+ kfree(vs);
+
+ if (to) {
+ hrtimer_cancel(&to->timer);
+ destroy_hrtimer_on_stack(&to->timer);
+ }
+
+ return ret;
+}
+
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
const struct __kernel_timespec __user *, utime,
u32 __user *, uaddr2, u32, val3)
@@ -196,6 +263,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
tp = &t;
}
+ if (cmd == FUTEX_WAIT_MULTIPLE)
+ return futex_opcode_31(tp, uaddr, val);
+
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
@@ -392,6 +462,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
tp = &t;
}
+ if (cmd == FUTEX_WAIT_MULTIPLE)
+ return futex_opcode_31(tp, uaddr, val);
+
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
#endif /* CONFIG_COMPAT_32BIT_TIME */
--
2.33.1
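As a rough userspace sketch (not part of the patch above, and purely illustrative): with the patch applied, the legacy opcode could be exercised as below. Both futexes hold their expected values, so the call blocks until the relative one-second timeout expires and returns -1 with errno set to ETIMEDOUT.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#define FUTEX_WAIT_MULTIPLE 31          /* opcode introduced by the patch above */

struct futex_wait_block {               /* mirrors the uapi struct added above */
        uint32_t *uaddr;
        uint32_t val;
        uint32_t bitset;
};

int main(void)
{
        uint32_t f1 = 0, f2 = 0;
        struct futex_wait_block blocks[2] = {
                { &f1, 0, ~0u },        /* sleep while *uaddr == val */
                { &f2, 0, ~0u },
        };
        struct timespec timeout = { .tv_sec = 1, .tv_nsec = 0 };  /* relative */

        /* uaddr points at the block array, val carries the element count */
        long ret = syscall(SYS_futex, blocks, FUTEX_WAIT_MULTIPLE, 2,
                           &timeout, NULL, 0);
        printf("ret=%ld errno=%d\n", ret, errno);
        return 0;
}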

File diff suppressed because it is too large

View File

@@ -1,90 +0,0 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: glitched - BMQ
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9270a4370d54..30d01e647417 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -169,7 +169,7 @@
/*
* From 0 .. 200. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 20;
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)

View File

@@ -1,18 +0,0 @@
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)

View File

@@ -1,316 +0,0 @@
From e5e77ad2223f662e1615266d8ef39a8db7e65a70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20H=C3=A4dicke?= <felixhaedicke@web.de>
Date: Thu, 19 Nov 2020 09:22:32 +0100
Subject: HID: quirks: Add Apple Magic Trackpad 2 to hid_have_special_driver
list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The Apple Magic Trackpad 2 is handled by the magicmouse driver. And
there were severe stability issues when both drivers (hid-generic and
hid-magicmouse) were loaded for this device.
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=210241
Signed-off-by: Felix Hädicke <felixhaedicke@web.de>
---
drivers/hid/hid-quirks.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index bf7ecab5d9e5..142e9dae2837 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -478,6 +478,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
+ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) },
#endif
#if IS_ENABLED(CONFIG_HID_MAYFLASH)
{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
--
cgit v1.2.3-1-gf6bb5
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 3 Feb 2021 11:20:12 +0200
Subject: Revert "cpufreq: Avoid configuring old governors as default with intel_pstate"
This is an undesirable behavior for us since our aggressive ondemand performs
better than schedutil for gaming when using intel_pstate in passive mode.
It also interferes with our option to select the desired default governor.
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 2c7171e0b0010..85de313ddec29 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -71,7 +71,6 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE
config CPU_FREQ_DEFAULT_GOV_ONDEMAND
bool "ondemand"
- depends on !(X86_INTEL_PSTATE && SMP)
select CPU_FREQ_GOV_ONDEMAND
select CPU_FREQ_GOV_PERFORMANCE
help
@@ -83,7 +84,6 @@ config CPU_FREQ_DEFAULT_GOV_ONDEMAND
config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
bool "conservative"
- depends on !(X86_INTEL_PSTATE && SMP)
select CPU_FREQ_GOV_CONSERVATIVE
select CPU_FREQ_GOV_PERFORMANCE
help
From 0c079d3f88df5f8286cd5c91b54bdac7c819be85 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Tue, 6 Dec 2022 16:11:41 +0000
Subject: [PATCH] drm/i915: improve the catch-all evict to handle lock
contention
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The catch-all evict can fail due to object lock contention, since it
only goes as far as trylocking the object, due to us already holding the
vm->mutex. Doing a full object lock here can deadlock, since the
vm->mutex is always our inner lock. Add another execbuf pass which drops
the vm->mutex and then tries to grab the object with the full lock,
before retrying the eviction. This should be good enough for now to
fix the immediate regression with userspace seeing -ENOSPC from execbuf
due to contended object locks during GTT eviction.
Testcase: igt@gem_ppgtt@shrink-vs-evict-*
Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.")
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627
References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570
References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
Cc: Mani Milani <mani@chromium.org>
Cc: <stable@vger.kernel.org> # v5.18+
Revision 1 of https://patchwork.freedesktop.org/series/111686/
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 25 +++++++++++--
drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +-
drivers/gpu/drm/i915/i915_gem_evict.c | 37 ++++++++++++++-----
drivers/gpu/drm/i915/i915_gem_evict.h | 4 +-
drivers/gpu/drm/i915/i915_vma.c | 2 +-
.../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-
6 files changed, 56 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 845023c14eb36f..094e92ed28db4f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -741,25 +741,44 @@ static int eb_reserve(struct i915_execbuffer *eb)
*
* Defragmenting is skipped if all objects are pinned at a fixed location.
*/
- for (pass = 0; pass <= 2; pass++) {
+ for (pass = 0; pass <= 3; pass++) {
int pin_flags = PIN_USER | PIN_VALIDATE;
if (pass == 0)
pin_flags |= PIN_NONBLOCK;
if (pass >= 1)
- unpinned = eb_unbind(eb, pass == 2);
+ unpinned = eb_unbind(eb, pass >= 2);
if (pass == 2) {
err = mutex_lock_interruptible(&eb->context->vm->mutex);
if (!err) {
- err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
mutex_unlock(&eb->context->vm->mutex);
}
if (err)
return err;
}
+ if (pass == 3) {
+retry:
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
+ if (!err) {
+ struct drm_i915_gem_object *busy_bo = NULL;
+
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
+ mutex_unlock(&eb->context->vm->mutex);
+ if (err && busy_bo) {
+ err = i915_gem_object_lock(busy_bo, &eb->ww);
+ i915_gem_object_put(busy_bo);
+ if (!err)
+ goto retry;
+ }
+ }
+ if (err)
+ return err;
+ }
+
list_for_each_entry(ev, &eb->unbound, bind_link) {
err = eb_reserve_vma(eb, ev, pin_flags);
if (err)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 73d9eda1d6b7a6..c83d98e1dc5da0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -369,7 +369,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
if (vma == ERR_PTR(-ENOSPC)) {
ret = mutex_lock_interruptible(&ggtt->vm.mutex);
if (!ret) {
- ret = i915_gem_evict_vm(&ggtt->vm, &ww);
+ ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
mutex_unlock(&ggtt->vm.mutex);
}
if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index f025ee4fa52618..a4b4d9b7d26c7a 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -416,6 +416,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
* @vm: Address space to cleanse
* @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
* will be able to evict vma's locked by the ww as well.
+ * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
+ * in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
+ * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
+ * the vm->mutex, before trying again to acquire the contended lock. The caller
+ * also owns a reference to the object.
*
* This function evicts all vmas from a vm.
*
@@ -425,7 +430,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
* To clarify: This is for freeing up virtual address space, not for freeing
* memory in e.g. the shrinker.
*/
-int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
+int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
+ struct drm_i915_gem_object **busy_bo)
{
int ret = 0;
@@ -457,15 +463,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
* the resv is shared among multiple objects, we still
* need the object ref.
*/
- if (dying_vma(vma) ||
+ if (!i915_gem_object_get_rcu(vma->obj) ||
(ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) {
__i915_vma_pin(vma);
list_add(&vma->evict_link, &locked_eviction_list);
continue;
}
- if (!i915_gem_object_trylock(vma->obj, ww))
+ if (!i915_gem_object_trylock(vma->obj, ww)) {
+ if (busy_bo) {
+ *busy_bo = vma->obj; /* holds ref */
+ ret = -EBUSY;
+ break;
+ }
+ i915_gem_object_put(vma->obj);
continue;
+ }
__i915_vma_pin(vma);
list_add(&vma->evict_link, &eviction_list);
@@ -473,25 +486,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
break;
- ret = 0;
/* Unbind locked objects first, before unlocking the eviction_list */
list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
__i915_vma_unpin(vma);
- if (ret == 0)
+ if (ret == 0) {
ret = __i915_vma_unbind(vma);
- if (ret != -EINTR) /* "Get me out of here!" */
- ret = 0;
+ if (ret != -EINTR) /* "Get me out of here!" */
+ ret = 0;
+ }
+ if (!dying_vma(vma))
+ i915_gem_object_put(vma->obj);
}
list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
__i915_vma_unpin(vma);
- if (ret == 0)
+ if (ret == 0) {
ret = __i915_vma_unbind(vma);
- if (ret != -EINTR) /* "Get me out of here!" */
- ret = 0;
+ if (ret != -EINTR) /* "Get me out of here!" */
+ ret = 0;
+ }
i915_gem_object_unlock(vma->obj);
+ i915_gem_object_put(vma->obj);
}
} while (ret == 0);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h
index e593c530f9bd7a..bf0ee0e4fe6088 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.h
+++ b/drivers/gpu/drm/i915/i915_gem_evict.h
@@ -11,6 +11,7 @@
struct drm_mm_node;
struct i915_address_space;
struct i915_gem_ww_ctx;
+struct drm_i915_gem_object;
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
struct i915_gem_ww_ctx *ww,
@@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
struct drm_mm_node *node,
unsigned int flags);
int i915_gem_evict_vm(struct i915_address_space *vm,
- struct i915_gem_ww_ctx *ww);
+ struct i915_gem_ww_ctx *ww,
+ struct drm_i915_gem_object **busy_bo);
#endif /* __I915_GEM_EVICT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f17c09ead7d778..4d06875de14a14 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1569,7 +1569,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
* locked objects when called from execbuf when pinning
* is removed. This would probably regress badly.
*/
- i915_gem_evict_vm(vm, NULL);
+ i915_gem_evict_vm(vm, NULL, NULL);
mutex_unlock(&vm->mutex);
}
} while (1);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 8c6517d29b8e0c..37068542aafe7f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
/* Everything is pinned, nothing should happen */
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_vm(&ggtt->vm, NULL);
+ err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL);
mutex_unlock(&ggtt->vm.mutex);
if (err) {
pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
@@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
for_i915_gem_ww(&ww, err, false) {
mutex_lock(&ggtt->vm.mutex);
- err = i915_gem_evict_vm(&ggtt->vm, &ww);
+ err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
mutex_unlock(&ggtt->vm.mutex);
}

View File

@@ -1,27 +0,0 @@
# Remove the obsoletes line in kernel-headers
# Add provides for kernel-devel so there's no conflict
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 7c477ca7d..1158f5559 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -25,0 +26 @@ fi
+PROVIDES_DRM=""
@@ -27 +28 @@ if grep -q CONFIG_DRM=y .config; then
- PROVIDES=kernel-drm
+ PROVIDES_DRM="Provides: kernel-drm = %{version}"
@@ -30 +30,0 @@ fi
-PROVIDES="$PROVIDES kernel-$KERNELRELEASE"
@@ -51 +51,3 @@ $S Source: kernel-$__KERNELRELEASE.tar.gz
- Provides: $PROVIDES
+ $PROVIDES_DRM
+ Provides: kernel = %{version}
+ Provides: kernel-uname-r = %{version}
+ Provides: installonlypkg(kernel) = %{version}
@@ -61 +63 @@ $S Source: kernel-$__KERNELRELEASE.tar.gz
- Obsoletes: kernel-headers
+ Provides: installonlypkg(kernel) = %{version}
@@ -72,0 +75,3 @@ $S$M Group: System Environment/Kernel
+$S$M Provides: kernel-devel = %{version}
+$S$M Provides: kernel-devel-uname-r = %{version}
+$S$M Provides: installonlypkg(kernel) = %{version}

View File

@@ -1,46 +0,0 @@
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -442,7 +442,7 @@ endif
HOSTPKG_CONFIG = pkg-config
KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
- -O2 -fomit-frame-pointer -std=gnu11 \
+ -O3 -fomit-frame-pointer -std=gnu11 \
-Wdeclaration-after-statement
KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS)
KBUILD_USERLDFLAGS := $(USERLDFLAGS)
@@ -474,7 +474,7 @@ endif
-Wclippy::dbg_macro
KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
-KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
+KBUILD_HOSTCXXFLAGS := -Wall -O3 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS)
KBUILD_HOSTRUSTFLAGS := $(rust_common_flags) -O -Cstrip=debuginfo \
-Zallow-features= $(HOSTRUSTFLAGS)
KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS)
@@ -757,7 +757,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
-KBUILD_CFLAGS += -O2
+KBUILD_CFLAGS += -O3
KBUILD_RUSTFLAGS += -Copt-level=2
else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += -Os
diff --git a/init/Kconfig b/init/Kconfig
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1401,10 +1401,10 @@ choice
default CC_OPTIMIZE_FOR_PERFORMANCE
config CC_OPTIMIZE_FOR_PERFORMANCE
- bool "Optimize for performance (-O2)"
+ bool "Optimize for performance (-O3)"
help
This is the default optimization level for the kernel, building
- with the "-O2" compiler flag for best performance and most
+ with the "-O3" compiler flag for best performance and most
helpful compile-time warnings.
config CC_OPTIMIZE_FOR_SIZE