diff --git a/linux-tkg-patches/6.1/0012-misc-additions.patch b/linux-tkg-patches/6.1/0012-misc-additions.patch index 96ca6c4..f6c8c3e 100644 --- a/linux-tkg-patches/6.1/0012-misc-additions.patch +++ b/linux-tkg-patches/6.1/0012-misc-additions.patch @@ -292,3 +292,194 @@ index 4bf4ea6cbb5eee..4850dafbaa05fb 100644 } /* ---- Socket functions ---- */ + +From 727209376f4998bc84db1d5d8af15afea846a92b Mon Sep 17 00:00:00 2001 +From: "Guilherme G. Piccoli" +Date: Mon, 24 Oct 2022 17:02:54 -0300 +Subject: x86/split_lock: Add sysctl to control the misery mode + +Commit b041b525dab9 ("x86/split_lock: Make life miserable for split lockers") +changed the way the split lock detector works when in "warn" mode; +basically, it not only shows the warn message, but also intentionally +introduces a slowdown through sleeping plus serialization mechanism +on such task. Based on discussions in [0], seems the warning alone +wasn't enough motivation for userspace developers to fix their +applications. + +This slowdown is enough to totally break some proprietary (aka. +unfixable) userspace[1]. + +Happens that originally the proposal in [0] was to add a new mode +which would warns + slowdown the "split locking" task, keeping the +old warn mode untouched. In the end, that idea was discarded and +the regular/default "warn" mode now slows down the applications. This +is quite aggressive with regards proprietary/legacy programs that +basically are unable to properly run in kernel with this change. +While it is understandable that a malicious application could DoS +by split locking, it seems unacceptable to regress old/proprietary +userspace programs through a default configuration that previously +worked. An example of such breakage was reported in [1]. + +Add a sysctl to allow controlling the "misery mode" behavior, as per +Thomas suggestion on [2]. This way, users running legacy and/or +proprietary software are allowed to still execute them with a decent +performance while still observing the warning messages on kernel log. + +[0] https://lore.kernel.org/lkml/20220217012721.9694-1-tony.luck@intel.com/ +[1] https://github.com/doitsujin/dxvk/issues/2938 +[2] https://lore.kernel.org/lkml/87pmf4bter.ffs@tglx/ + +[ dhansen: minor changelog tweaks, including clarifying the actual + problem ] + +Fixes: b041b525dab9 ("x86/split_lock: Make life miserable for split lockers") +Suggested-by: Thomas Gleixner +Signed-off-by: Guilherme G. Piccoli +Signed-off-by: Dave Hansen +Reviewed-by: Tony Luck +Tested-by: Andre Almeida +Link: https://lore.kernel.org/all/20221024200254.635256-1-gpiccoli%40igalia.com +--- + Documentation/admin-guide/sysctl/kernel.rst | 23 +++++++++++ + arch/x86/kernel/cpu/intel.c | 63 ++++++++++++++++++++++++----- + 2 files changed, 76 insertions(+), 10 deletions(-) + +diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst +index 98d1b198b2b4c..c2c64c1b706ff 100644 +--- a/Documentation/admin-guide/sysctl/kernel.rst ++++ b/Documentation/admin-guide/sysctl/kernel.rst +@@ -1314,6 +1314,29 @@ watchdog work to be queued by the watchdog timer function, otherwise the NMI + watchdog — if enabled — can detect a hard lockup condition. + + ++split_lock_mitigate (x86 only) ++============================== ++ ++On x86, each "split lock" imposes a system-wide performance penalty. On larger ++systems, large numbers of split locks from unprivileged users can result in ++denials of service to well-behaved and potentially more important users. ++ ++The kernel mitigates these bad users by detecting split locks and imposing ++penalties: forcing them to wait and only allowing one core to execute split ++locks at a time. ++ ++These mitigations can make those bad applications unbearably slow. Setting ++split_lock_mitigate=0 may restore some application performance, but will also ++increase system exposure to denial of service attacks from split lock users. ++ ++= =================================================================== ++0 Disable the mitigation mode - just warns the split lock on kernel log ++ and exposes the system to denials of service from the split lockers. ++1 Enable the mitigation mode (this is the default) - penalizes the split ++ lockers with intentional performance degradation. ++= =================================================================== ++ ++ + stack_erasing + ============= + +diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c +index 2d7ea5480ec33..4278996504833 100644 +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -1034,8 +1034,32 @@ static const struct { + + static struct ratelimit_state bld_ratelimit; + ++static unsigned int sysctl_sld_mitigate = 1; + static DEFINE_SEMAPHORE(buslock_sem); + ++#ifdef CONFIG_PROC_SYSCTL ++static struct ctl_table sld_sysctls[] = { ++ { ++ .procname = "split_lock_mitigate", ++ .data = &sysctl_sld_mitigate, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_douintvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_ONE, ++ }, ++ {} ++}; ++ ++static int __init sld_mitigate_sysctl_init(void) ++{ ++ register_sysctl_init("kernel", sld_sysctls); ++ return 0; ++} ++ ++late_initcall(sld_mitigate_sysctl_init); ++#endif ++ + static inline bool match_option(const char *arg, int arglen, const char *opt) + { + int len = strlen(opt), ratelimit; +@@ -1146,12 +1170,20 @@ static void split_lock_init(void) + split_lock_verify_msr(sld_state != sld_off); + } + +-static void __split_lock_reenable(struct work_struct *work) ++static void __split_lock_reenable_unlock(struct work_struct *work) + { + sld_update_msr(true); + up(&buslock_sem); + } + ++static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock); ++ ++static void __split_lock_reenable(struct work_struct *work) ++{ ++ sld_update_msr(true); ++} ++static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable); ++ + /* + * If a CPU goes offline with pending delayed work to re-enable split lock + * detection then the delayed work will be executed on some other CPU. That +@@ -1169,10 +1201,9 @@ static int splitlock_cpu_offline(unsigned int cpu) + return 0; + } + +-static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable); +- + static void split_lock_warn(unsigned long ip) + { ++ struct delayed_work *work; + int cpu; + + if (!current->reported_split_lock) +@@ -1180,14 +1211,26 @@ static void split_lock_warn(unsigned long ip) + current->comm, current->pid, ip); + current->reported_split_lock = 1; + +- /* misery factor #1, sleep 10ms before trying to execute split lock */ +- if (msleep_interruptible(10) > 0) +- return; +- /* Misery factor #2, only allow one buslocked disabled core at a time */ +- if (down_interruptible(&buslock_sem) == -EINTR) +- return; ++ if (sysctl_sld_mitigate) { ++ /* ++ * misery factor #1: ++ * sleep 10ms before trying to execute split lock. ++ */ ++ if (msleep_interruptible(10) > 0) ++ return; ++ /* ++ * Misery factor #2: ++ * only allow one buslocked disabled core at a time. ++ */ ++ if (down_interruptible(&buslock_sem) == -EINTR) ++ return; ++ work = &sl_reenable_unlock; ++ } else { ++ work = &sl_reenable; ++ } ++ + cpu = get_cpu(); +- schedule_delayed_work_on(cpu, &split_lock_reenable, 2); ++ schedule_delayed_work_on(cpu, work, 2); + + /* Disable split lock detection on this CPU to make progress */ + sld_update_msr(false);