From a0b1add671e9089ed04284c1117021c8bc3a0a10 Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Sun, 24 Jul 2022 13:30:17 +0200 Subject: [PATCH] linux519: Add Project C patch from torvic9 - https://gitlab.com/alfredchen/linux-prjc/-/issues/61 Along with our usual tweaks. I wanted to wait for 5.19 release to offer Alfred's patch but since we're having RC8.. :frog: --- PKGBUILD | 15 +- linux-tkg-config/prepare | 2 +- .../5.19/0005-glitched-pds.patch | 90 + .../5.19/0009-glitched-bmq.patch | 90 + .../5.19/0009-glitched-ondemand-bmq.patch | 18 + .../5.19/0009-prjc_v5.19-r0.patch | 31279 ++++++++++++++++ 6 files changed, 31486 insertions(+), 8 deletions(-) create mode 100644 linux-tkg-patches/5.19/0005-glitched-pds.patch create mode 100644 linux-tkg-patches/5.19/0009-glitched-bmq.patch create mode 100644 linux-tkg-patches/5.19/0009-glitched-ondemand-bmq.patch create mode 100644 linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch diff --git a/PKGBUILD b/PKGBUILD index 5c78be1..e0f955d 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -804,14 +804,14 @@ case $_basever in 0003-glitched-base.patch 0003-glitched-cfs.patch 0003-glitched-cfs-additions.patch - #0005-glitched-pds.patch + 0005-glitched-pds.patch 0006-add-acs-overrides_iommu.patch 0007-v5.19-fsync1_via_futex_waitv.patch 0007-v5.19-winesync.patch 0008-5.19-bcachefs.patch - #0009-glitched-ondemand-bmq.patch - #0009-glitched-bmq.patch - #0009-prjc_v5.19-r0.patch + 0009-glitched-ondemand-bmq.patch + 0009-glitched-bmq.patch + 0009-prjc_v5.19-r0.patch #0012-linux-hardened.patch 0012-misc-additions.patch # MM Dirty Soft for WRITE_WATCH support in Wine @@ -828,13 +828,14 @@ case $_basever in '391bf85333326c8c7052dcbcf9b247632b728572ce9cf9ef86ae8352dee7d3e2' '5efd40c392ece498d2d43d5443e6537c2d9ef7cf9820d5ce80b6577fc5d1a4b2' 'e5ea0bb25ee294c655ac3cc30e1eea497799826108fbfb4ef3258c676c1e8a12' - #'fca63d15ca4502aebd73e76d7499b243d2c03db71ff5ab0bf5cf268b2e576320' + 'fca63d15ca4502aebd73e76d7499b243d2c03db71ff5ab0bf5cf268b2e576320' '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' '9df628fd530950e37d31da854cb314d536f33c83935adf5c47e71266a55f7004' '057631ecc148b41e0037d200a69cbbfbed8cdcf27eede3d8cd6936566b6ebc68' 'ee9889e94dcb2cee098c39e76e27d714c6071c41a85f3d70361a9cf2c25e4cd3' - #'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911' - #'766658d5ec9cf204635f735a8927854991d0133b2e34bdcd9ca36d7e34817e27' + '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' + 'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911' + 'b9822a7f912a46196facb9e70229b90357fbc7c7c1dfe23a407e1cb014add30d' '213ecf1ba59dc87ed1844c3473d575b85ffe3a567f86735e8c6239c92dbbb493' '1b656ad96004f27e9dc63d7f430b50d5c48510d6d4cd595a81c24b21adb70313' 'b0319a7dff9c48b2f3e3d3597ee154bf92223149a633a8b7ce4026252db86da6') diff --git a/linux-tkg-config/prepare b/linux-tkg-config/prepare index b37f489..f00e1b6 100644 --- a/linux-tkg-config/prepare +++ b/linux-tkg-config/prepare @@ -202,7 +202,7 @@ _set_cpu_scheduler() { elif [ "$_basever" = "518" ]; then _avail_cpu_scheds=("pds" "bmq" "cacule" "tt" "cfs") elif [ "$_basever" = "519" ]; then - _avail_cpu_scheds=("cacule" "tt" "cfs" ) + _avail_cpu_scheds=("pds" "bmq" "cacule" "tt" "cfs" ) else _avail_cpu_scheds=("cfs") fi diff --git a/linux-tkg-patches/5.19/0005-glitched-pds.patch b/linux-tkg-patches/5.19/0005-glitched-pds.patch new file mode 100644 index 0000000..08c9ef3 --- /dev/null +++ b/linux-tkg-patches/5.19/0005-glitched-pds.patch @@ -0,0 +1,90 @@ +From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 
00:00:00 2001 +From: Tk-Glitch +Date: Wed, 4 Jul 2018 04:30:08 +0200 +Subject: glitched - PDS + +diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz +index 2a202a846757..1d9c7ed79b11 100644 +--- a/kernel/Kconfig.hz ++++ b/kernel/Kconfig.hz +@@ -4,7 +4,7 @@ + + choice + prompt "Timer frequency" +- default HZ_250 ++ default HZ_500 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -39,6 +39,13 @@ choice + on SMP and NUMA systems and exactly dividing by both PAL and + NTSC frame rates for video and multimedia work. + ++ config HZ_500 ++ bool "500 HZ" ++ help ++ 500 Hz is a balanced timer frequency. Provides fast interactivity ++ on desktops with great smoothness without increasing CPU power ++ consumption and sacrificing the battery life on laptops. ++ + config HZ_1000 + bool "1000 HZ" + help +@@ -52,6 +59,7 @@ config HZ + default 100 if HZ_100 + default 250 if HZ_250 + default 300 if HZ_300 ++ default 500 if HZ_500 + default 1000 if HZ_1000 + + config SCHED_HRTICK + +diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz +index 2a202a846757..1d9c7ed79b11 100644 +--- a/kernel/Kconfig.hz ++++ b/kernel/Kconfig.hz +@@ -4,7 +4,7 @@ + + choice + prompt "Timer frequency" +- default HZ_500 ++ default HZ_750 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -46,6 +46,13 @@ choice + on desktops with great smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + ++ config HZ_750 ++ bool "750 HZ" ++ help ++ 750 Hz is a good timer frequency for desktops. Provides fast ++ interactivity with great smoothness without sacrificing too ++ much throughput. ++ + config HZ_1000 + bool "1000 HZ" + help +@@ -60,6 +67,7 @@ config HZ + default 250 if HZ_250 + default 300 if HZ_300 + default 500 if HZ_500 ++ default 750 if HZ_750 + default 1000 if HZ_1000 + + config SCHED_HRTICK + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 9270a4370d54..30d01e647417 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -169,7 +169,7 @@ + /* + * From 0 .. 200. Higher means more swappy. + */ +-int vm_swappiness = 60; ++int vm_swappiness = 20; + + static void set_task_reclaim_state(struct task_struct *task, + struct reclaim_state *rs) diff --git a/linux-tkg-patches/5.19/0009-glitched-bmq.patch b/linux-tkg-patches/5.19/0009-glitched-bmq.patch new file mode 100644 index 0000000..e42e522 --- /dev/null +++ b/linux-tkg-patches/5.19/0009-glitched-bmq.patch @@ -0,0 +1,90 @@ +From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Wed, 4 Jul 2018 04:30:08 +0200 +Subject: glitched - BMQ + +diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz +index 2a202a846757..1d9c7ed79b11 100644 +--- a/kernel/Kconfig.hz ++++ b/kernel/Kconfig.hz +@@ -4,7 +4,7 @@ + + choice + prompt "Timer frequency" +- default HZ_250 ++ default HZ_500 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -39,6 +39,13 @@ choice + on SMP and NUMA systems and exactly dividing by both PAL and + NTSC frame rates for video and multimedia work. + ++ config HZ_500 ++ bool "500 HZ" ++ help ++ 500 Hz is a balanced timer frequency. Provides fast interactivity ++ on desktops with great smoothness without increasing CPU power ++ consumption and sacrificing the battery life on laptops. 
++ + config HZ_1000 + bool "1000 HZ" + help +@@ -52,6 +59,7 @@ config HZ + default 100 if HZ_100 + default 250 if HZ_250 + default 300 if HZ_300 ++ default 500 if HZ_500 + default 1000 if HZ_1000 + + config SCHED_HRTICK + +diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz +index 2a202a846757..1d9c7ed79b11 100644 +--- a/kernel/Kconfig.hz ++++ b/kernel/Kconfig.hz +@@ -4,7 +4,7 @@ + + choice + prompt "Timer frequency" +- default HZ_500 ++ default HZ_750 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 Hz but 100 Hz may be more +@@ -46,6 +46,13 @@ choice + on desktops with great smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + ++ config HZ_750 ++ bool "750 HZ" ++ help ++ 750 Hz is a good timer frequency for desktops. Provides fast ++ interactivity with great smoothness without sacrificing too ++ much throughput. ++ + config HZ_1000 + bool "1000 HZ" + help +@@ -60,6 +67,7 @@ config HZ + default 250 if HZ_250 + default 300 if HZ_300 + default 500 if HZ_500 ++ default 750 if HZ_750 + default 1000 if HZ_1000 + + config SCHED_HRTICK + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 9270a4370d54..30d01e647417 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -169,7 +169,7 @@ + /* + * From 0 .. 200. Higher means more swappy. + */ +-int vm_swappiness = 60; ++int vm_swappiness = 20; + + static void set_task_reclaim_state(struct task_struct *task, + struct reclaim_state *rs) diff --git a/linux-tkg-patches/5.19/0009-glitched-ondemand-bmq.patch b/linux-tkg-patches/5.19/0009-glitched-ondemand-bmq.patch new file mode 100644 index 0000000..a926040 --- /dev/null +++ b/linux-tkg-patches/5.19/0009-glitched-ondemand-bmq.patch @@ -0,0 +1,18 @@ +diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c +index 6b423eebfd5d..61e3271675d6 100644 +--- a/drivers/cpufreq/cpufreq_ondemand.c ++++ b/drivers/cpufreq/cpufreq_ondemand.c +@@ -21,10 +21,10 @@ + #include "cpufreq_ondemand.h" + + /* On-demand governor macros */ +-#define DEF_FREQUENCY_UP_THRESHOLD (80) +-#define DEF_SAMPLING_DOWN_FACTOR (1) ++#define DEF_FREQUENCY_UP_THRESHOLD (55) ++#define DEF_SAMPLING_DOWN_FACTOR (5) + #define MAX_SAMPLING_DOWN_FACTOR (100000) +-#define MICRO_FREQUENCY_UP_THRESHOLD (95) ++#define MICRO_FREQUENCY_UP_THRESHOLD (63) + #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) + #define MIN_FREQUENCY_UP_THRESHOLD (1) + #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch b/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch new file mode 100644 index 0000000..814125c --- /dev/null +++ b/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch @@ -0,0 +1,31279 @@ +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 2522b11e593f..11e0b608c57d 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5273,6 +5273,12 @@ + sa1100ir [NET] + See drivers/net/irda/sa1100_ir.c. + ++ sched_timeslice= ++ [KNL] Time slice in us for BMQ scheduler. ++ Format: (must be >= 1000) ++ Default: 4000 ++ See Documentation/scheduler/sched-BMQ.txt ++ + sched_verbose [KNL] Enables verbose scheduler debug messages. + + schedstats= [KNL,X86] Enable or disable scheduled statistics. 
+diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst +index ddccd1077462..8fd3bfa2ecd9 100644 +--- a/Documentation/admin-guide/sysctl/kernel.rst ++++ b/Documentation/admin-guide/sysctl/kernel.rst +@@ -1524,3 +1524,13 @@ is 10 seconds. + + The softlockup threshold is (``2 * watchdog_thresh``). Setting this + tunable to zero will disable lockup detection altogether. ++ ++yield_type: ++=========== ++ ++BMQ CPU scheduler only. This determines what type of yield calls to ++sched_yield will perform. ++ ++ 0 - No yield. ++ 1 - Deboost and requeue task. (default) ++ 2 - Set run queue skip task. +diff --git a/Documentation/scheduler/sched-BMQ.txt b/Documentation/scheduler/sched-BMQ.txt +new file mode 100644 +index 000000000000..05c84eec0f31 +--- /dev/null ++++ b/Documentation/scheduler/sched-BMQ.txt +@@ -0,0 +1,110 @@ ++ BitMap queue CPU Scheduler ++ -------------------------- ++ ++CONTENT ++======== ++ ++ Background ++ Design ++ Overview ++ Task policy ++ Priority management ++ BitMap Queue ++ CPU Assignment and Migration ++ ++ ++Background ++========== ++ ++BitMap Queue CPU scheduler, referred to as BMQ from here on, is an evolution ++of previous Priority and Deadline based Skiplist multiple queue scheduler(PDS), ++and inspired by Zircon scheduler. The goal of it is to keep the scheduler code ++simple, while efficiency and scalable for interactive tasks, such as desktop, ++movie playback and gaming etc. ++ ++Design ++====== ++ ++Overview ++-------- ++ ++BMQ use per CPU run queue design, each CPU(logical) has it's own run queue, ++each CPU is responsible for scheduling the tasks that are putting into it's ++run queue. ++ ++The run queue is a set of priority queues. Note that these queues are fifo ++queue for non-rt tasks or priority queue for rt tasks in data structure. See ++BitMap Queue below for details. BMQ is optimized for non-rt tasks in the fact ++that most applications are non-rt tasks. No matter the queue is fifo or ++priority, In each queue is an ordered list of runnable tasks awaiting execution ++and the data structures are the same. When it is time for a new task to run, ++the scheduler simply looks the lowest numbered queueue that contains a task, ++and runs the first task from the head of that queue. And per CPU idle task is ++also in the run queue, so the scheduler can always find a task to run on from ++its run queue. ++ ++Each task will assigned the same timeslice(default 4ms) when it is picked to ++start running. Task will be reinserted at the end of the appropriate priority ++queue when it uses its whole timeslice. When the scheduler selects a new task ++from the priority queue it sets the CPU's preemption timer for the remainder of ++the previous timeslice. When that timer fires the scheduler will stop execution ++on that task, select another task and start over again. ++ ++If a task blocks waiting for a shared resource then it's taken out of its ++priority queue and is placed in a wait queue for the shared resource. When it ++is unblocked it will be reinserted in the appropriate priority queue of an ++eligible CPU. ++ ++Task policy ++----------- ++ ++BMQ supports DEADLINE, FIFO, RR, NORMAL, BATCH and IDLE task policy like the ++mainline CFS scheduler. But BMQ is heavy optimized for non-rt task, that's ++NORMAL/BATCH/IDLE policy tasks. Below is the implementation detail of each ++policy. ++ ++DEADLINE ++ It is squashed as priority 0 FIFO task. 
++ ++FIFO/RR ++ All RT tasks share one single priority queue in BMQ run queue designed. The ++complexity of insert operation is O(n). BMQ is not designed for system runs ++with major rt policy tasks. ++ ++NORMAL/BATCH/IDLE ++ BATCH and IDLE tasks are treated as the same policy. They compete CPU with ++NORMAL policy tasks, but they just don't boost. To control the priority of ++NORMAL/BATCH/IDLE tasks, simply use nice level. ++ ++ISO ++ ISO policy is not supported in BMQ. Please use nice level -20 NORMAL policy ++task instead. ++ ++Priority management ++------------------- ++ ++RT tasks have priority from 0-99. For non-rt tasks, there are three different ++factors used to determine the effective priority of a task. The effective ++priority being what is used to determine which queue it will be in. ++ ++The first factor is simply the task’s static priority. Which is assigned from ++task's nice level, within [-20, 19] in userland's point of view and [0, 39] ++internally. ++ ++The second factor is the priority boost. This is a value bounded between ++[-MAX_PRIORITY_ADJ, MAX_PRIORITY_ADJ] used to offset the base priority, it is ++modified by the following cases: ++ ++*When a thread has used up its entire timeslice, always deboost its boost by ++increasing by one. ++*When a thread gives up cpu control(voluntary or non-voluntary) to reschedule, ++and its switch-in time(time after last switch and run) below the thredhold ++based on its priority boost, will boost its boost by decreasing by one buti is ++capped at 0 (won’t go negative). ++ ++The intent in this system is to ensure that interactive threads are serviced ++quickly. These are usually the threads that interact directly with the user ++and cause user-perceivable latency. These threads usually do little work and ++spend most of their time blocked awaiting another user event. So they get the ++priority boost from unblocking while background threads that do most of the ++processing receive the priority penalty for using their entire timeslice. +diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c +index 99bd027a7f7c..525372fcd0f2 100644 +--- a/arch/powerpc/platforms/cell/spufs/sched.c ++++ b/arch/powerpc/platforms/cell/spufs/sched.c +@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; + static struct timer_list spusched_timer; + static struct timer_list spuloadavg_timer; + +-/* +- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). +- */ +-#define NORMAL_PRIO 120 +- + /* + * Frequency of the spu scheduler tick. By default we do one SPU scheduler + * tick for every 10 CPU scheduler ticks. 
+diff --git a/fs/proc/base.c b/fs/proc/base.c +index 8dfa36a99c74..46397c606e01 100644 +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, + seq_puts(m, "0 0 0\n"); + else + seq_printf(m, "%llu %llu %lu\n", +- (unsigned long long)task->se.sum_exec_runtime, ++ (unsigned long long)tsk_seruntime(task), + (unsigned long long)task->sched_info.run_delay, + task->sched_info.pcount); + +diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h +index 8874f681b056..59eb72bf7d5f 100644 +--- a/include/asm-generic/resource.h ++++ b/include/asm-generic/resource.h +@@ -23,7 +23,7 @@ + [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ + [RLIMIT_SIGPENDING] = { 0, 0 }, \ + [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ +- [RLIMIT_NICE] = { 0, 0 }, \ ++ [RLIMIT_NICE] = { 30, 30 }, \ + [RLIMIT_RTPRIO] = { 0, 0 }, \ + [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ + } +diff --git a/include/linux/sched.h b/include/linux/sched.h +index c46f3a63b758..6907241224a5 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -750,8 +750,10 @@ struct task_struct { + unsigned int flags; + unsigned int ptrace; + +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) + int on_cpu; ++#endif ++#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) + struct __call_single_node wake_entry; + unsigned int wakee_flips; + unsigned long wakee_flip_decay_ts; +@@ -774,6 +776,17 @@ struct task_struct { + int normal_prio; + unsigned int rt_priority; + ++#ifdef CONFIG_SCHED_ALT ++ u64 last_ran; ++ s64 time_slice; ++ int boost_prio; ++#ifdef CONFIG_SCHED_BMQ ++ int bmq_idx; ++ struct list_head bmq_node; ++#endif /* CONFIG_SCHED_BMQ */ ++ /* sched_clock time spent running */ ++ u64 sched_time; ++#else /* !CONFIG_SCHED_ALT */ + struct sched_entity se; + struct sched_rt_entity rt; + struct sched_dl_entity dl; +@@ -784,6 +797,7 @@ struct task_struct { + unsigned long core_cookie; + unsigned int core_occupation; + #endif ++#endif /* !CONFIG_SCHED_ALT */ + + #ifdef CONFIG_CGROUP_SCHED + struct task_group *sched_task_group; +@@ -1517,6 +1531,15 @@ struct task_struct { + */ + }; + ++#ifdef CONFIG_SCHED_ALT ++#define tsk_seruntime(t) ((t)->sched_time) ++/* replace the uncertian rt_timeout with 0UL */ ++#define tsk_rttimeout(t) (0UL) ++#else /* CFS */ ++#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) ++#define tsk_rttimeout(t) ((t)->rt.timeout) ++#endif /* !CONFIG_SCHED_ALT */ ++ + static inline struct pid *task_pid(struct task_struct *task) + { + return task->thread_pid; +diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h +index 7c83d4d5a971..529e1b2ebd19 100644 +--- a/include/linux/sched/deadline.h ++++ b/include/linux/sched/deadline.h +@@ -1,5 +1,15 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + ++#ifdef CONFIG_SCHED_ALT ++ ++#ifdef CONFIG_SCHED_BMQ ++#define __tsk_deadline(p) (0UL) ++#endif ++ ++#else ++ ++#define __tsk_deadline(p) ((p)->dl.deadline) ++ + /* + * SCHED_DEADLINE tasks has negative priorities, reflecting + * the fact that any of them has higher prio than RT and +@@ -21,6 +31,7 @@ static inline int dl_task(struct task_struct *p) + { + return dl_prio(p->prio); + } ++#endif /* CONFIG_SCHED_ALT */ + + static inline bool dl_time_before(u64 a, u64 b) + { +diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h +index ab83d85e1183..c28676e431be 100644 +--- a/include/linux/sched/prio.h ++++ b/include/linux/sched/prio.h +@@ -18,6 +18,11 @@ + 
#define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) + #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) + ++#ifdef CONFIG_SCHED_ALT ++/* +/- priority levels from the base priority */ ++#define MAX_PRIORITY_ADJ 4 ++#endif ++ + /* + * Convert user-nice values [ -20 ... 0 ... 19 ] + * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], +diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h +index e5af028c08b4..0a7565d0d3cf 100644 +--- a/include/linux/sched/rt.h ++++ b/include/linux/sched/rt.h +@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) + + if (policy == SCHED_FIFO || policy == SCHED_RR) + return true; ++#ifndef CONFIG_SCHED_ALT + if (policy == SCHED_DEADLINE) + return true; ++#endif + return false; + } + +diff --git a/init/Kconfig b/init/Kconfig +index c7900e8975f1..0a817c86c966 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -809,9 +809,33 @@ config GENERIC_SCHED_CLOCK + + menu "Scheduler features" + ++menuconfig SCHED_ALT ++ bool "Alternative CPU Schedulers" ++ default y ++ help ++ This feature enable alternative CPU scheduler" ++ ++if SCHED_ALT ++ ++choice ++ prompt "Alternative CPU Scheduler" ++ default SCHED_BMQ ++ ++config SCHED_BMQ ++ bool "BMQ CPU scheduler" ++ help ++ The BitMap Queue CPU scheduler for excellent interactivity and ++ responsiveness on the desktop and solid scalability on normal ++ hardware and commodity servers. ++ ++endchoice ++ ++endif ++ + config UCLAMP_TASK + bool "Enable utilization clamping for RT/FAIR tasks" + depends on CPU_FREQ_GOV_SCHEDUTIL ++ depends on !SCHED_BMQ + help + This feature enables the scheduler to track the clamped utilization + of each CPU based on RUNNABLE tasks scheduled on that CPU. +@@ -911,6 +935,7 @@ config NUMA_BALANCING + depends on ARCH_SUPPORTS_NUMA_BALANCING + depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY + depends on SMP && NUMA && MIGRATION && !PREEMPT_RT ++ depends on !SCHED_BMQ + help + This option adds support for automatic NUMA aware memory/task placement. + The mechanism is quite primitive and is based on migrating memory when +@@ -997,7 +1022,7 @@ menuconfig CGROUP_SCHED + bandwidth allocation to such task groups. It uses cgroups to group + tasks. 
+ +-if CGROUP_SCHED ++if CGROUP_SCHED && !SCHED_BMQ + config FAIR_GROUP_SCHED + bool "Group scheduling for SCHED_OTHER" + depends on CGROUP_SCHED +@@ -1268,6 +1293,7 @@ config CHECKPOINT_RESTORE + + config SCHED_AUTOGROUP + bool "Automatic process group scheduling" ++ depends on !SCHED_BMQ + select CGROUPS + select CGROUP_SCHED + select FAIR_GROUP_SCHED +diff --git a/init/init_task.c b/init/init_task.c +index 73cc8f03511a..9017276b1a80 100644 +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -75,9 +75,15 @@ struct task_struct init_task + .stack = init_stack, + .usage = REFCOUNT_INIT(2), + .flags = PF_KTHREAD, ++#ifdef CONFIG_SCHED_ALT ++ .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, ++ .static_prio = DEFAULT_PRIO, ++ .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, ++#else + .prio = MAX_PRIO - 20, + .static_prio = MAX_PRIO - 20, + .normal_prio = MAX_PRIO - 20, ++#endif + .policy = SCHED_NORMAL, + .cpus_ptr = &init_task.cpus_mask, + .user_cpus_ptr = NULL, +@@ -88,6 +94,14 @@ struct task_struct init_task + .restart_block = { + .fn = do_no_restart_syscall, + }, ++#ifdef CONFIG_SCHED_ALT ++ .boost_prio = 0, ++#ifdef CONFIG_SCHED_BMQ ++ .bmq_idx = 15, ++ .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), ++#endif ++ .time_slice = HZ, ++#else + .se = { + .group_node = LIST_HEAD_INIT(init_task.se.group_node), + }, +@@ -95,6 +109,7 @@ struct task_struct init_task + .run_list = LIST_HEAD_INIT(init_task.rt.run_list), + .time_slice = RR_TIMESLICE, + }, ++#endif + .tasks = LIST_HEAD_INIT(init_task.tasks), + #ifdef CONFIG_SMP + .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index 71a418858a5e..7e3016873db1 100644 +--- a/kernel/cgroup/cpuset.c ++++ b/kernel/cgroup/cpuset.c +@@ -704,7 +704,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) + return ret; + } + +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) + /* + * Helper routine for generate_sched_domains(). + * Do cpusets a, b have overlapping effective cpus_allowed masks? 
+@@ -1100,7 +1100,7 @@ static void rebuild_sched_domains_locked(void) + /* Have scheduler rebuild the domains */ + partition_and_rebuild_sched_domains(ndoms, doms, attr); + } +-#else /* !CONFIG_SMP */ ++#else /* !CONFIG_SMP || CONFIG_SCHED_ALT */ + static void rebuild_sched_domains_locked(void) + { + } +diff --git a/kernel/delayacct.c b/kernel/delayacct.c +index 164ed9ef77a3..c974a84b056f 100644 +--- a/kernel/delayacct.c ++++ b/kernel/delayacct.c +@@ -150,7 +150,7 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) + */ + t1 = tsk->sched_info.pcount; + t2 = tsk->sched_info.run_delay; +- t3 = tsk->se.sum_exec_runtime; ++ t3 = tsk_seruntime(tsk); + + d->cpu_count += t1; + +diff --git a/kernel/exit.c b/kernel/exit.c +index f072959fcab7..da97095a2997 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -124,7 +124,7 @@ static void __exit_signal(struct task_struct *tsk) + sig->curr_target = next_thread(tsk); + } + +- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, ++ add_device_randomness((const void*) &tsk_seruntime(tsk), + sizeof(unsigned long long)); + + /* +@@ -145,7 +145,7 @@ static void __exit_signal(struct task_struct *tsk) + sig->inblock += task_io_get_inblock(tsk); + sig->oublock += task_io_get_oublock(tsk); + task_io_accounting_add(&sig->ioac, &tsk->ioac); +- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; ++ sig->sum_sched_runtime += tsk_seruntime(tsk); + sig->nr_threads--; + __unhash_process(tsk, group_dead); + write_sequnlock(&sig->stats_lock); +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 7779ee8abc2a..4258d1c08c71 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -300,14 +300,14 @@ static __always_inline void + waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) + { + waiter->prio = __waiter_prio(task); +- waiter->deadline = task->dl.deadline; ++ waiter->deadline = __tsk_deadline(task); + } + + /* + * Only use with rt_mutex_waiter_{less,equal}() + */ + #define task_to_waiter(p) \ +- &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline } ++ &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = __tsk_deadline(p) } + + static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, + struct rt_mutex_waiter *right) +@@ -315,6 +315,7 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, + if (left->prio < right->prio) + return 1; + ++#ifndef CONFIG_SCHED_BMQ + /* + * If both waiters have dl_prio(), we check the deadlines of the + * associated tasks. +@@ -323,6 +324,7 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, + */ + if (dl_prio(left->prio)) + return dl_time_before(left->deadline, right->deadline); ++#endif + + return 0; + } +@@ -333,6 +335,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + if (left->prio != right->prio) + return 0; + ++#ifndef CONFIG_SCHED_BMQ + /* + * If both waiters have dl_prio(), we check the deadlines of the + * associated tasks. 
+@@ -341,6 +344,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + */ + if (dl_prio(left->prio)) + return left->deadline == right->deadline; ++#endif + + return 1; + } +diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile +index 976092b7bd45..01b041388419 100644 +--- a/kernel/sched/Makefile ++++ b/kernel/sched/Makefile +@@ -28,7 +28,11 @@ endif + # These compilation units have roughly the same size and complexity - so their + # build parallelizes well and finishes roughly at once: + # ++ifdef CONFIG_SCHED_ALT ++obj-y += alt_core.o alt_debug.o ++else + obj-y += core.o + obj-y += fair.o ++endif + obj-y += build_policy.o + obj-y += build_utility.o +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +new file mode 100644 +index 000000000000..09ca47de425c +--- /dev/null ++++ b/kernel/sched/alt_core.c +@@ -0,0 +1,5940 @@ ++/* ++ * kernel/sched/alt_core.c ++ * ++ * Core alternative kernel scheduler code and related syscalls ++ * ++ * Copyright (C) 1991-2002 Linus Torvalds ++ * ++ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes ++ * a whole lot of those previous things. ++ * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel ++ * scheduler by Alfred Chen. ++ * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. ++ */ ++#include "sched.h" ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++ ++#include "../workqueue_internal.h" ++#include "../../fs/io-wq.h" ++#include "../smpboot.h" ++ ++#include "pelt.h" ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++/* rt_prio(prio) defined in include/linux/sched/rt.h */ ++#define rt_task(p) rt_prio((p)->prio) ++#define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR) ++#define task_has_rt_policy(p) (rt_policy((p)->policy)) ++ ++#define STOP_PRIO (MAX_RT_PRIO - 1) ++ ++/* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ ++u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000); ++ ++static int __init sched_timeslice(char *str) ++{ ++ int timeslice_us; ++ ++ get_option(&str, ×lice_us); ++ if (timeslice_us >= 1000) ++ sched_timeslice_ns = timeslice_us * 1000; ++ ++ return 0; ++} ++early_param("sched_timeslice", sched_timeslice); ++ ++/* Reschedule if less than this many μs left */ ++#define RESCHED_NS (100 * 1000) ++ ++/** ++ * sched_yield_type - Choose what sort of yield sched_yield will perform. ++ * 0: No yield. ++ * 1: Deboost and requeue task. (default) ++ * 2: Set rq skip task. 
++ */ ++int sched_yield_type __read_mostly = 1; ++ ++#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) ++#define boost_threshold(p) (sched_timeslice_ns >>\ ++ (10 - MAX_PRIORITY_ADJ - (p)->boost_prio)) ++ ++static inline void boost_task(struct task_struct *p) ++{ ++ int limit; ++ ++ switch (p->policy) { ++ case SCHED_NORMAL: ++ limit = -MAX_PRIORITY_ADJ; ++ break; ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ limit = 0; ++ break; ++ default: ++ return; ++ } ++ ++ if (p->boost_prio > limit) ++ p->boost_prio--; ++} ++ ++static inline void deboost_task(struct task_struct *p) ++{ ++ if (p->boost_prio < MAX_PRIORITY_ADJ) ++ p->boost_prio++; ++} ++ ++#ifdef CONFIG_SMP ++static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; ++ ++DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); ++DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); ++DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); ++ ++#ifdef CONFIG_SCHED_SMT ++DEFINE_STATIC_KEY_FALSE(sched_smt_present); ++EXPORT_SYMBOL_GPL(sched_smt_present); ++#endif ++ ++/* ++ * Keep a unique ID per domain (we use the first CPUs number in the cpumask of ++ * the domain), this allows us to quickly tell if two cpus are in the same cache ++ * domain, see cpus_share_cache(). ++ */ ++DEFINE_PER_CPU(int, sd_llc_id); ++#endif /* CONFIG_SMP */ ++ ++static DEFINE_MUTEX(sched_hotcpu_mutex); ++ ++DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ++ ++#ifndef prepare_arch_switch ++# define prepare_arch_switch(next) do { } while (0) ++#endif ++#ifndef finish_arch_post_lock_switch ++# define finish_arch_post_lock_switch() do { } while (0) ++#endif ++ ++#define IDLE_WM (IDLE_TASK_SCHED_PRIO) ++ ++static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; ++static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; ++ ++static inline void update_sched_rq_watermark(struct rq *rq) ++{ ++ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ unsigned long last_wm = rq->watermark; ++ unsigned long i; ++ int cpu; ++ ++ if (watermark == last_wm) ++ return; ++ ++ rq->watermark = watermark; ++ cpu = cpu_of(rq); ++ if (watermark < last_wm) { ++ for (i = watermark + 1; i <= last_wm; i++) ++ cpumask_andnot(&sched_rq_watermark[i], ++ &sched_rq_watermark[i], cpumask_of(cpu)); ++#ifdef CONFIG_SCHED_SMT ++ if (!static_branch_likely(&sched_smt_present)) ++ return; ++ if (IDLE_WM == last_wm) ++ cpumask_andnot(&sched_sg_idle_mask, ++ &sched_sg_idle_mask, cpu_smt_mask(cpu)); ++#endif ++ return; ++ } ++ /* last_wm < watermark */ ++ for (i = last_wm + 1; i <= watermark; i++) ++ cpumask_set_cpu(cpu, &sched_rq_watermark[i]); ++#ifdef CONFIG_SCHED_SMT ++ if (!static_branch_likely(&sched_smt_present)) ++ return; ++ if (IDLE_WM == watermark) { ++ cpumask_t tmp; ++ cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); ++ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) ++ cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), ++ &sched_sg_idle_mask); ++ } ++#endif ++} ++ ++static inline int task_sched_prio(struct task_struct *p) ++{ ++ return (p->prio < MAX_RT_PRIO)? 
p->prio : p->prio + p->boost_prio; ++} ++ ++#include "bmq_imp.h" ++ ++static inline struct task_struct *rq_runnable_task(struct rq *rq) ++{ ++ struct task_struct *next = sched_rq_first_task(rq); ++ ++ if (unlikely(next == rq->skip)) ++ next = sched_rq_next_task(next, rq); ++ ++ return next; ++} ++ ++/* ++ * Context: p->pi_lock ++ */ ++static inline struct rq ++*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) ++{ ++ struct rq *rq; ++ for (;;) { ++ rq = task_rq(p); ++ if (p->on_cpu || task_on_rq_queued(p)) { ++ raw_spin_lock(&rq->lock); ++ if (likely((p->on_cpu || task_on_rq_queued(p)) ++ && rq == task_rq(p))) { ++ *plock = &rq->lock; ++ return rq; ++ } ++ raw_spin_unlock(&rq->lock); ++ } else if (task_on_rq_migrating(p)) { ++ do { ++ cpu_relax(); ++ } while (unlikely(task_on_rq_migrating(p))); ++ } else { ++ *plock = NULL; ++ return rq; ++ } ++ } ++} ++ ++static inline void ++__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) ++{ ++ if (NULL != lock) ++ raw_spin_unlock(lock); ++} ++ ++static inline struct rq ++*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, ++ unsigned long *flags) ++{ ++ struct rq *rq; ++ for (;;) { ++ rq = task_rq(p); ++ if (p->on_cpu || task_on_rq_queued(p)) { ++ raw_spin_lock_irqsave(&rq->lock, *flags); ++ if (likely((p->on_cpu || task_on_rq_queued(p)) ++ && rq == task_rq(p))) { ++ *plock = &rq->lock; ++ return rq; ++ } ++ raw_spin_unlock_irqrestore(&rq->lock, *flags); ++ } else if (task_on_rq_migrating(p)) { ++ do { ++ cpu_relax(); ++ } while (unlikely(task_on_rq_migrating(p))); ++ } else { ++ raw_spin_lock_irqsave(&p->pi_lock, *flags); ++ if (likely(!p->on_cpu && !p->on_rq && ++ rq == task_rq(p))) { ++ *plock = &p->pi_lock; ++ return rq; ++ } ++ raw_spin_unlock_irqrestore(&p->pi_lock, *flags); ++ } ++ } ++} ++ ++static inline void ++task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, ++ unsigned long *flags) ++{ ++ raw_spin_unlock_irqrestore(lock, *flags); ++} ++ ++/* ++ * __task_rq_lock - lock the rq @p resides on. ++ */ ++struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ struct rq *rq; ++ ++ lockdep_assert_held(&p->pi_lock); ++ ++ for (;;) { ++ rq = task_rq(p); ++ raw_spin_lock(&rq->lock); ++ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) ++ return rq; ++ raw_spin_unlock(&rq->lock); ++ ++ while (unlikely(task_on_rq_migrating(p))) ++ cpu_relax(); ++ } ++} ++ ++/* ++ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. ++ */ ++struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(p->pi_lock) ++ __acquires(rq->lock) ++{ ++ struct rq *rq; ++ ++ for (;;) { ++ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); ++ rq = task_rq(p); ++ raw_spin_lock(&rq->lock); ++ /* ++ * move_queued_task() task_rq_lock() ++ * ++ * ACQUIRE (rq->lock) ++ * [S] ->on_rq = MIGRATING [L] rq = task_rq() ++ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); ++ * [S] ->cpu = new_cpu [L] task_rq() ++ * [L] ->on_rq ++ * RELEASE (rq->lock) ++ * ++ * If we observe the old CPU in task_rq_lock(), the acquire of ++ * the old rq->lock will fully serialize against the stores. ++ * ++ * If we observe the new CPU in task_rq_lock(), the address ++ * dependency headed by '[L] rq = task_rq()' and the acquire ++ * will pair with the WMB to ensure we then also see migrating. 
++ */ ++ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { ++ return rq; ++ } ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); ++ ++ while (unlikely(task_on_rq_migrating(p))) ++ cpu_relax(); ++ } ++} ++ ++/* ++ * RQ-clock updating methods: ++ */ ++ ++static void update_rq_clock_task(struct rq *rq, s64 delta) ++{ ++/* ++ * In theory, the compile should just see 0 here, and optimize out the call ++ * to sched_rt_avg_update. But I don't trust it... ++ */ ++ s64 __maybe_unused steal = 0, irq_delta = 0; ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; ++ ++ /* ++ * Since irq_time is only updated on {soft,}irq_exit, we might run into ++ * this case when a previous update_rq_clock() happened inside a ++ * {soft,}irq region. ++ * ++ * When this happens, we stop ->clock_task and only update the ++ * prev_irq_time stamp to account for the part that fit, so that a next ++ * update will consume the rest. This ensures ->clock_task is ++ * monotonic. ++ * ++ * It does however cause some slight miss-attribution of {soft,}irq ++ * time, a more accurate solution would be to update the irq_time using ++ * the current rq->clock timestamp, except that would require using ++ * atomic ops. ++ */ ++ if (irq_delta > delta) ++ irq_delta = delta; ++ ++ rq->prev_irq_time += irq_delta; ++ delta -= irq_delta; ++#endif ++#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ++ if (static_key_false((¶virt_steal_rq_enabled))) { ++ steal = paravirt_steal_clock(cpu_of(rq)); ++ steal -= rq->prev_steal_time_rq; ++ ++ if (unlikely(steal > delta)) ++ steal = delta; ++ ++ rq->prev_steal_time_rq += steal; ++ delta -= steal; ++ } ++#endif ++ ++ rq->clock_task += delta; ++ ++#ifdef CONFIG_HAVE_SCHED_AVG_IRQ ++ if ((irq_delta + steal)) ++ update_irq_load_avg(rq, irq_delta + steal); ++#endif ++} ++ ++static inline void update_rq_clock(struct rq *rq) ++{ ++ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; ++ ++ if (unlikely(delta <= 0)) ++ return; ++ rq->clock += delta; ++ update_rq_clock_task(rq, delta); ++} ++ ++#ifdef CONFIG_NO_HZ_FULL ++/* ++ * Tick may be needed by tasks in the runqueue depending on their policy and ++ * requirements. If tick is needed, lets send the target an IPI to kick it out ++ * of nohz mode if necessary. 
++ */ ++static inline void sched_update_tick_dependency(struct rq *rq) ++{ ++ int cpu; ++ ++ if (!tick_nohz_full_enabled()) ++ return; ++ ++ cpu = cpu_of(rq); ++ ++ if (!tick_nohz_full_cpu(cpu)) ++ return; ++ ++ if (rq->nr_running < 2) ++ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); ++ else ++ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); ++} ++#else /* !CONFIG_NO_HZ_FULL */ ++static inline void sched_update_tick_dependency(struct rq *rq) { } ++#endif ++ ++/* ++ * Add/Remove/Requeue task to/from the runqueue routines ++ * Context: rq->lock ++ */ ++static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) ++{ ++ lockdep_assert_held(&rq->lock); ++ ++ WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", ++ task_cpu(p), cpu_of(rq)); ++ ++ __SCHED_DEQUEUE_TASK(p, rq, flags, update_sched_rq_watermark(rq)); ++ --rq->nr_running; ++#ifdef CONFIG_SMP ++ if (1 == rq->nr_running) ++ cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask); ++#endif ++ ++ sched_update_tick_dependency(rq); ++} ++ ++static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) ++{ ++ lockdep_assert_held(&rq->lock); ++ ++ WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", ++ task_cpu(p), cpu_of(rq)); ++ ++ __SCHED_ENQUEUE_TASK(p, rq, flags); ++ update_sched_rq_watermark(rq); ++ ++rq->nr_running; ++#ifdef CONFIG_SMP ++ if (2 == rq->nr_running) ++ cpumask_set_cpu(cpu_of(rq), &sched_rq_pending_mask); ++#endif ++ ++ sched_update_tick_dependency(rq); ++ ++ /* ++ * If in_iowait is set, the code below may not trigger any cpufreq ++ * utilization updates, so do it here explicitly with the IOWAIT flag ++ * passed. ++ */ ++ if (p->in_iowait) ++ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); ++} ++ ++static inline void requeue_task(struct task_struct *p, struct rq *rq) ++{ ++ lockdep_assert_held(&rq->lock); ++ WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", ++ cpu_of(rq), task_cpu(p)); ++ ++ __requeue_task(p, rq); ++} ++ ++/* ++ * cmpxchg based fetch_or, macro so it works for different integer types ++ */ ++#define fetch_or(ptr, mask) \ ++ ({ \ ++ typeof(ptr) _ptr = (ptr); \ ++ typeof(mask) _mask = (mask); \ ++ typeof(*_ptr) _old, _val = *_ptr; \ ++ \ ++ for (;;) { \ ++ _old = cmpxchg(_ptr, _val, _val | _mask); \ ++ if (_old == _val) \ ++ break; \ ++ _val = _old; \ ++ } \ ++ _old; \ ++}) ++ ++#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) ++/* ++ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, ++ * this avoids any races wrt polling state changes and thereby avoids ++ * spurious IPIs. ++ */ ++static bool set_nr_and_not_polling(struct task_struct *p) ++{ ++ struct thread_info *ti = task_thread_info(p); ++ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); ++} ++ ++/* ++ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. ++ * ++ * If this returns true, then the idle task promises to call ++ * sched_ttwu_pending() and reschedule soon. 
++ */ ++static bool set_nr_if_polling(struct task_struct *p) ++{ ++ struct thread_info *ti = task_thread_info(p); ++ typeof(ti->flags) old, val = READ_ONCE(ti->flags); ++ ++ for (;;) { ++ if (!(val & _TIF_POLLING_NRFLAG)) ++ return false; ++ if (val & _TIF_NEED_RESCHED) ++ return true; ++ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); ++ if (old == val) ++ break; ++ val = old; ++ } ++ return true; ++} ++ ++#else ++static bool set_nr_and_not_polling(struct task_struct *p) ++{ ++ set_tsk_need_resched(p); ++ return true; ++} ++ ++#ifdef CONFIG_SMP ++static bool set_nr_if_polling(struct task_struct *p) ++{ ++ return false; ++} ++#endif ++#endif ++ ++static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) ++{ ++ struct wake_q_node *node = &task->wake_q; ++ ++ /* ++ * Atomically grab the task, if ->wake_q is !nil already it means ++ * its already queued (either by us or someone else) and will get the ++ * wakeup due to that. ++ * ++ * In order to ensure that a pending wakeup will observe our pending ++ * state, even in the failed case, an explicit smp_mb() must be used. ++ */ ++ smp_mb__before_atomic(); ++ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) ++ return false; ++ ++ /* ++ * The head is context local, there can be no concurrency. ++ */ ++ *head->lastp = node; ++ head->lastp = &node->next; ++ return true; ++} ++ ++/** ++ * wake_q_add() - queue a wakeup for 'later' waking. ++ * @head: the wake_q_head to add @task to ++ * @task: the task to queue for 'later' wakeup ++ * ++ * Queue a task for later wakeup, most likely by the wake_up_q() call in the ++ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come ++ * instantly. ++ * ++ * This function must be used as-if it were wake_up_process(); IOW the task ++ * must be ready to be woken at this location. ++ */ ++void wake_q_add(struct wake_q_head *head, struct task_struct *task) ++{ ++ if (__wake_q_add(head, task)) ++ get_task_struct(task); ++} ++ ++/** ++ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. ++ * @head: the wake_q_head to add @task to ++ * @task: the task to queue for 'later' wakeup ++ * ++ * Queue a task for later wakeup, most likely by the wake_up_q() call in the ++ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come ++ * instantly. ++ * ++ * This function must be used as-if it were wake_up_process(); IOW the task ++ * must be ready to be woken at this location. ++ * ++ * This function is essentially a task-safe equivalent to wake_q_add(). Callers ++ * that already hold reference to @task can call the 'safe' version and trust ++ * wake_q to do the right thing depending whether or not the @task is already ++ * queued for wakeup. ++ */ ++void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) ++{ ++ if (!__wake_q_add(head, task)) ++ put_task_struct(task); ++} ++ ++void wake_up_q(struct wake_q_head *head) ++{ ++ struct wake_q_node *node = head->first; ++ ++ while (node != WAKE_Q_TAIL) { ++ struct task_struct *task; ++ ++ task = container_of(node, struct task_struct, wake_q); ++ BUG_ON(!task); ++ /* task can safely be re-inserted now: */ ++ node = node->next; ++ task->wake_q.next = NULL; ++ ++ /* ++ * wake_up_process() executes a full barrier, which pairs with ++ * the queueing in wake_q_add() so as not to miss wakeups. ++ */ ++ wake_up_process(task); ++ put_task_struct(task); ++ } ++} ++ ++/* ++ * resched_curr - mark rq's current task 'to be rescheduled now'. 
++ * ++ * On UP this means the setting of the need_resched flag, on SMP it ++ * might also involve a cross-CPU call to trigger the scheduler on ++ * the target CPU. ++ */ ++void resched_curr(struct rq *rq) ++{ ++ struct task_struct *curr = rq->curr; ++ int cpu; ++ ++ lockdep_assert_held(&rq->lock); ++ ++ if (test_tsk_need_resched(curr)) ++ return; ++ ++ cpu = cpu_of(rq); ++ if (cpu == smp_processor_id()) { ++ set_tsk_need_resched(curr); ++ set_preempt_need_resched(); ++ return; ++ } ++ ++ if (set_nr_and_not_polling(curr)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++} ++ ++void resched_cpu(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ if (cpu_online(cpu) || cpu == smp_processor_id()) ++ resched_curr(cpu_rq(cpu)); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++} ++ ++#ifdef CONFIG_SMP ++#ifdef CONFIG_NO_HZ_COMMON ++void nohz_balance_enter_idle(int cpu) ++{ ++} ++ ++void select_nohz_load_balancer(int stop_tick) ++{ ++} ++ ++void set_cpu_sd_state_idle(void) {} ++ ++/* ++ * In the semi idle case, use the nearest busy CPU for migrating timers ++ * from an idle CPU. This is good for power-savings. ++ * ++ * We don't do similar optimization for completely idle system, as ++ * selecting an idle CPU will add more delays to the timers than intended ++ * (as that CPU's timer base may not be uptodate wrt jiffies etc). ++ */ ++int get_nohz_timer_target(void) ++{ ++ int i, cpu = smp_processor_id(), default_cpu = -1; ++ struct cpumask *mask; ++ ++ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { ++ if (!idle_cpu(cpu)) ++ return cpu; ++ default_cpu = cpu; ++ } ++ ++ for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) ++ for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) ++ if (!idle_cpu(i)) ++ return i; ++ ++ if (default_cpu == -1) ++ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); ++ cpu = default_cpu; ++ ++ return cpu; ++} ++ ++/* ++ * When add_timer_on() enqueues a timer into the timer wheel of an ++ * idle CPU then this timer might expire before the next timer event ++ * which is scheduled to wake up that CPU. In case of a completely ++ * idle system the next event might even be infinite time into the ++ * future. wake_up_idle_cpu() ensures that the CPU is woken up and ++ * leaves the inner idle loop so the newly added timer is taken into ++ * account when the CPU goes back to idle and evaluates the timer ++ * wheel for the next timer event. ++ */ ++static inline void wake_up_idle_cpu(int cpu) ++{ ++ if (cpu == smp_processor_id()) ++ return; ++ ++ set_tsk_need_resched(cpu_rq(cpu)->idle); ++ smp_send_reschedule(cpu); ++} ++ ++static inline bool wake_up_full_nohz_cpu(int cpu) ++{ ++ /* ++ * We just need the target to call irq_exit() and re-evaluate ++ * the next tick. The nohz full kick at least implies that. ++ * If needed we can still optimize that later with an ++ * empty IRQ. 
++ */ ++ if (tick_nohz_full_cpu(cpu)) { ++ if (cpu != smp_processor_id() || ++ tick_nohz_tick_stopped()) ++ tick_nohz_full_kick_cpu(cpu); ++ return true; ++ } ++ ++ return false; ++} ++ ++void wake_up_nohz_cpu(int cpu) ++{ ++ if (cpu_online(cpu) && !wake_up_full_nohz_cpu(cpu)) ++ wake_up_idle_cpu(cpu); ++} ++ ++#endif /* CONFIG_NO_HZ_COMMON */ ++#endif /* CONFIG_SMP */ ++ ++static inline void check_preempt_curr(struct rq *rq) ++{ ++ if (sched_rq_first_task(rq) != rq->curr) ++ resched_curr(rq); ++} ++ ++#ifdef CONFIG_SCHED_HRTICK ++/* ++ * Use HR-timers to deliver accurate preemption points. ++ */ ++ ++static void hrtick_clear(struct rq *rq) ++{ ++ if (hrtimer_active(&rq->hrtick_timer)) ++ hrtimer_cancel(&rq->hrtick_timer); ++} ++ ++/* ++ * High-resolution timer tick. ++ * Runs from hardirq context with interrupts disabled. ++ */ ++static enum hrtimer_restart hrtick(struct hrtimer *timer) ++{ ++ struct rq *rq = container_of(timer, struct rq, hrtick_timer); ++ struct task_struct *p; ++ ++ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); ++ ++ raw_spin_lock(&rq->lock); ++ p = rq->curr; ++ p->time_slice = 0; ++ resched_curr(rq); ++ raw_spin_unlock(&rq->lock); ++ ++ return HRTIMER_NORESTART; ++} ++ ++/* ++ * Use hrtick when: ++ * - enabled by features ++ * - hrtimer is actually high res ++ */ ++static inline int hrtick_enabled(struct rq *rq) ++{ ++ /** ++ * Alt schedule FW doesn't support sched_feat yet ++ if (!sched_feat(HRTICK)) ++ return 0; ++ */ ++ if (!cpu_active(cpu_of(rq))) ++ return 0; ++ return hrtimer_is_hres_active(&rq->hrtick_timer); ++} ++ ++#ifdef CONFIG_SMP ++ ++static void __hrtick_restart(struct rq *rq) ++{ ++ struct hrtimer *timer = &rq->hrtick_timer; ++ ++ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); ++} ++ ++/* ++ * called from hardirq (IPI) context ++ */ ++static void __hrtick_start(void *arg) ++{ ++ struct rq *rq = arg; ++ ++ raw_spin_lock(&rq->lock); ++ __hrtick_restart(rq); ++ raw_spin_unlock(&rq->lock); ++} ++ ++/* ++ * Called to set the hrtick timer state. ++ * ++ * called with rq->lock held and irqs disabled ++ */ ++void hrtick_start(struct rq *rq, u64 delay) ++{ ++ struct hrtimer *timer = &rq->hrtick_timer; ++ ktime_t time; ++ s64 delta; ++ ++ /* ++ * Don't schedule slices shorter than 10000ns, that just ++ * doesn't make sense and can cause timer DoS. ++ */ ++ delta = max_t(s64, delay, 10000LL); ++ time = ktime_add_ns(timer->base->get_time(), delta); ++ ++ hrtimer_set_expires(timer, time); ++ ++ if (rq == this_rq()) ++ __hrtick_restart(rq); ++ else ++ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); ++} ++ ++#else ++/* ++ * Called to set the hrtick timer state. ++ * ++ * called with rq->lock held and irqs disabled ++ */ ++void hrtick_start(struct rq *rq, u64 delay) ++{ ++ /* ++ * Don't schedule slices shorter than 10000ns, that just ++ * doesn't make sense. Rely on vruntime for fairness. 
++ */ ++ delay = max_t(u64, delay, 10000LL); ++ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), ++ HRTIMER_MODE_REL_PINNED_HARD); ++} ++#endif /* CONFIG_SMP */ ++ ++static void hrtick_rq_init(struct rq *rq) ++{ ++#ifdef CONFIG_SMP ++ rq->hrtick_csd.flags = 0; ++ rq->hrtick_csd.func = __hrtick_start; ++ rq->hrtick_csd.info = rq; ++#endif ++ ++ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); ++ rq->hrtick_timer.function = hrtick; ++} ++#else /* CONFIG_SCHED_HRTICK */ ++static inline int hrtick_enabled(struct rq *rq) ++{ ++ return 0; ++} ++ ++static inline void hrtick_clear(struct rq *rq) ++{ ++} ++ ++static inline void hrtick_rq_init(struct rq *rq) ++{ ++} ++#endif /* CONFIG_SCHED_HRTICK */ ++ ++static inline int normal_prio(struct task_struct *p) ++{ ++ if (task_has_rt_policy(p)) ++ return MAX_RT_PRIO - 1 - p->rt_priority; ++ ++ return p->static_prio + MAX_PRIORITY_ADJ; ++} ++ ++/* ++ * Calculate the current priority, i.e. the priority ++ * taken into account by the scheduler. This value might ++ * be boosted by RT tasks as it will be RT if the task got ++ * RT-boosted. If not then it returns p->normal_prio. ++ */ ++static int effective_prio(struct task_struct *p) ++{ ++ p->normal_prio = normal_prio(p); ++ /* ++ * If we are RT tasks or we were boosted to RT priority, ++ * keep the priority unchanged. Otherwise, update priority ++ * to the normal priority: ++ */ ++ if (!rt_prio(p->prio)) ++ return p->normal_prio; ++ return p->prio; ++} ++ ++/* ++ * activate_task - move a task to the runqueue. ++ * ++ * Context: rq->lock ++ */ ++static void activate_task(struct task_struct *p, struct rq *rq) ++{ ++ if (task_contributes_to_load(p)) ++ rq->nr_uninterruptible--; ++ enqueue_task(p, rq, ENQUEUE_WAKEUP); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ cpufreq_update_util(rq, 0); ++} ++ ++/* ++ * deactivate_task - remove a task from the runqueue. ++ * ++ * Context: rq->lock ++ */ ++static inline void deactivate_task(struct task_struct *p, struct rq *rq) ++{ ++ if (task_contributes_to_load(p)) ++ rq->nr_uninterruptible++; ++ dequeue_task(p, rq, DEQUEUE_SLEEP); ++ p->on_rq = 0; ++ cpufreq_update_util(rq, 0); ++} ++ ++static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) ++{ ++#ifdef CONFIG_SMP ++ /* ++ * After ->cpu is set up to a new value, task_access_lock(p, ...) can be ++ * successfully executed on another CPU. We must ensure that updates of ++ * per-task data have been completed by this moment. ++ */ ++ smp_wmb(); ++ ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ WRITE_ONCE(p->cpu, cpu); ++#else ++ WRITE_ONCE(task_thread_info(p)->cpu, cpu); ++#endif ++#endif ++} ++ ++#ifdef CONFIG_SMP ++void set_task_cpu(struct task_struct *p, unsigned int new_cpu) ++{ ++#ifdef CONFIG_SCHED_DEBUG ++ /* ++ * We should never call set_task_cpu() on a blocked task, ++ * ttwu() will sort out the placement. ++ */ ++ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && ++ !p->on_rq); ++#ifdef CONFIG_LOCKDEP ++ /* ++ * The caller should hold either p->pi_lock or rq->lock, when changing ++ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. ++ * ++ * sched_move_task() holds both and thus holding either pins the cgroup, ++ * see task_group(). ++ */ ++ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || ++ lockdep_is_held(&task_rq(p)->lock))); ++#endif ++ /* ++ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. 
++ */ ++ WARN_ON_ONCE(!cpu_online(new_cpu)); ++#endif ++ if (task_cpu(p) == new_cpu) ++ return; ++ trace_sched_migrate_task(p, new_cpu); ++ rseq_migrate(p); ++ perf_event_task_migrate(p); ++ ++ __set_task_cpu(p, new_cpu); ++} ++ ++static inline bool is_per_cpu_kthread(struct task_struct *p) ++{ ++ return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); ++} ++ ++/* ++ * Per-CPU kthreads are allowed to run on !active && online CPUs, see ++ * __set_cpus_allowed_ptr() and select_fallback_rq(). ++ */ ++static inline bool is_cpu_allowed(struct task_struct *p, int cpu) ++{ ++ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) ++ return false; ++ ++ if (is_per_cpu_kthread(p)) ++ return cpu_online(cpu); ++ ++ return cpu_active(cpu); ++} ++ ++/* ++ * This is how migration works: ++ * ++ * 1) we invoke migration_cpu_stop() on the target CPU using ++ * stop_one_cpu(). ++ * 2) stopper starts to run (implicitly forcing the migrated thread ++ * off the CPU) ++ * 3) it checks whether the migrated task is still in the wrong runqueue. ++ * 4) if it's in the wrong runqueue then the migration thread removes ++ * it and puts it into the right queue. ++ * 5) stopper completes and stop_one_cpu() returns and the migration ++ * is done. ++ */ ++ ++/* ++ * move_queued_task - move a queued task to new rq. ++ * ++ * Returns (locked) new rq. Old rq's lock is released. ++ */ ++static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int ++ new_cpu) ++{ ++ lockdep_assert_held(&rq->lock); ++ ++ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); ++ dequeue_task(p, rq, 0); ++ set_task_cpu(p, new_cpu); ++ raw_spin_unlock(&rq->lock); ++ ++ rq = cpu_rq(new_cpu); ++ ++ raw_spin_lock(&rq->lock); ++ BUG_ON(task_cpu(p) != new_cpu); ++ enqueue_task(p, rq, 0); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ check_preempt_curr(rq); ++ ++ return rq; ++} ++ ++struct migration_arg { ++ struct task_struct *task; ++ int dest_cpu; ++}; ++ ++/* ++ * Move (not current) task off this CPU, onto the destination CPU. We're doing ++ * this because either it can't run here any more (set_cpus_allowed() ++ * away from this CPU, or CPU going down), or because we're ++ * attempting to rebalance this task on exec (sched_exec). ++ * ++ * So we race with normal scheduler movements, but that's OK, as long ++ * as the task is no longer on this CPU. ++ */ ++static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int ++ dest_cpu) ++{ ++ /* Affinity changed (again). */ ++ if (!is_cpu_allowed(p, dest_cpu)) ++ return rq; ++ ++ update_rq_clock(rq); ++ return move_queued_task(rq, p, dest_cpu); ++} ++ ++/* ++ * migration_cpu_stop - this will be executed by a highprio stopper thread ++ * and performs thread migration by bumping thread off CPU then ++ * 'pushing' onto another runqueue. ++ */ ++static int migration_cpu_stop(void *data) ++{ ++ struct migration_arg *arg = data; ++ struct task_struct *p = arg->task; ++ struct rq *rq = this_rq(); ++ ++ /* ++ * The original target CPU might have gone down and we might ++ * be on another CPU but it doesn't matter. ++ */ ++ local_irq_disable(); ++ ++ raw_spin_lock(&p->pi_lock); ++ raw_spin_lock(&rq->lock); ++ /* ++ * If task_rq(p) != rq, it cannot be migrated here, because we're ++ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because ++ * we're holding p->pi_lock. 
++ */ ++ if (task_rq(p) == rq && task_on_rq_queued(p)) ++ rq = __migrate_task(rq, p, arg->dest_cpu); ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock(&p->pi_lock); ++ ++ local_irq_enable(); ++ return 0; ++} ++ ++static inline void ++set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ cpumask_copy(&p->cpus_mask, new_mask); ++ p->nr_cpus_allowed = cpumask_weight(new_mask); ++} ++ ++void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ set_cpus_allowed_common(p, new_mask); ++} ++#endif ++ ++/** ++ * task_curr - is this task currently executing on a CPU? ++ * @p: the task in question. ++ * ++ * Return: 1 if the task is currently executing. 0 otherwise. ++ */ ++inline int task_curr(const struct task_struct *p) ++{ ++ return cpu_curr(task_cpu(p)) == p; ++} ++ ++#ifdef CONFIG_SMP ++/* ++ * wait_task_inactive - wait for a thread to unschedule. ++ * ++ * If @match_state is nonzero, it's the @p->state value just checked and ++ * not expected to change. If it changes, i.e. @p might have woken up, ++ * then return zero. When we succeed in waiting for @p to be off its CPU, ++ * we return a positive number (its total switch count). If a second call ++ * a short while later returns the same number, the caller can be sure that ++ * @p has remained unscheduled the whole time. ++ * ++ * The caller must ensure that the task *will* unschedule sometime soon, ++ * else this function might spin for a *long* time. This function can't ++ * be called with interrupts off, or it may introduce deadlock with ++ * smp_call_function() if an IPI is sent by the same process we are ++ * waiting to become inactive. ++ */ ++unsigned long wait_task_inactive(struct task_struct *p, long match_state) ++{ ++ unsigned long flags; ++ bool running, on_rq; ++ unsigned long ncsw; ++ struct rq *rq; ++ raw_spinlock_t *lock; ++ ++ for (;;) { ++ rq = task_rq(p); ++ ++ /* ++ * If the task is actively running on another CPU ++ * still, just relax and busy-wait without holding ++ * any locks. ++ * ++ * NOTE! Since we don't hold any locks, it's not ++ * even sure that "rq" stays as the right runqueue! ++ * But we don't care, since this will return false ++ * if the runqueue has changed and p is actually now ++ * running somewhere else! ++ */ ++ while (task_running(p) && p == rq->curr) { ++ if (match_state && unlikely(p->state != match_state)) ++ return 0; ++ cpu_relax(); ++ } ++ ++ /* ++ * Ok, time to look more closely! We need the rq ++ * lock now, to be *sure*. If we're wrong, we'll ++ * just go back and repeat. ++ */ ++ task_access_lock_irqsave(p, &lock, &flags); ++ trace_sched_wait_task(p); ++ running = task_running(p); ++ on_rq = p->on_rq; ++ ncsw = 0; ++ if (!match_state || p->state == match_state) ++ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ ++ task_access_unlock_irqrestore(p, lock, &flags); ++ ++ /* ++ * If it changed from the expected state, bail out now. ++ */ ++ if (unlikely(!ncsw)) ++ break; ++ ++ /* ++ * Was it really running after all now that we ++ * checked with the proper locks actually held? ++ * ++ * Oops. Go back and try again.. ++ */ ++ if (unlikely(running)) { ++ cpu_relax(); ++ continue; ++ } ++ ++ /* ++ * It's not enough that it's not actively running, ++ * it must be off the runqueue _entirely_, and not ++ * preempted! ++ * ++ * So if it was still runnable (but just not actively ++ * running right now), it's preempted, and we should ++ * yield - it could be a while. 
++ */ ++ if (unlikely(on_rq)) { ++ ktime_t to = NSEC_PER_SEC / HZ; ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_hrtimeout(&to, HRTIMER_MODE_REL); ++ continue; ++ } ++ ++ /* ++ * Ahh, all good. It wasn't running, and it wasn't ++ * runnable, which means that it will never become ++ * running in the future either. We're all done! ++ */ ++ break; ++ } ++ ++ return ncsw; ++} ++ ++/*** ++ * kick_process - kick a running thread to enter/exit the kernel ++ * @p: the to-be-kicked thread ++ * ++ * Cause a process which is running on another CPU to enter ++ * kernel-mode, without any delay. (to get signals handled.) ++ * ++ * NOTE: this function doesn't have to take the runqueue lock, ++ * because all it wants to ensure is that the remote task enters ++ * the kernel. If the IPI races and the task has been migrated ++ * to another CPU then no harm is done and the purpose has been ++ * achieved as well. ++ */ ++void kick_process(struct task_struct *p) ++{ ++ int cpu; ++ ++ preempt_disable(); ++ cpu = task_cpu(p); ++ if ((cpu != smp_processor_id()) && task_curr(p)) ++ smp_send_reschedule(cpu); ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(kick_process); ++ ++/* ++ * ->cpus_ptr is protected by both rq->lock and p->pi_lock ++ * ++ * A few notes on cpu_active vs cpu_online: ++ * ++ * - cpu_active must be a subset of cpu_online ++ * ++ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, ++ * see __set_cpus_allowed_ptr(). At this point the newly online ++ * CPU isn't yet part of the sched domains, and balancing will not ++ * see it. ++ * ++ * - on cpu-down we clear cpu_active() to mask the sched domains and ++ * avoid the load balancer to place new tasks on the to be removed ++ * CPU. Existing tasks will remain running there and will be taken ++ * off. ++ * ++ * This means that fallback selection must not select !active CPUs. ++ * And can assume that any active CPU must be online. Conversely ++ * select_task_rq() below may allow selection of !active CPUs in order ++ * to satisfy the above rules. ++ */ ++static int select_fallback_rq(int cpu, struct task_struct *p) ++{ ++ int nid = cpu_to_node(cpu); ++ const struct cpumask *nodemask = NULL; ++ enum { cpuset, possible, fail } state = cpuset; ++ int dest_cpu; ++ ++ /* ++ * If the node that the CPU is on has been offlined, cpu_to_node() ++ * will return -1. There is no CPU on the node, and we should ++ * select the CPU on the other node. ++ */ ++ if (nid != -1) { ++ nodemask = cpumask_of_node(nid); ++ ++ /* Look for allowed, online CPU in same node. */ ++ for_each_cpu(dest_cpu, nodemask) { ++ if (!cpu_active(dest_cpu)) ++ continue; ++ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) ++ return dest_cpu; ++ } ++ } ++ ++ for (;;) { ++ /* Any allowed, online CPU? */ ++ for_each_cpu(dest_cpu, p->cpus_ptr) { ++ if (!is_cpu_allowed(p, dest_cpu)) ++ continue; ++ goto out; ++ } ++ ++ /* No more Mr. Nice Guy. */ ++ switch (state) { ++ case cpuset: ++ if (IS_ENABLED(CONFIG_CPUSETS)) { ++ cpuset_cpus_allowed_fallback(p); ++ state = possible; ++ break; ++ } ++ /* Fall-through */ ++ case possible: ++ do_set_cpus_allowed(p, cpu_possible_mask); ++ state = fail; ++ break; ++ ++ case fail: ++ BUG(); ++ break; ++ } ++ } ++ ++out: ++ if (state != cpuset) { ++ /* ++ * Don't tell them about moving exiting tasks or ++ * kernel threads (both mm NULL), since they never ++ * leave kernel. 
++ */ ++ if (p->mm && printk_ratelimit()) { ++ printk_deferred("process %d (%s) no longer affine to cpu%d\n", ++ task_pid_nr(p), p->comm, cpu); ++ } ++ } ++ ++ return dest_cpu; ++} ++ ++static inline int select_task_rq(struct task_struct *p) ++{ ++ cpumask_t chk_mask, tmp; ++ ++ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) ++ return select_fallback_rq(task_cpu(p), p); ++ ++ if ( ++#ifdef CONFIG_SCHED_SMT ++ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || ++#endif ++ cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || ++ cpumask_and(&tmp, &chk_mask, ++ &sched_rq_watermark[task_sched_prio(p) + 1])) ++ return best_mask_cpu(task_cpu(p), &tmp); ++ ++ return best_mask_cpu(task_cpu(p), &chk_mask); ++} ++ ++void sched_set_stop_task(int cpu, struct task_struct *stop) ++{ ++ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; ++ struct sched_param start_param = { .sched_priority = 0 }; ++ struct task_struct *old_stop = cpu_rq(cpu)->stop; ++ ++ if (stop) { ++ /* ++ * Make it appear like a SCHED_FIFO task, its something ++ * userspace knows about and won't get confused about. ++ * ++ * Also, it will make PI more or less work without too ++ * much confusion -- but then, stop work should not ++ * rely on PI working anyway. ++ */ ++ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); ++ } ++ ++ cpu_rq(cpu)->stop = stop; ++ ++ if (old_stop) { ++ /* ++ * Reset it back to a normal scheduling policy so that ++ * it can die in pieces. ++ */ ++ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); ++ } ++} ++ ++/* ++ * Change a given task's CPU affinity. Migrate the thread to a ++ * proper CPU and schedule it away if the CPU it's executing on ++ * is removed from the allowed bitmask. ++ * ++ * NOTE: the caller must have a valid reference to the task, the ++ * task must not exit() & deallocate itself prematurely. The ++ * call is not atomic; no spinlocks may be held. ++ */ ++static int __set_cpus_allowed_ptr(struct task_struct *p, ++ const struct cpumask *new_mask, bool check) ++{ ++ const struct cpumask *cpu_valid_mask = cpu_active_mask; ++ int dest_cpu; ++ unsigned long flags; ++ struct rq *rq; ++ raw_spinlock_t *lock; ++ int ret = 0; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ rq = __task_access_lock(p, &lock); ++ ++ if (p->flags & PF_KTHREAD) { ++ /* ++ * Kernel threads are allowed on online && !active CPUs ++ */ ++ cpu_valid_mask = cpu_online_mask; ++ } ++ ++ /* ++ * Must re-check here, to close a race against __kthread_bind(), ++ * sched_setaffinity() is not guaranteed to observe the flag. ++ */ ++ if (check && (p->flags & PF_NO_SETAFFINITY)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (cpumask_equal(p->cpus_ptr, new_mask)) ++ goto out; ++ ++ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); ++ if (dest_cpu >= nr_cpu_ids) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ do_set_cpus_allowed(p, new_mask); ++ ++ if (p->flags & PF_KTHREAD) { ++ /* ++ * For kernel threads that do indeed end up on online && ++ * !active we want to ensure they are strict per-CPU threads. ++ */ ++ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && ++ !cpumask_intersects(new_mask, cpu_active_mask) && ++ p->nr_cpus_allowed != 1); ++ } ++ ++ /* Can the task run on the task's current CPU? If so, we're done */ ++ if (cpumask_test_cpu(task_cpu(p), new_mask)) ++ goto out; ++ ++ if (task_running(p) || p->state == TASK_WAKING) { ++ struct migration_arg arg = { p, dest_cpu }; ++ ++ /* Need help from migration thread: drop lock and wait. 
*/ ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); ++ return 0; ++ } ++ if (task_on_rq_queued(p)) { ++ /* ++ * OK, since we're going to drop the lock immediately ++ * afterwards anyway. ++ */ ++ update_rq_clock(rq); ++ rq = move_queued_task(rq, p, dest_cpu); ++ lock = &rq->lock; ++ } ++ ++out: ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ return ret; ++} ++ ++int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ return __set_cpus_allowed_ptr(p, new_mask, false); ++} ++EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); ++ ++#else /* CONFIG_SMP */ ++ ++static inline int select_task_rq(struct task_struct *p) ++{ ++ return 0; ++} ++ ++static inline int ++__set_cpus_allowed_ptr(struct task_struct *p, ++ const struct cpumask *new_mask, bool check) ++{ ++ return set_cpus_allowed_ptr(p, new_mask); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++static void ++ttwu_stat(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq; ++ ++ if (!schedstat_enabled()) ++ return; ++ ++ rq= this_rq(); ++ ++#ifdef CONFIG_SMP ++ if (cpu == rq->cpu) ++ __schedstat_inc(rq->ttwu_local); ++ else { ++ /** Alt schedule FW ToDo: ++ * How to do ttwu_wake_remote ++ */ ++ } ++#endif /* CONFIG_SMP */ ++ ++ __schedstat_inc(rq->ttwu_count); ++} ++ ++/* ++ * Mark the task runnable and perform wakeup-preemption. ++ */ ++static inline void ++ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) ++{ ++ p->state = TASK_RUNNING; ++ trace_sched_wakeup(p); ++} ++ ++static inline void ++ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) ++{ ++#ifdef CONFIG_SMP ++ if (p->sched_contributes_to_load) ++ rq->nr_uninterruptible--; ++#endif ++ ++ activate_task(p, rq); ++ ttwu_do_wakeup(rq, p, 0); ++} ++ ++static int ttwu_remote(struct task_struct *p, int wake_flags) ++{ ++ struct rq *rq; ++ raw_spinlock_t *lock; ++ int ret = 0; ++ ++ rq = __task_access_lock(p, &lock); ++ if (task_on_rq_queued(p)) { ++ ttwu_do_wakeup(rq, p, wake_flags); ++ ret = 1; ++ } ++ __task_access_unlock(p, lock); ++ ++ return ret; ++} ++ ++#ifdef CONFIG_SMP ++void scheduler_ipi(void) ++{ ++ /* ++ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting ++ * TIF_NEED_RESCHED remotely (for the first time) will also send ++ * this IPI. 
++ */ ++ preempt_fold_need_resched(); ++ ++ if (!idle_cpu(smp_processor_id()) || need_resched()) ++ return; ++ ++ irq_enter(); ++ irq_exit(); ++} ++ ++void wake_up_if_idle(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ rcu_read_lock(); ++ ++ if (!is_idle_task(rcu_dereference(rq->curr))) ++ goto out; ++ ++ if (set_nr_if_polling(rq->idle)) { ++ trace_sched_wake_idle_without_ipi(cpu); ++ } else { ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ if (is_idle_task(rq->curr)) ++ smp_send_reschedule(cpu); ++ /* Else CPU is not idle, do nothing here */ ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++ } ++ ++out: ++ rcu_read_unlock(); ++} ++ ++bool cpus_share_cache(int this_cpu, int that_cpu) ++{ ++ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); ++} ++#endif /* CONFIG_SMP */ ++ ++static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ raw_spin_lock(&rq->lock); ++ update_rq_clock(rq); ++ ttwu_do_activate(rq, p, wake_flags); ++ check_preempt_curr(rq); ++ raw_spin_unlock(&rq->lock); ++} ++ ++/* ++ * Notes on Program-Order guarantees on SMP systems. ++ * ++ * MIGRATION ++ * ++ * The basic program-order guarantee on SMP systems is that when a task [t] ++ * migrates, all its activity on its old CPU [c0] happens-before any subsequent ++ * execution on its new CPU [c1]. ++ * ++ * For migration (of runnable tasks) this is provided by the following means: ++ * ++ * A) UNLOCK of the rq(c0)->lock scheduling out task t ++ * B) migration for t is required to synchronize *both* rq(c0)->lock and ++ * rq(c1)->lock (if not at the same time, then in that order). ++ * C) LOCK of the rq(c1)->lock scheduling in task ++ * ++ * Transitivity guarantees that B happens after A and C after B. ++ * Note: we only require RCpc transitivity. ++ * Note: the CPU doing B need not be c0 or c1 ++ * ++ * Example: ++ * ++ * CPU0 CPU1 CPU2 ++ * ++ * LOCK rq(0)->lock ++ * sched-out X ++ * sched-in Y ++ * UNLOCK rq(0)->lock ++ * ++ * LOCK rq(0)->lock // orders against CPU0 ++ * dequeue X ++ * UNLOCK rq(0)->lock ++ * ++ * LOCK rq(1)->lock ++ * enqueue X ++ * UNLOCK rq(1)->lock ++ * ++ * LOCK rq(1)->lock // orders against CPU2 ++ * sched-out Z ++ * sched-in X ++ * UNLOCK rq(1)->lock ++ * ++ * ++ * BLOCKING -- aka. SLEEP + WAKEUP ++ * ++ * For blocking we (obviously) need to provide the same guarantee as for ++ * migration. However the means are completely different as there is no lock ++ * chain to provide order. Instead we do: ++ * ++ * 1) smp_store_release(X->on_cpu, 0) ++ * 2) smp_cond_load_acquire(!X->on_cpu) ++ * ++ * Example: ++ * ++ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) ++ * ++ * LOCK rq(0)->lock LOCK X->pi_lock ++ * dequeue X ++ * sched-out X ++ * smp_store_release(X->on_cpu, 0); ++ * ++ * smp_cond_load_acquire(&X->on_cpu, !VAL); ++ * X->state = WAKING ++ * set_task_cpu(X,2) ++ * ++ * LOCK rq(2)->lock ++ * enqueue X ++ * X->state = RUNNING ++ * UNLOCK rq(2)->lock ++ * ++ * LOCK rq(2)->lock // orders against CPU1 ++ * sched-out Z ++ * sched-in X ++ * UNLOCK rq(2)->lock ++ * ++ * UNLOCK X->pi_lock ++ * UNLOCK rq(0)->lock ++ * ++ * ++ * However; for wakeups there is a second guarantee we must provide, namely we ++ * must observe the state that lead to our wakeup. That is, not only must our ++ * task observe its own prior state, it must also observe the stores prior to ++ * its wakeup. 
++ * ++ * This means that any means of doing remote wakeups must order the CPU doing ++ * the wakeup against the CPU the task is going to end up running on. This, ++ * however, is already required for the regular Program-Order guarantee above, ++ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). ++ * ++ */ ++ ++/*** ++ * try_to_wake_up - wake up a thread ++ * @p: the thread to be awakened ++ * @state: the mask of task states that can be woken ++ * @wake_flags: wake modifier flags (WF_*) ++ * ++ * Put it on the run-queue if it's not already there. The "current" ++ * thread is always on the run-queue (except when the actual ++ * re-schedule is in progress), and as such you're allowed to do ++ * the simpler "current->state = TASK_RUNNING" to mark yourself ++ * runnable without the overhead of this. ++ * ++ * Return: %true if @p was woken up, %false if it was already running. ++ * or @state didn't match @p's state. ++ */ ++static int try_to_wake_up(struct task_struct *p, unsigned int state, ++ int wake_flags) ++{ ++ unsigned long flags; ++ int cpu, success = 0; ++ ++ preempt_disable(); ++ if (p == current) { ++ /* ++ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) ++ * == smp_processor_id()'. Together this means we can special ++ * case the whole 'p->on_rq && ttwu_remote()' case below ++ * without taking any locks. ++ * ++ * In particular: ++ * - we rely on Program-Order guarantees for all the ordering, ++ * - we're serialized against set_special_state() by virtue of ++ * it disabling IRQs (this allows not taking ->pi_lock). ++ */ ++ if (!(p->state & state)) ++ goto out; ++ ++ success = 1; ++ cpu = task_cpu(p); ++ trace_sched_waking(p); ++ p->state = TASK_RUNNING; ++ trace_sched_wakeup(p); ++ goto out; ++ } ++ ++ /* ++ * If we are going to wake up a thread waiting for CONDITION we ++ * need to ensure that CONDITION=1 done by the caller can not be ++ * reordered with p->state check below. This pairs with mb() in ++ * set_current_state() the waiting thread does. ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ smp_mb__after_spinlock(); ++ if (!(p->state & state)) ++ goto unlock; ++ ++ trace_sched_waking(p); ++ ++ /* We're going to change ->state: */ ++ success = 1; ++ cpu = task_cpu(p); ++ ++ /* ++ * Ensure we load p->on_rq _after_ p->state, otherwise it would ++ * be possible to, falsely, observe p->on_rq == 0 and get stuck ++ * in smp_cond_load_acquire() below. ++ * ++ * sched_ttwu_pending() try_to_wake_up() ++ * STORE p->on_rq = 1 LOAD p->state ++ * UNLOCK rq->lock ++ * ++ * __schedule() (switch to task 'p') ++ * LOCK rq->lock smp_rmb(); ++ * smp_mb__after_spinlock(); ++ * UNLOCK rq->lock ++ * ++ * [task p] ++ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq ++ * ++ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in ++ * __schedule(). See the comment for smp_mb__after_spinlock(). ++ */ ++ smp_rmb(); ++ if (p->on_rq && ttwu_remote(p, wake_flags)) ++ goto unlock; ++ ++#ifdef CONFIG_SMP ++ /* ++ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be ++ * possible to, falsely, observe p->on_cpu == 0. ++ * ++ * One must be running (->on_cpu == 1) in order to remove oneself ++ * from the runqueue. 
++ * ++ * __schedule() (switch to task 'p') try_to_wake_up() ++ * STORE p->on_cpu = 1 LOAD p->on_rq ++ * UNLOCK rq->lock ++ * ++ * __schedule() (put 'p' to sleep) ++ * LOCK rq->lock smp_rmb(); ++ * smp_mb__after_spinlock(); ++ * STORE p->on_rq = 0 LOAD p->on_cpu ++ * ++ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in ++ * __schedule(). See the comment for smp_mb__after_spinlock(). ++ */ ++ smp_rmb(); ++ ++ /* ++ * If the owning (remote) CPU is still in the middle of schedule() with ++ * this task as prev, wait until its done referencing the task. ++ * ++ * Pairs with the smp_store_release() in finish_task(). ++ * ++ * This ensures that tasks getting woken will be fully ordered against ++ * their previous state and preserve Program Order. ++ */ ++ smp_cond_load_acquire(&p->on_cpu, !VAL); ++ ++ p->sched_contributes_to_load = !!task_contributes_to_load(p); ++ p->state = TASK_WAKING; ++ ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } ++ ++ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) ++ boost_task(p); ++ ++ cpu = select_task_rq(p); ++ ++ if (cpu != task_cpu(p)) { ++ wake_flags |= WF_MIGRATED; ++ psi_ttwu_dequeue(p); ++ set_task_cpu(p, cpu); ++ } ++#else /* CONFIG_SMP */ ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } ++#endif /* CONFIG_SMP */ ++ ++ ttwu_queue(p, cpu, wake_flags); ++unlock: ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++out: ++ if (success) ++ ttwu_stat(p, cpu, wake_flags); ++ preempt_enable(); ++ ++ return success; ++} ++ ++/** ++ * wake_up_process - Wake up a specific process ++ * @p: The process to be woken up. ++ * ++ * Attempt to wake up the nominated process and move it to the set of runnable ++ * processes. ++ * ++ * Return: 1 if the process was woken up, 0 if it was already running. ++ * ++ * This function executes a full memory barrier before accessing the task state. ++ */ ++int wake_up_process(struct task_struct *p) ++{ ++ return try_to_wake_up(p, TASK_NORMAL, 0); ++} ++EXPORT_SYMBOL(wake_up_process); ++ ++int wake_up_state(struct task_struct *p, unsigned int state) ++{ ++ return try_to_wake_up(p, state, 0); ++} ++ ++/* ++ * Perform scheduler related setup for a newly forked process p. ++ * p is forked by current. ++ * ++ * __sched_fork() is basic setup used by init_idle() too: ++ */ ++static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p) ++{ ++ p->on_rq = 0; ++ p->on_cpu = 0; ++ p->utime = 0; ++ p->stime = 0; ++ p->sched_time = 0; ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ INIT_HLIST_HEAD(&p->preempt_notifiers); ++#endif ++ ++#ifdef CONFIG_COMPACTION ++ p->capture_control = NULL; ++#endif ++} ++ ++/* ++ * fork()/clone()-time setup: ++ */ ++int sched_fork(unsigned long clone_flags, struct task_struct *p) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ ++ __sched_fork(clone_flags, p); ++ /* ++ * We mark the process as NEW here. This guarantees that ++ * nobody will actually run it, and a signal or other external ++ * event cannot wake it up and insert it on the runqueue either. ++ */ ++ p->state = TASK_NEW; ++ ++ /* ++ * Make sure we do not leak PI boosting priority to the child. ++ */ ++ p->prio = current->normal_prio; ++ ++ /* ++ * Revert to default priority/policy on fork if requested. 
++ */ ++ if (unlikely(p->sched_reset_on_fork)) { ++ if (task_has_rt_policy(p)) { ++ p->policy = SCHED_NORMAL; ++ p->static_prio = NICE_TO_PRIO(0); ++ p->rt_priority = 0; ++ } else if (PRIO_TO_NICE(p->static_prio) < 0) ++ p->static_prio = NICE_TO_PRIO(0); ++ ++ p->prio = p->normal_prio = normal_prio(p); ++ ++ /* ++ * We don't need the reset flag anymore after the fork. It has ++ * fulfilled its duty: ++ */ ++ p->sched_reset_on_fork = 0; ++ } ++ ++ p->boost_prio = (p->boost_prio < 0) ? ++ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; ++ /* ++ * The child is not yet in the pid-hash so no cgroup attach races, ++ * and the cgroup is pinned to this child due to cgroup_fork() ++ * is ran before sched_fork(). ++ * ++ * Silence PROVE_RCU. ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ /* ++ * Share the timeslice between parent and child, thus the ++ * total amount of pending timeslices in the system doesn't change, ++ * resulting in more scheduling fairness. ++ */ ++ rq = this_rq(); ++ raw_spin_lock(&rq->lock); ++ rq->curr->time_slice /= 2; ++ p->time_slice = rq->curr->time_slice; ++#ifdef CONFIG_SCHED_HRTICK ++ hrtick_start(rq, rq->curr->time_slice); ++#endif ++ ++ if (p->time_slice < RESCHED_NS) { ++ p->time_slice = sched_timeslice_ns; ++ resched_curr(rq); ++ } ++ raw_spin_unlock(&rq->lock); ++ ++ /* ++ * We're setting the CPU for the first time, we don't migrate, ++ * so use __set_task_cpu(). ++ */ ++ __set_task_cpu(p, cpu_of(rq)); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++#ifdef CONFIG_SCHED_INFO ++ if (unlikely(sched_info_on())) ++ memset(&p->sched_info, 0, sizeof(p->sched_info)); ++#endif ++ init_task_preempt_count(p); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_SCHEDSTATS ++ ++DEFINE_STATIC_KEY_FALSE(sched_schedstats); ++static bool __initdata __sched_schedstats = false; ++ ++static void set_schedstats(bool enabled) ++{ ++ if (enabled) ++ static_branch_enable(&sched_schedstats); ++ else ++ static_branch_disable(&sched_schedstats); ++} ++ ++void force_schedstat_enabled(void) ++{ ++ if (!schedstat_enabled()) { ++ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); ++ static_branch_enable(&sched_schedstats); ++ } ++} ++ ++static int __init setup_schedstats(char *str) ++{ ++ int ret = 0; ++ if (!str) ++ goto out; ++ ++ /* ++ * This code is called before jump labels have been set up, so we can't ++ * change the static branch directly just yet. Instead set a temporary ++ * variable so init_schedstats() can do it later. 
++ */ ++ if (!strcmp(str, "enable")) { ++ __sched_schedstats = true; ++ ret = 1; ++ } else if (!strcmp(str, "disable")) { ++ __sched_schedstats = false; ++ ret = 1; ++ } ++out: ++ if (!ret) ++ pr_warn("Unable to parse schedstats=\n"); ++ ++ return ret; ++} ++__setup("schedstats=", setup_schedstats); ++ ++static void __init init_schedstats(void) ++{ ++ set_schedstats(__sched_schedstats); ++} ++ ++#ifdef CONFIG_PROC_SYSCTL ++int sysctl_schedstats(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct ctl_table t; ++ int err; ++ int state = static_branch_likely(&sched_schedstats); ++ ++ if (write && !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ t = *table; ++ t.data = &state; ++ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); ++ if (err < 0) ++ return err; ++ if (write) ++ set_schedstats(state); ++ return err; ++} ++#endif /* CONFIG_PROC_SYSCTL */ ++#else /* !CONFIG_SCHEDSTATS */ ++static inline void init_schedstats(void) {} ++#endif /* CONFIG_SCHEDSTATS */ ++ ++/* ++ * wake_up_new_task - wake up a newly created task for the first time. ++ * ++ * This function will do some initial scheduler statistics housekeeping ++ * that must be done for every newly created context, then puts the task ++ * on the runqueue and wakes it. ++ */ ++void wake_up_new_task(struct task_struct *p) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ ++ p->state = TASK_RUNNING; ++ ++ rq = cpu_rq(select_task_rq(p)); ++#ifdef CONFIG_SMP ++ /* ++ * Fork balancing, do it here and not earlier because: ++ * - cpus_ptr can change in the fork path ++ * - any previously selected CPU might disappear through hotplug ++ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, ++ * as we're not fully set-up yet. ++ */ ++ __set_task_cpu(p, cpu_of(rq)); ++#endif ++ ++ raw_spin_lock(&rq->lock); ++ ++ update_rq_clock(rq); ++ activate_task(p, rq); ++ trace_sched_wakeup_new(p); ++ check_preempt_curr(rq); ++ ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++} ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ ++static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); ++ ++void preempt_notifier_inc(void) ++{ ++ static_branch_inc(&preempt_notifier_key); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_inc); ++ ++void preempt_notifier_dec(void) ++{ ++ static_branch_dec(&preempt_notifier_key); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_dec); ++ ++/** ++ * preempt_notifier_register - tell me when current is being preempted & rescheduled ++ * @notifier: notifier struct to register ++ */ ++void preempt_notifier_register(struct preempt_notifier *notifier) ++{ ++ if (!static_branch_unlikely(&preempt_notifier_key)) ++ WARN(1, "registering preempt_notifier while notifiers disabled\n"); ++ ++ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_register); ++ ++/** ++ * preempt_notifier_unregister - no longer interested in preemption notifications ++ * @notifier: notifier struct to unregister ++ * ++ * This is *not* safe to call from within a preemption notifier. 
++ */ ++void preempt_notifier_unregister(struct preempt_notifier *notifier) ++{ ++ hlist_del(¬ifier->link); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_unregister); ++ ++static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++ struct preempt_notifier *notifier; ++ ++ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) ++ notifier->ops->sched_in(notifier, raw_smp_processor_id()); ++} ++ ++static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++ if (static_branch_unlikely(&preempt_notifier_key)) ++ __fire_sched_in_preempt_notifiers(curr); ++} ++ ++static void ++__fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++ struct preempt_notifier *notifier; ++ ++ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) ++ notifier->ops->sched_out(notifier, next); ++} ++ ++static __always_inline void ++fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++ if (static_branch_unlikely(&preempt_notifier_key)) ++ __fire_sched_out_preempt_notifiers(curr, next); ++} ++ ++#else /* !CONFIG_PREEMPT_NOTIFIERS */ ++ ++static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++} ++ ++static inline void ++fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++} ++ ++#endif /* CONFIG_PREEMPT_NOTIFIERS */ ++ ++static inline void prepare_task(struct task_struct *next) ++{ ++ /* ++ * Claim the task as running, we do this before switching to it ++ * such that any running task will have this set. ++ */ ++ next->on_cpu = 1; ++} ++ ++static inline void finish_task(struct task_struct *prev) ++{ ++#ifdef CONFIG_SMP ++ /* ++ * After ->on_cpu is cleared, the task can be moved to a different CPU. ++ * We must ensure this doesn't happen until the switch is completely ++ * finished. ++ * ++ * In particular, the load of prev->state in finish_task_switch() must ++ * happen before this. ++ * ++ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). ++ */ ++ smp_store_release(&prev->on_cpu, 0); ++#else ++ prev->on_cpu = 0; ++#endif ++} ++ ++static inline void ++prepare_lock_switch(struct rq *rq, struct task_struct *next) ++{ ++ /* ++ * Since the runqueue lock will be released by the next ++ * task (which is an invalid locking op but in the case ++ * of the scheduler it's an obvious special-case), so we ++ * do an early lockdep release here: ++ */ ++ spin_release(&rq->lock.dep_map, _THIS_IP_); ++#ifdef CONFIG_DEBUG_SPINLOCK ++ /* this is a valid case when another task releases the spinlock */ ++ rq->lock.owner = next; ++#endif ++} ++ ++static inline void finish_lock_switch(struct rq *rq) ++{ ++ /* ++ * If we are tracking spinlock dependencies then we have to ++ * fix up the runqueue lock - which gets 'carried over' from ++ * prev into current: ++ */ ++ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); ++ raw_spin_unlock_irq(&rq->lock); ++} ++ ++/** ++ * prepare_task_switch - prepare to switch tasks ++ * @rq: the runqueue preparing to switch ++ * @next: the task we are going to switch to. ++ * ++ * This is called with the rq lock held and interrupts off. It must ++ * be paired with a subsequent finish_task_switch after the context ++ * switch. ++ * ++ * prepare_task_switch sets up locking and calls architecture specific ++ * hooks. 
++ */
++static inline void
++prepare_task_switch(struct rq *rq, struct task_struct *prev,
++		    struct task_struct *next)
++{
++	kcov_prepare_switch(prev);
++	sched_info_switch(rq, prev, next);
++	perf_event_task_sched_out(prev, next);
++	rseq_preempt(prev);
++	fire_sched_out_preempt_notifiers(prev, next);
++	prepare_task(next);
++	prepare_arch_switch(next);
++}
++
++/**
++ * finish_task_switch - clean up after a task-switch
++ * @rq: runqueue associated with task-switch
++ * @prev: the thread we just switched away from.
++ *
++ * finish_task_switch must be called after the context switch, paired
++ * with a prepare_task_switch call before the context switch.
++ * finish_task_switch will reconcile locking set up by prepare_task_switch,
++ * and do any other architecture-specific cleanup actions.
++ *
++ * Note that we may have delayed dropping an mm in context_switch(). If
++ * so, we finish that here outside of the runqueue lock. (Doing it
++ * with the lock held can cause deadlocks; see schedule() for
++ * details.)
++ *
++ * The context switch has flipped the stack from under us and restored the
++ * local variables which were saved when this task called schedule() in the
++ * past. prev == current is still correct but we need to recalculate this_rq
++ * because prev may have moved to another CPU.
++ */
++static struct rq *finish_task_switch(struct task_struct *prev)
++	__releases(rq->lock)
++{
++	struct rq *rq = this_rq();
++	struct mm_struct *mm = rq->prev_mm;
++	long prev_state;
++
++	/*
++	 * The previous task will have left us with a preempt_count of 2
++	 * because it left us after:
++	 *
++	 *	schedule()
++	 *	  preempt_disable();			// 1
++	 *	  __schedule()
++	 *	    raw_spin_lock_irq(&rq->lock)	// 2
++	 *
++	 * Also, see FORK_PREEMPT_COUNT.
++	 */
++	if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET,
++		      "corrupted preempt_count: %s/%d/0x%x\n",
++		      current->comm, current->pid, preempt_count()))
++		preempt_count_set(FORK_PREEMPT_COUNT);
++
++	rq->prev_mm = NULL;
++
++	/*
++	 * A task struct has one reference for the use as "current".
++	 * If a task dies, then it sets TASK_DEAD in tsk->state and calls
++	 * schedule one last time. The schedule call will never return, and
++	 * the scheduled task must drop that reference.
++	 *
++	 * We must observe prev->state before clearing prev->on_cpu (in
++	 * finish_task), otherwise a concurrent wakeup can get prev
++	 * running on another CPU and we could race with its RUNNING -> DEAD
++	 * transition, resulting in a double drop.
++	 */
++	prev_state = prev->state;
++	vtime_task_switch(prev);
++	perf_event_task_sched_in(prev, current);
++	finish_task(prev);
++	finish_lock_switch(rq);
++	finish_arch_post_lock_switch();
++	kcov_finish_switch(current);
++
++	fire_sched_in_preempt_notifiers(current);
++	/*
++	 * When switching through a kernel thread, the loop in
++	 * membarrier_{private,global}_expedited() may have observed that
++	 * kernel thread and not issued an IPI. It is therefore possible to
++	 * schedule between user->kernel->user threads without passing through
++	 * switch_mm(). Membarrier requires a barrier after storing to
++	 * rq->curr, before returning to userspace, so provide them here:
++	 *
++	 * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
++	 *   provided by mmdrop(),
++	 * - a sync_core for SYNC_CORE.
++ */ ++ if (mm) { ++ membarrier_mm_sync_core_before_usermode(mm); ++ mmdrop(mm); ++ } ++ if (unlikely(prev_state == TASK_DEAD)) { ++ /* ++ * Remove function-return probe instances associated with this ++ * task and put them back on the free list. ++ */ ++ kprobe_flush_task(prev); ++ ++ /* Task is done with its stack. */ ++ put_task_stack(prev); ++ ++ put_task_struct_rcu_user(prev); ++ } ++ ++ tick_nohz_task_switch(); ++ return rq; ++} ++ ++/** ++ * schedule_tail - first thing a freshly forked thread must call. ++ * @prev: the thread we just switched away from. ++ */ ++asmlinkage __visible void schedule_tail(struct task_struct *prev) ++ __releases(rq->lock) ++{ ++ struct rq *rq; ++ ++ /* ++ * New tasks start with FORK_PREEMPT_COUNT, see there and ++ * finish_task_switch() for details. ++ * ++ * finish_task_switch() will drop rq->lock() and lower preempt_count ++ * and the preempt_enable() will end up enabling preemption (on ++ * PREEMPT_COUNT kernels). ++ */ ++ ++ rq = finish_task_switch(prev); ++ preempt_enable(); ++ ++ if (current->set_child_tid) ++ put_user(task_pid_vnr(current), current->set_child_tid); ++ ++ calculate_sigpending(); ++} ++ ++/* ++ * context_switch - switch to the new MM and the new thread's register state. ++ */ ++static __always_inline struct rq * ++context_switch(struct rq *rq, struct task_struct *prev, ++ struct task_struct *next) ++{ ++ prepare_task_switch(rq, prev, next); ++ ++ /* ++ * For paravirt, this is coupled with an exit in switch_to to ++ * combine the page table reload and the switch backend into ++ * one hypercall. ++ */ ++ arch_start_context_switch(prev); ++ ++ /* ++ * kernel -> kernel lazy + transfer active ++ * user -> kernel lazy + mmgrab() active ++ * ++ * kernel -> user switch + mmdrop() active ++ * user -> user switch ++ */ ++ if (!next->mm) { // to kernel ++ enter_lazy_tlb(prev->active_mm, next); ++ ++ next->active_mm = prev->active_mm; ++ if (prev->mm) // from user ++ mmgrab(prev->active_mm); ++ else ++ prev->active_mm = NULL; ++ } else { // to user ++ membarrier_switch_mm(rq, prev->active_mm, next->mm); ++ /* ++ * sys_membarrier() requires an smp_mb() between setting ++ * rq->curr / membarrier_switch_mm() and returning to userspace. ++ * ++ * The below provides this either through switch_mm(), or in ++ * case 'prev->active_mm == next->mm' through ++ * finish_task_switch()'s mmdrop(). ++ */ ++ switch_mm_irqs_off(prev->active_mm, next->mm, next); ++ ++ if (!prev->mm) { // from kernel ++ /* will mmdrop() in finish_task_switch(). */ ++ rq->prev_mm = prev->active_mm; ++ prev->active_mm = NULL; ++ } ++ } ++ ++ prepare_lock_switch(rq, next); ++ ++ /* Here we just switch the register state and the stack. */ ++ switch_to(prev, next, prev); ++ barrier(); ++ ++ return finish_task_switch(prev); ++} ++ ++/* ++ * nr_running, nr_uninterruptible and nr_context_switches: ++ * ++ * externally visible scheduler statistics: current number of runnable ++ * threads, total number of context switches performed since bootup. ++ */ ++unsigned long nr_running(void) ++{ ++ unsigned long i, sum = 0; ++ ++ for_each_online_cpu(i) ++ sum += cpu_rq(i)->nr_running; ++ ++ return sum; ++} ++ ++/* ++ * Check if only the current task is running on the CPU. ++ * ++ * Caution: this function does not check that the caller has disabled ++ * preemption, thus the result might have a time-of-check-to-time-of-use ++ * race. 
The caller is responsible for using it correctly, for example:
++ *
++ * - from a non-preemptible section (of course)
++ *
++ * - from a thread that is bound to a single CPU
++ *
++ * - in a loop with very short iterations (e.g. a polling loop)
++ */
++bool single_task_running(void)
++{
++	return raw_rq()->nr_running == 1;
++}
++EXPORT_SYMBOL(single_task_running);
++
++unsigned long long nr_context_switches(void)
++{
++	int i;
++	unsigned long long sum = 0;
++
++	for_each_possible_cpu(i)
++		sum += cpu_rq(i)->nr_switches;
++
++	return sum;
++}
++
++/*
++ * Consumers of these two interfaces, like for example the cpuidle menu
++ * governor, are using nonsensical data, preferring shallow idle state selection
++ * for a CPU that has IO-wait which might not even end up running the task when
++ * it does become runnable.
++ */
++
++unsigned long nr_iowait_cpu(int cpu)
++{
++	return atomic_read(&cpu_rq(cpu)->nr_iowait);
++}
++
++/*
++ * IO-wait accounting, and how it's mostly bollocks (on SMP).
++ *
++ * The idea behind IO-wait accounting is to account the idle time that we could
++ * have spent running if it were not for IO. That is, if we were to improve the
++ * storage performance, we'd have a proportional reduction in IO-wait time.
++ *
++ * This all works nicely on UP, where, when a task blocks on IO, we account
++ * idle time as IO-wait, because if the storage were faster, it could've been
++ * running and we'd not be idle.
++ *
++ * This has been extended to SMP, by doing the same for each CPU. This however
++ * is broken.
++ *
++ * Imagine for instance the case where two tasks block on one CPU, only the one
++ * CPU will have IO-wait accounted, while the other has regular idle. Even
++ * though, if the storage were faster, both could've run at the same time,
++ * utilising both CPUs.
++ *
++ * This means, that when looking globally, the current IO-wait accounting on
++ * SMP is a lower bound, by reason of under-accounting.
++ *
++ * Worse, since the numbers are provided per CPU, they are sometimes
++ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly
++ * associated with any one particular CPU, it can wake to another CPU than it
++ * blocked on. This means the per CPU IO-wait number is meaningless.
++ *
++ * Task CPU affinities can make all that even more 'interesting'.
++ */
++
++unsigned long nr_iowait(void)
++{
++	unsigned long i, sum = 0;
++
++	for_each_possible_cpu(i)
++		sum += nr_iowait_cpu(i);
++
++	return sum;
++}
++
++#ifdef CONFIG_SMP
++
++/*
++ * sched_exec - execve() is a valuable balancing opportunity, because at
++ * this point the task has the smallest effective memory and cache
++ * footprint.
++ */ ++void sched_exec(void) ++{ ++ struct task_struct *p = current; ++ int dest_cpu; ++ ++ if (task_rq(p)->nr_running < 2) ++ return; ++ ++ dest_cpu = cpumask_any_and(p->cpus_ptr, &sched_rq_watermark[IDLE_WM]); ++ if ( dest_cpu < nr_cpu_ids) { ++#ifdef CONFIG_SCHED_SMT ++ int smt = cpumask_any_and(p->cpus_ptr, &sched_sg_idle_mask); ++ if (smt < nr_cpu_ids) ++ dest_cpu = smt; ++#endif ++ if (likely(cpu_active(dest_cpu))) { ++ struct migration_arg arg = { p, dest_cpu }; ++ ++ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); ++ return; ++ } ++ } ++} ++ ++#endif ++ ++DEFINE_PER_CPU(struct kernel_stat, kstat); ++DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); ++ ++EXPORT_PER_CPU_SYMBOL(kstat); ++EXPORT_PER_CPU_SYMBOL(kernel_cpustat); ++ ++static inline void update_curr(struct rq *rq, struct task_struct *p) ++{ ++ s64 ns = rq->clock_task - p->last_ran; ++ ++ p->sched_time += ns; ++ account_group_exec_runtime(p, ns); ++ ++ p->time_slice -= ns; ++ p->last_ran = rq->clock_task; ++} ++ ++/* ++ * Return accounted runtime for the task. ++ * Return separately the current's pending runtime that have not been ++ * accounted yet. ++ */ ++unsigned long long task_sched_runtime(struct task_struct *p) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ raw_spinlock_t *lock; ++ u64 ns; ++ ++#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) ++ /* ++ * 64-bit doesn't need locks to atomically read a 64-bit value. ++ * So we have a optimization chance when the task's delta_exec is 0. ++ * Reading ->on_cpu is racy, but this is ok. ++ * ++ * If we race with it leaving CPU, we'll take a lock. So we're correct. ++ * If we race with it entering CPU, unaccounted time is 0. This is ++ * indistinguishable from the read occurring a few cycles earlier. ++ * If we see ->on_cpu without ->on_rq, the task is leaving, and has ++ * been accounted, so we're correct here as well. ++ */ ++ if (!p->on_cpu || !task_on_rq_queued(p)) ++ return tsk_seruntime(p); ++#endif ++ ++ rq = task_access_lock_irqsave(p, &lock, &flags); ++ /* ++ * Must be ->curr _and_ ->on_rq. If dequeued, we would ++ * project cycles that may never be accounted to this ++ * thread, breaking clock_gettime(). ++ */ ++ if (p == rq->curr && task_on_rq_queued(p)) { ++ update_rq_clock(rq); ++ update_curr(rq, p); ++ } ++ ns = tsk_seruntime(p); ++ task_access_unlock_irqrestore(p, lock, &flags); ++ ++ return ns; ++} ++ ++DEFINE_PER_CPU(unsigned long, thermal_pressure); ++ ++void arch_set_thermal_pressure(struct cpumask *cpus, ++ unsigned long th_pressure) ++{ ++ int cpu; ++ ++ for_each_cpu(cpu, cpus) ++ WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); ++} ++ ++/* This manages tasks that have run out of timeslice during a scheduler_tick */ ++static inline void scheduler_task_tick(struct rq *rq) ++{ ++ struct task_struct *p = rq->curr; ++ ++ if (is_idle_task(p)) ++ return; ++ ++ update_curr(rq, p); ++ cpufreq_update_util(rq, 0); ++ ++ /* ++ * Tasks have less than RESCHED_NS of time slice left they will be ++ * rescheduled. ++ */ ++ if (p->time_slice >= RESCHED_NS) ++ return; ++ set_tsk_need_resched(p); ++ set_preempt_need_resched(); ++} ++ ++/* ++ * This function gets called by the timer code, with HZ frequency. ++ * We call it with interrupts disabled. 
++ */
++void scheduler_tick(void)
++{
++	int cpu __maybe_unused = smp_processor_id();
++	struct rq *rq = cpu_rq(cpu);
++
++	arch_scale_freq_tick();
++	sched_clock_tick();
++
++	raw_spin_lock(&rq->lock);
++	update_rq_clock(rq);
++
++	scheduler_task_tick(rq);
++	calc_global_load_tick(rq);
++	psi_task_tick(rq);
++
++	rq->last_tick = rq->clock;
++	raw_spin_unlock(&rq->lock);
++
++	perf_event_task_tick();
++}
++
++#ifdef CONFIG_SCHED_SMT
++static inline int active_load_balance_cpu_stop(void *data)
++{
++	struct rq *rq = this_rq();
++	struct task_struct *p = data;
++	cpumask_t tmp;
++	unsigned long flags;
++
++	local_irq_save(flags);
++
++	raw_spin_lock(&p->pi_lock);
++	raw_spin_lock(&rq->lock);
++
++	rq->active_balance = 0;
++	/* _something_ may have changed the task, double check again */
++	if (task_on_rq_queued(p) && task_rq(p) == rq &&
++	    cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) {
++		int cpu = cpu_of(rq);
++		int dcpu = __best_mask_cpu(cpu, &tmp,
++					   per_cpu(sched_cpu_llc_mask, cpu));
++		rq = move_queued_task(rq, p, dcpu);
++	}
++
++	raw_spin_unlock(&rq->lock);
++	raw_spin_unlock(&p->pi_lock);
++
++	local_irq_restore(flags);
++
++	return 0;
++}
++
++/* sg_balance_trigger - trigger sibling group balance for @cpu */
++static inline int sg_balance_trigger(const int cpu)
++{
++	struct rq *rq = cpu_rq(cpu);
++	unsigned long flags;
++	struct task_struct *curr;
++	int res;
++
++	if (!raw_spin_trylock_irqsave(&rq->lock, flags))
++		return 0;
++	curr = rq->curr;
++	res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\
++	      cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\
++	      (!rq->active_balance);
++
++	if (res)
++		rq->active_balance = 1;
++
++	raw_spin_unlock_irqrestore(&rq->lock, flags);
++
++	if (res)
++		stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop,
++				    curr, &rq->active_balance_work);
++	return res;
++}
++
++/*
++ * sg_balance_check - sibling group balance check for run queue @rq
++ */
++static inline void sg_balance_check(struct rq *rq)
++{
++	cpumask_t chk;
++	int cpu;
++
++	/* exit when no sg in idle */
++	if (cpumask_empty(&sched_sg_idle_mask))
++		return;
++
++	cpu = cpu_of(rq);
++	/*
++	 * Only a cpu in the sibling idle group will do the checking and then
++	 * find potential cpus which can migrate the current running task
++	 */
++	if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) &&
++	    cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) &&
++	    cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) {
++		int i, tried = 0;
++
++		for_each_cpu_wrap(i, &chk, cpu) {
++			if (cpumask_subset(cpu_smt_mask(i), &chk)) {
++				if (sg_balance_trigger(i))
++					return;
++				if (tried)
++					return;
++				tried++;
++			}
++		}
++	}
++}
++#endif /* CONFIG_SCHED_SMT */
++
++#ifdef CONFIG_NO_HZ_FULL
++
++struct tick_work {
++	int			cpu;
++	atomic_t		state;
++	struct delayed_work	work;
++};
++/* Values for ->state, see diagram below. */
++#define TICK_SCHED_REMOTE_OFFLINE	0
++#define TICK_SCHED_REMOTE_OFFLINING	1
++#define TICK_SCHED_REMOTE_RUNNING	2
++
++/*
++ * State diagram for ->state:
++ *
++ *
++ *          TICK_SCHED_REMOTE_OFFLINE
++ *                    |   ^
++ *                    |   |
++ *                    |   | sched_tick_remote()
++ *                    |   |
++ *                    |   |
++ *                    +--TICK_SCHED_REMOTE_OFFLINING
++ *                    |   ^
++ *                    |   |
++ * sched_tick_start() |   | sched_tick_stop()
++ *                    |   |
++ *                    V   |
++ *          TICK_SCHED_REMOTE_RUNNING
++ *
++ *
++ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote()
++ * and sched_tick_start() are happy to leave the state in RUNNING.
++ */ ++ ++static struct tick_work __percpu *tick_work_cpu; ++ ++static void sched_tick_remote(struct work_struct *work) ++{ ++ struct delayed_work *dwork = to_delayed_work(work); ++ struct tick_work *twork = container_of(dwork, struct tick_work, work); ++ int cpu = twork->cpu; ++ struct rq *rq = cpu_rq(cpu); ++ struct task_struct *curr; ++ unsigned long flags; ++ u64 delta; ++ int os; ++ ++ /* ++ * Handle the tick only if it appears the remote CPU is running in full ++ * dynticks mode. The check is racy by nature, but missing a tick or ++ * having one too much is no big deal because the scheduler tick updates ++ * statistics and checks timeslices in a time-independent way, regardless ++ * of when exactly it is running. ++ */ ++ if (!tick_nohz_tick_stopped_cpu(cpu)) ++ goto out_requeue; ++ ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ curr = rq->curr; ++ if (cpu_is_offline(cpu)) ++ goto out_unlock; ++ ++ update_rq_clock(rq); ++ if (!is_idle_task(curr)) { ++ /* ++ * Make sure the next tick runs within a reasonable ++ * amount of time. ++ */ ++ delta = rq_clock_task(rq) - curr->last_ran; ++ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); ++ } ++ scheduler_task_tick(rq); ++ ++ calc_load_nohz_remote(rq); ++out_unlock: ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++ ++out_requeue: ++ /* ++ * Run the remote tick once per second (1Hz). This arbitrary ++ * frequency is large enough to avoid overload but short enough ++ * to keep scheduler internal stats reasonably up to date. But ++ * first update state to reflect hotplug activity if required. ++ */ ++ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); ++ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); ++ if (os == TICK_SCHED_REMOTE_RUNNING) ++ queue_delayed_work(system_unbound_wq, dwork, HZ); ++} ++ ++static void sched_tick_start(int cpu) ++{ ++ int os; ++ struct tick_work *twork; ++ ++ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) ++ return; ++ ++ WARN_ON_ONCE(!tick_work_cpu); ++ ++ twork = per_cpu_ptr(tick_work_cpu, cpu); ++ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); ++ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); ++ if (os == TICK_SCHED_REMOTE_OFFLINE) { ++ twork->cpu = cpu; ++ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); ++ queue_delayed_work(system_unbound_wq, &twork->work, HZ); ++ } ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++static void sched_tick_stop(int cpu) ++{ ++ struct tick_work *twork; ++ ++ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) ++ return; ++ ++ WARN_ON_ONCE(!tick_work_cpu); ++ ++ twork = per_cpu_ptr(tick_work_cpu, cpu); ++ cancel_delayed_work_sync(&twork->work); ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++int __init sched_tick_offload_init(void) ++{ ++ tick_work_cpu = alloc_percpu(struct tick_work); ++ BUG_ON(!tick_work_cpu); ++ return 0; ++} ++ ++#else /* !CONFIG_NO_HZ_FULL */ ++static inline void sched_tick_start(int cpu) { } ++static inline void sched_tick_stop(int cpu) { } ++#endif ++ ++#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ ++ defined(CONFIG_PREEMPT_TRACER)) ++/* ++ * If the value passed in is equal to the current preempt count ++ * then we just disabled preemption. Start timing the latency. ++ */ ++static inline void preempt_latency_start(int val) ++{ ++ if (preempt_count() == val) { ++ unsigned long ip = get_lock_parent_ip(); ++#ifdef CONFIG_DEBUG_PREEMPT ++ current->preempt_disable_ip = ip; ++#endif ++ trace_preempt_off(CALLER_ADDR0, ip); ++ } ++} ++ ++void preempt_count_add(int val) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Underflow? 
++ */ ++ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) ++ return; ++#endif ++ __preempt_count_add(val); ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Spinlock count overflowing soon? ++ */ ++ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= ++ PREEMPT_MASK - 10); ++#endif ++ preempt_latency_start(val); ++} ++EXPORT_SYMBOL(preempt_count_add); ++NOKPROBE_SYMBOL(preempt_count_add); ++ ++/* ++ * If the value passed in equals to the current preempt count ++ * then we just enabled preemption. Stop timing the latency. ++ */ ++static inline void preempt_latency_stop(int val) ++{ ++ if (preempt_count() == val) ++ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); ++} ++ ++void preempt_count_sub(int val) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Underflow? ++ */ ++ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) ++ return; ++ /* ++ * Is the spinlock portion underflowing? ++ */ ++ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && ++ !(preempt_count() & PREEMPT_MASK))) ++ return; ++#endif ++ ++ preempt_latency_stop(val); ++ __preempt_count_sub(val); ++} ++EXPORT_SYMBOL(preempt_count_sub); ++NOKPROBE_SYMBOL(preempt_count_sub); ++ ++#else ++static inline void preempt_latency_start(int val) { } ++static inline void preempt_latency_stop(int val) { } ++#endif ++ ++static inline unsigned long get_preempt_disable_ip(struct task_struct *p) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ return p->preempt_disable_ip; ++#else ++ return 0; ++#endif ++} ++ ++/* ++ * Print scheduling while atomic bug: ++ */ ++static noinline void __schedule_bug(struct task_struct *prev) ++{ ++ /* Save this before calling printk(), since that will clobber it */ ++ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); ++ ++ if (oops_in_progress) ++ return; ++ ++ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", ++ prev->comm, prev->pid, preempt_count()); ++ ++ debug_show_held_locks(prev); ++ print_modules(); ++ if (irqs_disabled()) ++ print_irqtrace_events(prev); ++ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) ++ && in_atomic_preempt_off()) { ++ pr_err("Preemption disabled at:"); ++ print_ip_sym(preempt_disable_ip); ++ pr_cont("\n"); ++ } ++ if (panic_on_warn) ++ panic("scheduling while atomic\n"); ++ ++ dump_stack(); ++ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); ++} ++ ++/* ++ * Various schedule()-time debugging checks and statistics: ++ */ ++static inline void schedule_debug(struct task_struct *prev, bool preempt) ++{ ++#ifdef CONFIG_SCHED_STACK_END_CHECK ++ if (task_stack_end_corrupted(prev)) ++ panic("corrupted stack end detected inside scheduler\n"); ++#endif ++ ++#ifdef CONFIG_DEBUG_ATOMIC_SLEEP ++ if (!preempt && prev->state && prev->non_block_count) { ++ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", ++ prev->comm, prev->pid, prev->non_block_count); ++ dump_stack(); ++ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); ++ } ++#endif ++ ++ if (unlikely(in_atomic_preempt_off())) { ++ __schedule_bug(prev); ++ preempt_count_set(PREEMPT_DISABLED); ++ } ++ rcu_sleep_check(); ++ ++ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); ++ ++ schedstat_inc(this_rq()->sched_count); ++} ++ ++#ifdef CONFIG_SMP ++ ++#define SCHED_RQ_NR_MIGRATION (32UL) ++/* ++ * Migrate pending tasks in @rq to @dest_cpu ++ * Will try to migrate mininal of half of @rq nr_running tasks and ++ * SCHED_RQ_NR_MIGRATION to @dest_cpu ++ */ ++static inline int ++migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) ++{ ++ struct task_struct *p, *skip = rq->curr; ++ int nr_migrated = 0; ++ int nr_tries = 
min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION); ++ ++ while (skip != rq->idle && nr_tries && ++ (p = sched_rq_next_task(skip, rq)) != rq->idle) { ++ skip = sched_rq_next_task(p, rq); ++ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { ++ __SCHED_DEQUEUE_TASK(p, rq, 0, ); ++ set_task_cpu(p, dest_cpu); ++ __SCHED_ENQUEUE_TASK(p, dest_rq, 0); ++ nr_migrated++; ++ } ++ nr_tries--; ++ } ++ ++ return nr_migrated; ++} ++ ++static inline int take_other_rq_tasks(struct rq *rq, int cpu) ++{ ++ struct cpumask *affinity_mask, *end_mask; ++ ++ if (unlikely(!rq->online)) ++ return 0; ++ ++ if (cpumask_empty(&sched_rq_pending_mask)) ++ return 0; ++ ++ affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); ++ do { ++ int i; ++ for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { ++ int nr_migrated; ++ struct rq *src_rq; ++ ++ src_rq = cpu_rq(i); ++ if (!do_raw_spin_trylock(&src_rq->lock)) ++ continue; ++ spin_acquire(&src_rq->lock.dep_map, ++ SINGLE_DEPTH_NESTING, 1, _RET_IP_); ++ ++ if ((nr_migrated = migrate_pending_tasks(src_rq, rq, cpu))) { ++ src_rq->nr_running -= nr_migrated; ++#ifdef CONFIG_SMP ++ if (src_rq->nr_running < 2) ++ cpumask_clear_cpu(i, &sched_rq_pending_mask); ++#endif ++ rq->nr_running += nr_migrated; ++#ifdef CONFIG_SMP ++ if (rq->nr_running > 1) ++ cpumask_set_cpu(cpu, &sched_rq_pending_mask); ++#endif ++ update_sched_rq_watermark(rq); ++ cpufreq_update_util(rq, 0); ++ ++ spin_release(&src_rq->lock.dep_map, _RET_IP_); ++ do_raw_spin_unlock(&src_rq->lock); ++ ++ return 1; ++ } ++ ++ spin_release(&src_rq->lock.dep_map, _RET_IP_); ++ do_raw_spin_unlock(&src_rq->lock); ++ } ++ } while (++affinity_mask < end_mask); ++ ++ return 0; ++} ++#endif ++ ++/* ++ * Timeslices below RESCHED_NS are considered as good as expired as there's no ++ * point rescheduling when there's so little time left. ++ */ ++static inline void check_curr(struct task_struct *p, struct rq *rq) ++{ ++ if (unlikely(rq->idle == p)) ++ return; ++ ++ update_curr(rq, p); ++ ++ if (p->time_slice < RESCHED_NS) { ++ p->time_slice = sched_timeslice_ns; ++ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { ++ if (SCHED_RR != p->policy) ++ deboost_task(p); ++ requeue_task(p, rq); ++ } ++ } ++} ++ ++static inline struct task_struct * ++choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) ++{ ++ struct task_struct *next; ++ ++ if (unlikely(rq->skip)) { ++ next = rq_runnable_task(rq); ++ if (next == rq->idle) { ++#ifdef CONFIG_SMP ++ if (!take_other_rq_tasks(rq, cpu)) { ++#endif ++ rq->skip = NULL; ++ schedstat_inc(rq->sched_goidle); ++ return next; ++#ifdef CONFIG_SMP ++ } ++ next = rq_runnable_task(rq); ++#endif ++ } ++ rq->skip = NULL; ++#ifdef CONFIG_HIGH_RES_TIMERS ++ hrtick_start(rq, next->time_slice); ++#endif ++ return next; ++ } ++ ++ next = sched_rq_first_task(rq); ++ if (next == rq->idle) { ++#ifdef CONFIG_SMP ++ if (!take_other_rq_tasks(rq, cpu)) { ++#endif ++ schedstat_inc(rq->sched_goidle); ++ return next; ++#ifdef CONFIG_SMP ++ } ++ next = sched_rq_first_task(rq); ++#endif ++ } ++#ifdef CONFIG_HIGH_RES_TIMERS ++ hrtick_start(rq, next->time_slice); ++#endif ++ return next; ++} ++ ++/* ++ * schedule() is the main scheduler function. ++ * ++ * The main means of driving the scheduler and thus entering this function are: ++ * ++ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. ++ * ++ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return ++ * paths. For example, see arch/x86/entry_64.S. 
++ * ++ * To drive preemption between tasks, the scheduler sets the flag in timer ++ * interrupt handler scheduler_tick(). ++ * ++ * 3. Wakeups don't really cause entry into schedule(). They add a ++ * task to the run-queue and that's it. ++ * ++ * Now, if the new task added to the run-queue preempts the current ++ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets ++ * called on the nearest possible occasion: ++ * ++ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): ++ * ++ * - in syscall or exception context, at the next outmost ++ * preempt_enable(). (this might be as soon as the wake_up()'s ++ * spin_unlock()!) ++ * ++ * - in IRQ context, return from interrupt-handler to ++ * preemptible context ++ * ++ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) ++ * then at the next: ++ * ++ * - cond_resched() call ++ * - explicit schedule() call ++ * - return from syscall or exception to user-space ++ * - return from interrupt-handler to user-space ++ * ++ * WARNING: must be called with preemption disabled! ++ */ ++static void __sched notrace __schedule(bool preempt) ++{ ++ struct task_struct *prev, *next; ++ unsigned long *switch_count; ++ struct rq *rq; ++ int cpu; ++ ++ cpu = smp_processor_id(); ++ rq = cpu_rq(cpu); ++ prev = rq->curr; ++ ++ schedule_debug(prev, preempt); ++ ++ /* by passing sched_feat(HRTICK) checking which Alt schedule FW doesn't support */ ++ hrtick_clear(rq); ++ ++ local_irq_disable(); ++ rcu_note_context_switch(preempt); ++ ++ /* ++ * Make sure that signal_pending_state()->signal_pending() below ++ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) ++ * done by the caller to avoid the race with signal_wake_up(). ++ * ++ * The membarrier system call requires a full memory barrier ++ * after coming from user-space, before storing to rq->curr. ++ */ ++ raw_spin_lock(&rq->lock); ++ smp_mb__after_spinlock(); ++ ++ update_rq_clock(rq); ++ ++ switch_count = &prev->nivcsw; ++ if (!preempt && prev->state) { ++ if (signal_pending_state(prev->state, prev)) { ++ prev->state = TASK_RUNNING; ++ } else { ++ if (rq_switch_time(rq) < boost_threshold(prev)) ++ boost_task(prev); ++ deactivate_task(prev, rq); ++ ++ if (prev->in_iowait) { ++ atomic_inc(&rq->nr_iowait); ++ delayacct_blkio_start(); ++ } ++ } ++ switch_count = &prev->nvcsw; ++ } ++ ++ clear_tsk_need_resched(prev); ++ clear_preempt_need_resched(); ++ ++ check_curr(prev, rq); ++ ++ next = choose_next_task(rq, cpu, prev); ++ ++ if (likely(prev != next)) { ++ next->last_ran = rq->clock_task; ++ rq->last_ts_switch = rq->clock; ++ ++ rq->nr_switches++; ++ /* ++ * RCU users of rcu_dereference(rq->curr) may not see ++ * changes to task_struct made by pick_next_task(). ++ */ ++ RCU_INIT_POINTER(rq->curr, next); ++ /* ++ * The membarrier system call requires each architecture ++ * to have a full memory barrier after updating ++ * rq->curr, before returning to user-space. ++ * ++ * Here are the schemes providing that barrier on the ++ * various architectures: ++ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. ++ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
++ * - finish_lock_switch() for weakly-ordered ++ * architectures where spin_unlock is a full barrier, ++ * - switch_to() for arm64 (weakly-ordered, spin_unlock ++ * is a RELEASE barrier), ++ */ ++ ++*switch_count; ++ ++ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); ++ ++ trace_sched_switch(preempt, prev, next); ++ ++ /* Also unlocks the rq: */ ++ rq = context_switch(rq, prev, next); ++ } else ++ raw_spin_unlock_irq(&rq->lock); ++ ++#ifdef CONFIG_SCHED_SMT ++ sg_balance_check(rq); ++#endif ++} ++ ++void __noreturn do_task_dead(void) ++{ ++ /* Causes final put_task_struct in finish_task_switch(): */ ++ set_special_state(TASK_DEAD); ++ ++ /* Tell freezer to ignore us: */ ++ current->flags |= PF_NOFREEZE; ++ ++ __schedule(false); ++ BUG(); ++ ++ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ ++ for (;;) ++ cpu_relax(); ++} ++ ++static inline void sched_submit_work(struct task_struct *tsk) ++{ ++ if (!tsk->state) ++ return; ++ ++ /* ++ * If a worker went to sleep, notify and ask workqueue whether ++ * it wants to wake up a task to maintain concurrency. ++ * As this function is called inside the schedule() context, ++ * we disable preemption to avoid it calling schedule() again ++ * in the possible wakeup of a kworker and because wq_worker_sleeping() ++ * requires it. ++ */ ++ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { ++ preempt_disable(); ++ if (tsk->flags & PF_WQ_WORKER) ++ wq_worker_sleeping(tsk); ++ else ++ io_wq_worker_sleeping(tsk); ++ preempt_enable_no_resched(); ++ } ++ ++ if (tsk_is_pi_blocked(tsk)) ++ return; ++ ++ /* ++ * If we are going to sleep and we have plugged IO queued, ++ * make sure to submit it to avoid deadlocks. ++ */ ++ if (blk_needs_flush_plug(tsk)) ++ blk_schedule_flush_plug(tsk); ++} ++ ++static void sched_update_worker(struct task_struct *tsk) ++{ ++ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { ++ if (tsk->flags & PF_WQ_WORKER) ++ wq_worker_running(tsk); ++ else ++ io_wq_worker_running(tsk); ++ } ++} ++ ++asmlinkage __visible void __sched schedule(void) ++{ ++ struct task_struct *tsk = current; ++ ++ sched_submit_work(tsk); ++ do { ++ preempt_disable(); ++ __schedule(false); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++ sched_update_worker(tsk); ++} ++EXPORT_SYMBOL(schedule); ++ ++/* ++ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted ++ * state (have scheduled out non-voluntarily) by making sure that all ++ * tasks have either left the run queue or have gone into user space. ++ * As idle tasks do not do either, they must not ever be preempted ++ * (schedule out non-voluntarily). ++ * ++ * schedule_idle() is similar to schedule_preempt_disable() except that it ++ * never enables preemption because it does not call sched_submit_work(). ++ */ ++void __sched schedule_idle(void) ++{ ++ /* ++ * As this skips calling sched_submit_work(), which the idle task does ++ * regardless because that function is a nop when the task is in a ++ * TASK_RUNNING state, make sure this isn't used someplace that the ++ * current task can be in any other state. Note, idle is always in the ++ * TASK_RUNNING state. 
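++ *
++ * Roughly, the idle loop (do_idle()) is the intended caller:
++ *
++ *	while (!need_resched())
++ *		cpuidle_idle_call();
++ *	schedule_idle();
++ *
++ * so the TASK_RUNNING assumption above always holds for it.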
++ */ ++ WARN_ON_ONCE(current->state); ++ do { ++ __schedule(false); ++ } while (need_resched()); ++} ++ ++#ifdef CONFIG_CONTEXT_TRACKING ++asmlinkage __visible void __sched schedule_user(void) ++{ ++ /* ++ * If we come here after a random call to set_need_resched(), ++ * or we have been woken up remotely but the IPI has not yet arrived, ++ * we haven't yet exited the RCU idle mode. Do it here manually until ++ * we find a better solution. ++ * ++ * NB: There are buggy callers of this function. Ideally we ++ * should warn if prev_state != CONTEXT_USER, but that will trigger ++ * too frequently to make sense yet. ++ */ ++ enum ctx_state prev_state = exception_enter(); ++ schedule(); ++ exception_exit(prev_state); ++} ++#endif ++ ++/** ++ * schedule_preempt_disabled - called with preemption disabled ++ * ++ * Returns with preemption disabled. Note: preempt_count must be 1 ++ */ ++void __sched schedule_preempt_disabled(void) ++{ ++ sched_preempt_enable_no_resched(); ++ schedule(); ++ preempt_disable(); ++} ++ ++static void __sched notrace preempt_schedule_common(void) ++{ ++ do { ++ /* ++ * Because the function tracer can trace preempt_count_sub() ++ * and it also uses preempt_enable/disable_notrace(), if ++ * NEED_RESCHED is set, the preempt_enable_notrace() called ++ * by the function tracer will call this function again and ++ * cause infinite recursion. ++ * ++ * Preemption must be disabled here before the function ++ * tracer can trace. Break up preempt_disable() into two ++ * calls. One to disable preemption without fear of being ++ * traced. The other to still record the preemption latency, ++ * which can also be traced by the function tracer. ++ */ ++ preempt_disable_notrace(); ++ preempt_latency_start(1); ++ __schedule(true); ++ preempt_latency_stop(1); ++ preempt_enable_no_resched_notrace(); ++ ++ /* ++ * Check again in case we missed a preemption opportunity ++ * between schedule and now. ++ */ ++ } while (need_resched()); ++} ++ ++#ifdef CONFIG_PREEMPTION ++/* ++ * This is the entry point to schedule() from in-kernel preemption ++ * off of preempt_enable. ++ */ ++asmlinkage __visible void __sched notrace preempt_schedule(void) ++{ ++ /* ++ * If there is a non-zero preempt_count or interrupts are disabled, ++ * we do not want to preempt the current task. Just return.. ++ */ ++ if (likely(!preemptible())) ++ return; ++ ++ preempt_schedule_common(); ++} ++NOKPROBE_SYMBOL(preempt_schedule); ++EXPORT_SYMBOL(preempt_schedule); ++ ++/** ++ * preempt_schedule_notrace - preempt_schedule called by tracing ++ * ++ * The tracing infrastructure uses preempt_enable_notrace to prevent ++ * recursion and tracing preempt enabling caused by the tracing ++ * infrastructure itself. But as tracing can happen in areas coming ++ * from userspace or just about to enter userspace, a preempt enable ++ * can occur before user_exit() is called. This will cause the scheduler ++ * to be called when the system is still in usermode. ++ * ++ * To prevent this, the preempt_enable_notrace will use this function ++ * instead of preempt_schedule() to exit user context if needed before ++ * calling the scheduler. 
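++ *
++ * In short: when preempt_enable_notrace() drops the last preempt count
++ * with NEED_RESCHED set it lands here, and exception_enter() /
++ * exception_exit() below take care of leaving and re-entering user
++ * context around __schedule().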
++ */ ++asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) ++{ ++ enum ctx_state prev_ctx; ++ ++ if (likely(!preemptible())) ++ return; ++ ++ do { ++ /* ++ * Because the function tracer can trace preempt_count_sub() ++ * and it also uses preempt_enable/disable_notrace(), if ++ * NEED_RESCHED is set, the preempt_enable_notrace() called ++ * by the function tracer will call this function again and ++ * cause infinite recursion. ++ * ++ * Preemption must be disabled here before the function ++ * tracer can trace. Break up preempt_disable() into two ++ * calls. One to disable preemption without fear of being ++ * traced. The other to still record the preemption latency, ++ * which can also be traced by the function tracer. ++ */ ++ preempt_disable_notrace(); ++ preempt_latency_start(1); ++ /* ++ * Needs preempt disabled in case user_exit() is traced ++ * and the tracer calls preempt_enable_notrace() causing ++ * an infinite recursion. ++ */ ++ prev_ctx = exception_enter(); ++ __schedule(true); ++ exception_exit(prev_ctx); ++ ++ preempt_latency_stop(1); ++ preempt_enable_no_resched_notrace(); ++ } while (need_resched()); ++} ++EXPORT_SYMBOL_GPL(preempt_schedule_notrace); ++ ++#endif /* CONFIG_PREEMPTION */ ++ ++/* ++ * This is the entry point to schedule() from kernel preemption ++ * off of irq context. ++ * Note, that this is called and return with irqs disabled. This will ++ * protect us against recursive calling from irq. ++ */ ++asmlinkage __visible void __sched preempt_schedule_irq(void) ++{ ++ enum ctx_state prev_state; ++ ++ /* Catch callers which need to be fixed */ ++ BUG_ON(preempt_count() || !irqs_disabled()); ++ ++ prev_state = exception_enter(); ++ ++ do { ++ preempt_disable(); ++ local_irq_enable(); ++ __schedule(true); ++ local_irq_disable(); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++ ++ exception_exit(prev_state); ++} ++ ++int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, ++ void *key) ++{ ++ return try_to_wake_up(curr->private, mode, wake_flags); ++} ++EXPORT_SYMBOL(default_wake_function); ++ ++static inline void check_task_changed(struct rq *rq, struct task_struct *p) ++{ ++ /* Trigger resched if task sched_prio has been modified. */ ++ if (task_on_rq_queued(p) && sched_task_need_requeue(p)) { ++ requeue_task(p, rq); ++ check_preempt_curr(rq); ++ } ++} ++ ++#ifdef CONFIG_RT_MUTEXES ++ ++static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) ++{ ++ if (pi_task) ++ prio = min(prio, pi_task->prio); ++ ++ return prio; ++} ++ ++static inline int rt_effective_prio(struct task_struct *p, int prio) ++{ ++ struct task_struct *pi_task = rt_mutex_get_top_task(p); ++ ++ return __rt_effective_prio(pi_task, prio); ++} ++ ++/* ++ * rt_mutex_setprio - set the current priority of a task ++ * @p: task to boost ++ * @pi_task: donor task ++ * ++ * This function changes the 'effective' priority of a task. It does ++ * not touch ->normal_prio like __setscheduler(). ++ * ++ * Used by the rt_mutex code to implement priority inheritance ++ * logic. Call site only calls if the priority of the task changed. ++ */ ++void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) ++{ ++ int prio; ++ struct rq *rq; ++ raw_spinlock_t *lock; ++ ++ /* XXX used to be waiter->prio, not waiter->task->prio */ ++ prio = __rt_effective_prio(pi_task, p->normal_prio); ++ ++ /* ++ * If nothing changed; bail early. 
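++ * Same top pi-donor and same resulting effective prio as before means
++ * there is nothing to boost or deboost.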
++ */ ++ if (p->pi_top_task == pi_task && prio == p->prio) ++ return; ++ ++ rq = __task_access_lock(p, &lock); ++ /* ++ * Set under pi_lock && rq->lock, such that the value can be used under ++ * either lock. ++ * ++ * Note that there is loads of tricky to make this pointer cache work ++ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to ++ * ensure a task is de-boosted (pi_task is set to NULL) before the ++ * task is allowed to run again (and can exit). This ensures the pointer ++ * points to a blocked task -- which guaratees the task is present. ++ */ ++ p->pi_top_task = pi_task; ++ ++ /* ++ * For FIFO/RR we only need to set prio, if that matches we're done. ++ */ ++ if (prio == p->prio) ++ goto out_unlock; ++ ++ /* ++ * Idle task boosting is a nono in general. There is one ++ * exception, when PREEMPT_RT and NOHZ is active: ++ * ++ * The idle task calls get_next_timer_interrupt() and holds ++ * the timer wheel base->lock on the CPU and another CPU wants ++ * to access the timer (probably to cancel it). We can safely ++ * ignore the boosting request, as the idle CPU runs this code ++ * with interrupts disabled and will complete the lock ++ * protected section without being interrupted. So there is no ++ * real need to boost. ++ */ ++ if (unlikely(p == rq->idle)) { ++ WARN_ON(p != rq->curr); ++ WARN_ON(p->pi_blocked_on); ++ goto out_unlock; ++ } ++ ++ trace_sched_pi_setprio(p, pi_task); ++ p->prio = prio; ++ ++ check_task_changed(rq, p); ++out_unlock: ++ __task_access_unlock(p, lock); ++} ++#else ++static inline int rt_effective_prio(struct task_struct *p, int prio) ++{ ++ return prio; ++} ++#endif ++ ++void set_user_nice(struct task_struct *p, long nice) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ raw_spinlock_t *lock; ++ ++ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) ++ return; ++ /* ++ * We have to be careful, if called from sys_setpriority(), ++ * the task might be in the middle of scheduling on another CPU. ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ rq = __task_access_lock(p, &lock); ++ ++ p->static_prio = NICE_TO_PRIO(nice); ++ /* ++ * The RT priorities are set via sched_setscheduler(), but we still ++ * allow the 'normal' nice value to be set - but as expected ++ * it wont have any effect on scheduling until the task is ++ * not SCHED_NORMAL/SCHED_BATCH: ++ */ ++ if (task_has_rt_policy(p)) ++ goto out_unlock; ++ ++ p->prio = effective_prio(p); ++ check_task_changed(rq, p); ++out_unlock: ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++} ++EXPORT_SYMBOL(set_user_nice); ++ ++/* ++ * can_nice - check if a task can reduce its nice value ++ * @p: task ++ * @nice: nice value ++ */ ++int can_nice(const struct task_struct *p, const int nice) ++{ ++ /* Convert nice value [19,-20] to rlimit style value [1,40] */ ++ int nice_rlim = nice_to_rlimit(nice); ++ ++ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || ++ capable(CAP_SYS_NICE)); ++} ++ ++#ifdef __ARCH_WANT_SYS_NICE ++ ++/* ++ * sys_nice - change the priority of the current process. ++ * @increment: priority increment ++ * ++ * sys_setpriority is a more generic, but much slower function that ++ * does similar things. ++ */ ++SYSCALL_DEFINE1(nice, int, increment) ++{ ++ long nice, retval; ++ ++ /* ++ * Setpriority might change our priority at the same moment. ++ * We don't have to worry. Conceptually one call occurs first ++ * and we have a single winner. 
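++ *
++ * The clamping below keeps the result sane: e.g. at nice 5, nice(100)
++ * first clamps the increment to NICE_WIDTH (40), then clamps the sum
++ * 45 down to MAX_NICE (19).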
++ */ ++ ++ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); ++ nice = task_nice(current) + increment; ++ ++ nice = clamp_val(nice, MIN_NICE, MAX_NICE); ++ if (increment < 0 && !can_nice(current, nice)) ++ return -EPERM; ++ ++ retval = security_task_setnice(current, nice); ++ if (retval) ++ return retval; ++ ++ set_user_nice(current, nice); ++ return 0; ++} ++ ++#endif ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The priority value as seen by users in /proc. ++ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes ++ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). ++ */ ++int task_prio(const struct task_struct *p) ++{ ++ if (p->prio < MAX_RT_PRIO) ++ return (p->prio - MAX_RT_PRIO); ++ return (p->prio - MAX_RT_PRIO + p->boost_prio); ++} ++ ++/** ++ * idle_cpu - is a given CPU idle currently? ++ * @cpu: the processor in question. ++ * ++ * Return: 1 if the CPU is currently idle. 0 otherwise. ++ */ ++int idle_cpu(int cpu) ++{ ++ return cpu_curr(cpu) == cpu_rq(cpu)->idle; ++} ++ ++/** ++ * idle_task - return the idle task for a given CPU. ++ * @cpu: the processor in question. ++ * ++ * Return: The idle task for the cpu @cpu. ++ */ ++struct task_struct *idle_task(int cpu) ++{ ++ return cpu_rq(cpu)->idle; ++} ++ ++/** ++ * find_process_by_pid - find a process with a matching PID value. ++ * @pid: the pid in question. ++ * ++ * The task of @pid, if found. %NULL otherwise. ++ */ ++static inline struct task_struct *find_process_by_pid(pid_t pid) ++{ ++ return pid ? find_task_by_vpid(pid) : current; ++} ++ ++/* ++ * sched_setparam() passes in -1 for its policy, to let the functions ++ * it calls know not to change it. ++ */ ++#define SETPARAM_POLICY -1 ++ ++static void __setscheduler_params(struct task_struct *p, ++ const struct sched_attr *attr) ++{ ++ int policy = attr->sched_policy; ++ ++ if (policy == SETPARAM_POLICY) ++ policy = p->policy; ++ ++ p->policy = policy; ++ ++ /* ++ * allow normal nice value to be set, but will not have any ++ * effect on scheduling until the task not SCHED_NORMAL/ ++ * SCHED_BATCH ++ */ ++ p->static_prio = NICE_TO_PRIO(attr->sched_nice); ++ ++ /* ++ * __sched_setscheduler() ensures attr->sched_priority == 0 when ++ * !rt_policy. Always setting this ensures that things like ++ * getparam()/getattr() don't report silly values for !rt tasks. ++ */ ++ p->rt_priority = attr->sched_priority; ++ p->normal_prio = normal_prio(p); ++} ++ ++/* Actually do priority change: must hold rq lock. */ ++static void __setscheduler(struct rq *rq, struct task_struct *p, ++ const struct sched_attr *attr, bool keep_boost) ++{ ++ __setscheduler_params(p, attr); ++ ++ /* ++ * Keep a potential priority boosting if called from ++ * sched_setscheduler(). 
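++ * If the task is currently PI-boosted, rt_effective_prio() below may pick
++ * the donor's priority over the freshly computed normal_prio(), so the
++ * boost survives the policy change.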
++ */ ++ p->prio = normal_prio(p); ++ if (keep_boost) ++ p->prio = rt_effective_prio(p, p->prio); ++} ++ ++/* ++ * check the target process has a UID that matches the current process's ++ */ ++static bool check_same_owner(struct task_struct *p) ++{ ++ const struct cred *cred = current_cred(), *pcred; ++ bool match; ++ ++ rcu_read_lock(); ++ pcred = __task_cred(p); ++ match = (uid_eq(cred->euid, pcred->euid) || ++ uid_eq(cred->euid, pcred->uid)); ++ rcu_read_unlock(); ++ return match; ++} ++ ++static int __sched_setscheduler(struct task_struct *p, ++ const struct sched_attr *attr, ++ bool user, bool pi) ++{ ++ const struct sched_attr dl_squash_attr = { ++ .size = sizeof(struct sched_attr), ++ .sched_policy = SCHED_FIFO, ++ .sched_nice = 0, ++ .sched_priority = 99, ++ }; ++ int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; ++ int retval, oldpolicy = -1; ++ int policy = attr->sched_policy; ++ unsigned long flags; ++ struct rq *rq; ++ int reset_on_fork; ++ raw_spinlock_t *lock; ++ ++ /* The pi code expects interrupts enabled */ ++ BUG_ON(pi && in_interrupt()); ++ ++ /* ++ * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO ++ */ ++ if (unlikely(SCHED_DEADLINE == policy)) { ++ attr = &dl_squash_attr; ++ policy = attr->sched_policy; ++ newprio = MAX_RT_PRIO - 1 - attr->sched_priority; ++ } ++recheck: ++ /* Double check policy once rq lock held */ ++ if (policy < 0) { ++ reset_on_fork = p->sched_reset_on_fork; ++ policy = oldpolicy = p->policy; ++ } else { ++ reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); ++ ++ if (policy > SCHED_IDLE) ++ return -EINVAL; ++ } ++ ++ if (attr->sched_flags & ~(SCHED_FLAG_ALL)) ++ return -EINVAL; ++ ++ /* ++ * Valid priorities for SCHED_FIFO and SCHED_RR are ++ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and ++ * SCHED_BATCH and SCHED_IDLE is 0. ++ */ ++ if (attr->sched_priority < 0 || ++ (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || ++ (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) ++ return -EINVAL; ++ if ((SCHED_RR == policy || SCHED_FIFO == policy) != ++ (attr->sched_priority != 0)) ++ return -EINVAL; ++ ++ /* ++ * Allow unprivileged RT tasks to decrease priority: ++ */ ++ if (user && !capable(CAP_SYS_NICE)) { ++ if (SCHED_FIFO == policy || SCHED_RR == policy) { ++ unsigned long rlim_rtprio = ++ task_rlimit(p, RLIMIT_RTPRIO); ++ ++ /* Can't set/change the rt policy */ ++ if (policy != p->policy && !rlim_rtprio) ++ return -EPERM; ++ ++ /* Can't increase priority */ ++ if (attr->sched_priority > p->rt_priority && ++ attr->sched_priority > rlim_rtprio) ++ return -EPERM; ++ } ++ ++ /* Can't change other user's priorities */ ++ if (!check_same_owner(p)) ++ return -EPERM; ++ ++ /* Normal users shall not reset the sched_reset_on_fork flag */ ++ if (p->sched_reset_on_fork && !reset_on_fork) ++ return -EPERM; ++ } ++ ++ if (user) { ++ retval = security_task_setscheduler(p); ++ if (retval) ++ return retval; ++ } ++ ++ if (pi) ++ cpuset_read_lock(); ++ ++ /* ++ * Make sure no PI-waiters arrive (or leave) while we are ++ * changing the priority of the task: ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ ++ /* ++ * To be able to change p->policy safely, task_access_lock() ++ * must be called. ++ * IF use task_access_lock() here: ++ * For the task p which is not running, reading rq->stop is ++ * racy but acceptable as ->stop doesn't change much. ++ * An enhancemnet can be made to read rq->stop saftly. 
++ */ ++ rq = __task_access_lock(p, &lock); ++ ++ /* ++ * Changing the policy of the stop threads its a very bad idea ++ */ ++ if (p == rq->stop) { ++ retval = -EINVAL; ++ goto unlock; ++ } ++ ++ /* ++ * If not changing anything there's no need to proceed further: ++ */ ++ if (unlikely(policy == p->policy)) { ++ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) ++ goto change; ++ if (!rt_policy(policy) && ++ NICE_TO_PRIO(attr->sched_nice) != p->static_prio) ++ goto change; ++ ++ p->sched_reset_on_fork = reset_on_fork; ++ retval = 0; ++ goto unlock; ++ } ++change: ++ ++ /* Re-check policy now with rq lock held */ ++ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { ++ policy = oldpolicy = -1; ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ if (pi) ++ cpuset_read_unlock(); ++ goto recheck; ++ } ++ ++ p->sched_reset_on_fork = reset_on_fork; ++ ++ if (pi) { ++ /* ++ * Take priority boosted tasks into account. If the new ++ * effective priority is unchanged, we just store the new ++ * normal parameters and do not touch the scheduler class and ++ * the runqueue. This will be done when the task deboost ++ * itself. ++ */ ++ if (rt_effective_prio(p, newprio) == p->prio) { ++ __setscheduler_params(p, attr); ++ retval = 0; ++ goto unlock; ++ } ++ } ++ ++ __setscheduler(rq, p, attr, pi); ++ ++ check_task_changed(rq, p); ++ ++ /* Avoid rq from going away on us: */ ++ preempt_disable(); ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ if (pi) { ++ cpuset_read_unlock(); ++ rt_mutex_adjust_pi(p); ++ } ++ ++ preempt_enable(); ++ ++ return 0; ++ ++unlock: ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ if (pi) ++ cpuset_read_unlock(); ++ return retval; ++} ++ ++static int _sched_setscheduler(struct task_struct *p, int policy, ++ const struct sched_param *param, bool check) ++{ ++ struct sched_attr attr = { ++ .sched_policy = policy, ++ .sched_priority = param->sched_priority, ++ .sched_nice = PRIO_TO_NICE(p->static_prio), ++ }; ++ ++ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ ++ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { ++ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; ++ policy &= ~SCHED_RESET_ON_FORK; ++ attr.sched_policy = policy; ++ } ++ ++ return __sched_setscheduler(p, &attr, check, true); ++} ++ ++/** ++ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. ++ * @p: the task in question. ++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * Return: 0 on success. An error code otherwise. ++ * ++ * NOTE that the task may be already dead. ++ */ ++int sched_setscheduler(struct task_struct *p, int policy, ++ const struct sched_param *param) ++{ ++ return _sched_setscheduler(p, policy, param, true); ++} ++ ++EXPORT_SYMBOL_GPL(sched_setscheduler); ++ ++int sched_setattr(struct task_struct *p, const struct sched_attr *attr) ++{ ++ return __sched_setscheduler(p, attr, true, true); ++} ++EXPORT_SYMBOL_GPL(sched_setattr); ++ ++int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) ++{ ++ return __sched_setscheduler(p, attr, false, true); ++} ++ ++/** ++ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. ++ * @p: the task in question. ++ * @policy: new policy. ++ * @param: structure containing the new RT priority. 
++ * ++ * Just like sched_setscheduler, only don't bother checking if the ++ * current context has permission. For example, this is needed in ++ * stop_machine(): we create temporary high priority worker threads, ++ * but our caller might not have that capability. ++ * ++ * Return: 0 on success. An error code otherwise. ++ */ ++int sched_setscheduler_nocheck(struct task_struct *p, int policy, ++ const struct sched_param *param) ++{ ++ return _sched_setscheduler(p, policy, param, false); ++} ++EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); ++ ++static int ++do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) ++{ ++ struct sched_param lparam; ++ struct task_struct *p; ++ int retval; ++ ++ if (!param || pid < 0) ++ return -EINVAL; ++ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) ++ return -EFAULT; ++ ++ rcu_read_lock(); ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (likely(p)) ++ get_task_struct(p); ++ rcu_read_unlock(); ++ ++ if (likely(p)) { ++ retval = sched_setscheduler(p, policy, &lparam); ++ put_task_struct(p); ++ } ++ ++ return retval; ++} ++ ++/* ++ * Mimics kernel/events/core.c perf_copy_attr(). ++ */ ++static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) ++{ ++ u32 size; ++ int ret; ++ ++ /* Zero the full structure, so that a short copy will be nice: */ ++ memset(attr, 0, sizeof(*attr)); ++ ++ ret = get_user(size, &uattr->size); ++ if (ret) ++ return ret; ++ ++ /* ABI compatibility quirk: */ ++ if (!size) ++ size = SCHED_ATTR_SIZE_VER0; ++ ++ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) ++ goto err_size; ++ ++ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); ++ if (ret) { ++ if (ret == -E2BIG) ++ goto err_size; ++ return ret; ++ } ++ ++ /* ++ * XXX: Do we want to be lenient like existing syscalls; or do we want ++ * to be strict and return an error on out-of-bounds values? ++ */ ++ attr->sched_nice = clamp(attr->sched_nice, -20, 19); ++ ++ /* sched/core.c uses zero here but we already know ret is zero */ ++ return 0; ++ ++err_size: ++ put_user(sizeof(*attr), &uattr->size); ++ return -E2BIG; ++} ++ ++/** ++ * sys_sched_setscheduler - set/change the scheduler policy and RT priority ++ * @pid: the pid in question. ++ * @policy: new policy. ++ * ++ * Return: 0 on success. An error code otherwise. ++ * @param: structure containing the new RT priority. ++ */ ++SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) ++{ ++ if (policy < 0) ++ return -EINVAL; ++ ++ return do_sched_setscheduler(pid, policy, param); ++} ++ ++/** ++ * sys_sched_setparam - set/change the RT priority of a thread ++ * @pid: the pid in question. ++ * @param: structure containing the new RT priority. ++ * ++ * Return: 0 on success. An error code otherwise. ++ */ ++SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) ++{ ++ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); ++} ++ ++/** ++ * sys_sched_setattr - same as above, but with extended sched_attr ++ * @pid: the pid in question. ++ * @uattr: structure containing the extended parameters. 
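++ * @flags: for future extension.
++ *
++ * Return: 0 on success. An error code otherwise.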
++ */ ++SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, ++ unsigned int, flags) ++{ ++ struct sched_attr attr; ++ struct task_struct *p; ++ int retval; ++ ++ if (!uattr || pid < 0 || flags) ++ return -EINVAL; ++ ++ retval = sched_copy_attr(uattr, &attr); ++ if (retval) ++ return retval; ++ ++ if ((int)attr.sched_policy < 0) ++ return -EINVAL; ++ ++ rcu_read_lock(); ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (p != NULL) ++ retval = sched_setattr(p, &attr); ++ rcu_read_unlock(); ++ ++ return retval; ++} ++ ++/** ++ * sys_sched_getscheduler - get the policy (scheduling class) of a thread ++ * @pid: the pid in question. ++ * ++ * Return: On success, the policy of the thread. Otherwise, a negative error ++ * code. ++ */ ++SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) ++{ ++ struct task_struct *p; ++ int retval = -EINVAL; ++ ++ if (pid < 0) ++ goto out_nounlock; ++ ++ retval = -ESRCH; ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ if (p) { ++ retval = security_task_getscheduler(p); ++ if (!retval) ++ retval = p->policy; ++ } ++ rcu_read_unlock(); ++ ++out_nounlock: ++ return retval; ++} ++ ++/** ++ * sys_sched_getscheduler - get the RT priority of a thread ++ * @pid: the pid in question. ++ * @param: structure containing the RT priority. ++ * ++ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error ++ * code. ++ */ ++SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) ++{ ++ struct sched_param lp = { .sched_priority = 0 }; ++ struct task_struct *p; ++ int retval = -EINVAL; ++ ++ if (!param || pid < 0) ++ goto out_nounlock; ++ ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ retval = -ESRCH; ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ if (task_has_rt_policy(p)) ++ lp.sched_priority = p->rt_priority; ++ rcu_read_unlock(); ++ ++ /* ++ * This one might sleep, we cannot do it with a spinlock held ... ++ */ ++ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; ++ ++out_nounlock: ++ return retval; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++/* ++ * Copy the kernel size attribute structure (which might be larger ++ * than what user-space knows about) to user-space. ++ * ++ * Note that all cases are valid: user-space buffer can be larger or ++ * smaller than the kernel-space buffer. The usual case is that both ++ * have the same size. ++ */ ++static int ++sched_attr_copy_to_user(struct sched_attr __user *uattr, ++ struct sched_attr *kattr, ++ unsigned int usize) ++{ ++ unsigned int ksize = sizeof(*kattr); ++ ++ if (!access_ok(uattr, usize)) ++ return -EFAULT; ++ ++ /* ++ * sched_getattr() ABI forwards and backwards compatibility: ++ * ++ * If usize == ksize then we just copy everything to user-space and all is good. ++ * ++ * If usize < ksize then we only copy as much as user-space has space for, ++ * this keeps ABI compatibility as well. We skip the rest. ++ * ++ * If usize > ksize then user-space is using a newer version of the ABI, ++ * which part the kernel doesn't know about. Just ignore it - tooling can ++ * detect the kernel's knowledge of attributes from the attr->size value ++ * which is set to ksize in this case. ++ */ ++ kattr->size = min(usize, ksize); ++ ++ if (copy_to_user(uattr, kattr, kattr->size)) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++/** ++ * sys_sched_getattr - similar to sched_getparam, but with sched_attr ++ * @pid: the pid in question. 
++ * @uattr: structure containing the extended parameters. ++ * @usize: sizeof(attr) for fwd/bwd comp. ++ * @flags: for future extension. ++ */ ++SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, ++ unsigned int, usize, unsigned int, flags) ++{ ++ struct sched_attr kattr = { }; ++ struct task_struct *p; ++ int retval; ++ ++ if (!uattr || pid < 0 || usize > PAGE_SIZE || ++ usize < SCHED_ATTR_SIZE_VER0 || flags) ++ return -EINVAL; ++ ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ retval = -ESRCH; ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ kattr.sched_policy = p->policy; ++ if (p->sched_reset_on_fork) ++ kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; ++ if (task_has_rt_policy(p)) ++ kattr.sched_priority = p->rt_priority; ++ else ++ kattr.sched_nice = task_nice(p); ++ ++#ifdef CONFIG_UCLAMP_TASK ++ kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; ++ kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; ++#endif ++ ++ rcu_read_unlock(); ++ ++ return sched_attr_copy_to_user(uattr, &kattr, usize); ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) ++{ ++ cpumask_var_t cpus_allowed, new_mask; ++ struct task_struct *p; ++ int retval; ++ ++ get_online_cpus(); ++ rcu_read_lock(); ++ ++ p = find_process_by_pid(pid); ++ if (!p) { ++ rcu_read_unlock(); ++ put_online_cpus(); ++ return -ESRCH; ++ } ++ ++ /* Prevent p going away */ ++ get_task_struct(p); ++ rcu_read_unlock(); ++ ++ if (p->flags & PF_NO_SETAFFINITY) { ++ retval = -EINVAL; ++ goto out_put_task; ++ } ++ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_put_task; ++ } ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_free_cpus_allowed; ++ } ++ retval = -EPERM; ++ if (!check_same_owner(p)) { ++ rcu_read_lock(); ++ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { ++ rcu_read_unlock(); ++ goto out_unlock; ++ } ++ rcu_read_unlock(); ++ } ++ ++ retval = security_task_setscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ cpuset_cpus_allowed(p, cpus_allowed); ++ cpumask_and(new_mask, in_mask, cpus_allowed); ++again: ++ retval = __set_cpus_allowed_ptr(p, new_mask, true); ++ ++ if (!retval) { ++ cpuset_cpus_allowed(p, cpus_allowed); ++ if (!cpumask_subset(new_mask, cpus_allowed)) { ++ /* ++ * We must have raced with a concurrent cpuset ++ * update. Just reset the cpus_allowed to the ++ * cpuset's cpus_allowed ++ */ ++ cpumask_copy(new_mask, cpus_allowed); ++ goto again; ++ } ++ } ++out_unlock: ++ free_cpumask_var(new_mask); ++out_free_cpus_allowed: ++ free_cpumask_var(cpus_allowed); ++out_put_task: ++ put_task_struct(p); ++ put_online_cpus(); ++ return retval; ++} ++ ++static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, ++ struct cpumask *new_mask) ++{ ++ if (len < cpumask_size()) ++ cpumask_clear(new_mask); ++ else if (len > cpumask_size()) ++ len = cpumask_size(); ++ ++ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; ++} ++ ++/** ++ * sys_sched_setaffinity - set the CPU affinity of a process ++ * @pid: pid of the process ++ * @len: length in bytes of the bitmask pointed to by user_mask_ptr ++ * @user_mask_ptr: user-space pointer to the new CPU mask ++ * ++ * Return: 0 on success. An error code otherwise. 
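++ *
++ * A @len shorter than the kernel cpumask size leaves the missing bits as
++ * zero, and bytes beyond the kernel cpumask size are ignored (see
++ * get_user_cpu_mask()).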
++ */ ++SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, ++ unsigned long __user *, user_mask_ptr) ++{ ++ cpumask_var_t new_mask; ++ int retval; ++ ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); ++ if (retval == 0) ++ retval = sched_setaffinity(pid, new_mask); ++ free_cpumask_var(new_mask); ++ return retval; ++} ++ ++long sched_getaffinity(pid_t pid, cpumask_t *mask) ++{ ++ struct task_struct *p; ++ raw_spinlock_t *lock; ++ unsigned long flags; ++ int retval; ++ ++ rcu_read_lock(); ++ ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ task_access_lock_irqsave(p, &lock, &flags); ++ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); ++ task_access_unlock_irqrestore(p, lock, &flags); ++ ++out_unlock: ++ rcu_read_unlock(); ++ ++ return retval; ++} ++ ++/** ++ * sys_sched_getaffinity - get the CPU affinity of a process ++ * @pid: pid of the process ++ * @len: length in bytes of the bitmask pointed to by user_mask_ptr ++ * @user_mask_ptr: user-space pointer to hold the current CPU mask ++ * ++ * Return: size of CPU mask copied to user_mask_ptr on success. An ++ * error code otherwise. ++ */ ++SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, ++ unsigned long __user *, user_mask_ptr) ++{ ++ int ret; ++ cpumask_var_t mask; ++ ++ if ((len * BITS_PER_BYTE) < nr_cpu_ids) ++ return -EINVAL; ++ if (len & (sizeof(unsigned long)-1)) ++ return -EINVAL; ++ ++ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ ret = sched_getaffinity(pid, mask); ++ if (ret == 0) { ++ unsigned int retlen = min_t(size_t, len, cpumask_size()); ++ ++ if (copy_to_user(user_mask_ptr, mask, retlen)) ++ ret = -EFAULT; ++ else ++ ret = retlen; ++ } ++ free_cpumask_var(mask); ++ ++ return ret; ++} ++ ++/** ++ * sys_sched_yield - yield the current processor to other threads. ++ * ++ * This function yields the current CPU to other tasks. It does this by ++ * scheduling away the current task. If it still has the earliest deadline ++ * it will be scheduled again as the next task. ++ * ++ * Return: 0. ++ */ ++static void do_sched_yield(void) ++{ ++ struct rq *rq; ++ struct rq_flags rf; ++ ++ if (!sched_yield_type) ++ return; ++ ++ rq = this_rq_lock_irq(&rf); ++ ++ schedstat_inc(rq->yld_count); ++ ++ if (1 == sched_yield_type) { ++ if (!rt_task(current)) { ++ current->boost_prio = MAX_PRIORITY_ADJ; ++ requeue_task(current, rq); ++ } ++ } else if (2 == sched_yield_type) { ++ if (rq->nr_running > 1) ++ rq->skip = current; ++ } ++ ++ /* ++ * Since we are going to call schedule() anyway, there's ++ * no need to preempt or enable interrupts: ++ */ ++ preempt_disable(); ++ raw_spin_unlock(&rq->lock); ++ sched_preempt_enable_no_resched(); ++ ++ schedule(); ++} ++ ++SYSCALL_DEFINE0(sched_yield) ++{ ++ do_sched_yield(); ++ return 0; ++} ++ ++#ifndef CONFIG_PREEMPTION ++int __sched _cond_resched(void) ++{ ++ if (should_resched(0)) { ++ preempt_schedule_common(); ++ return 1; ++ } ++ rcu_all_qs(); ++ return 0; ++} ++EXPORT_SYMBOL(_cond_resched); ++#endif ++ ++/* ++ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, ++ * call schedule, and on return reacquire the lock. ++ * ++ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level ++ * operations here to prevent schedule() from being called twice (once via ++ * spin_unlock(), once by hand). 
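++ *
++ * A typical caller looks roughly like (names are illustrative only):
++ *
++ *	spin_lock(&lock);
++ *	while (more_work(ctx)) {
++ *		do_one_chunk(ctx);
++ *		cond_resched_lock(&lock);
++ *	}
++ *	spin_unlock(&lock);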
++ */ ++int __cond_resched_lock(spinlock_t *lock) ++{ ++ int resched = should_resched(PREEMPT_LOCK_OFFSET); ++ int ret = 0; ++ ++ lockdep_assert_held(lock); ++ ++ if (spin_needbreak(lock) || resched) { ++ spin_unlock(lock); ++ if (resched) ++ preempt_schedule_common(); ++ else ++ cpu_relax(); ++ ret = 1; ++ spin_lock(lock); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(__cond_resched_lock); ++ ++/** ++ * yield - yield the current processor to other threads. ++ * ++ * Do not ever use this function, there's a 99% chance you're doing it wrong. ++ * ++ * The scheduler is at all times free to pick the calling task as the most ++ * eligible task to run, if removing the yield() call from your code breaks ++ * it, its already broken. ++ * ++ * Typical broken usage is: ++ * ++ * while (!event) ++ * yield(); ++ * ++ * where one assumes that yield() will let 'the other' process run that will ++ * make event true. If the current task is a SCHED_FIFO task that will never ++ * happen. Never use yield() as a progress guarantee!! ++ * ++ * If you want to use yield() to wait for something, use wait_event(). ++ * If you want to use yield() to be 'nice' for others, use cond_resched(). ++ * If you still want to use yield(), do not! ++ */ ++void __sched yield(void) ++{ ++ set_current_state(TASK_RUNNING); ++ do_sched_yield(); ++} ++EXPORT_SYMBOL(yield); ++ ++/** ++ * yield_to - yield the current processor to another thread in ++ * your thread group, or accelerate that thread toward the ++ * processor it's on. ++ * @p: target task ++ * @preempt: whether task preemption is allowed or not ++ * ++ * It's the caller's job to ensure that the target task struct ++ * can't go away on us before we can do any checks. ++ * ++ * In Alt schedule FW, yield_to is not supported. ++ * ++ * Return: ++ * true (>0) if we indeed boosted the target task. ++ * false (0) if we failed to boost the target. ++ * -ESRCH if there's no task to yield to. ++ */ ++int __sched yield_to(struct task_struct *p, bool preempt) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(yield_to); ++ ++int io_schedule_prepare(void) ++{ ++ int old_iowait = current->in_iowait; ++ ++ current->in_iowait = 1; ++ blk_schedule_flush_plug(current); ++ ++ return old_iowait; ++} ++ ++void io_schedule_finish(int token) ++{ ++ current->in_iowait = token; ++} ++ ++/* ++ * This task is about to go to sleep on IO. Increment rq->nr_iowait so ++ * that process accounting knows that this is a task in IO wait state. ++ * ++ * But don't do that if it is a deliberate, throttling IO wait (this task ++ * has set its backing_dev_info: the queue against which it should throttle) ++ */ ++ ++long __sched io_schedule_timeout(long timeout) ++{ ++ int token; ++ long ret; ++ ++ token = io_schedule_prepare(); ++ ret = schedule_timeout(timeout); ++ io_schedule_finish(token); ++ ++ return ret; ++} ++EXPORT_SYMBOL(io_schedule_timeout); ++ ++void __sched io_schedule(void) ++{ ++ int token; ++ ++ token = io_schedule_prepare(); ++ schedule(); ++ io_schedule_finish(token); ++} ++EXPORT_SYMBOL(io_schedule); ++ ++/** ++ * sys_sched_get_priority_max - return maximum RT priority. ++ * @policy: scheduling class. ++ * ++ * Return: On success, this syscall returns the maximum ++ * rt_priority that can be used by a given scheduling class. ++ * On failure, a negative error code is returned. 
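++ *
++ * For example, SCHED_FIFO and SCHED_RR report MAX_USER_RT_PRIO-1 (99 with
++ * the default configuration) while SCHED_NORMAL, SCHED_BATCH and
++ * SCHED_IDLE report 0, matching the switch below.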
++ */ ++SYSCALL_DEFINE1(sched_get_priority_max, int, policy) ++{ ++ int ret = -EINVAL; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ ret = MAX_USER_RT_PRIO-1; ++ break; ++ case SCHED_NORMAL: ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ ret = 0; ++ break; ++ } ++ return ret; ++} ++ ++/** ++ * sys_sched_get_priority_min - return minimum RT priority. ++ * @policy: scheduling class. ++ * ++ * Return: On success, this syscall returns the minimum ++ * rt_priority that can be used by a given scheduling class. ++ * On failure, a negative error code is returned. ++ */ ++SYSCALL_DEFINE1(sched_get_priority_min, int, policy) ++{ ++ int ret = -EINVAL; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ ret = 1; ++ break; ++ case SCHED_NORMAL: ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ ret = 0; ++ break; ++ } ++ return ret; ++} ++ ++static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) ++{ ++ struct task_struct *p; ++ int retval; ++ ++ if (pid < 0) ++ return -EINVAL; ++ ++ retval = -ESRCH; ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ rcu_read_unlock(); ++ ++ *t = ns_to_timespec64(sched_timeslice_ns); ++ return 0; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++/** ++ * sys_sched_rr_get_interval - return the default timeslice of a process. ++ * @pid: pid of the process. ++ * @interval: userspace pointer to the timeslice value. ++ * ++ * ++ * Return: On success, 0 and the timeslice is in @interval. Otherwise, ++ * an error code. ++ */ ++SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, ++ struct __kernel_timespec __user *, interval) ++{ ++ struct timespec64 t; ++ int retval = sched_rr_get_interval(pid, &t); ++ ++ if (retval == 0) ++ retval = put_timespec64(&t, interval); ++ ++ return retval; ++} ++ ++#ifdef CONFIG_COMPAT_32BIT_TIME ++SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, ++ struct old_timespec32 __user *, interval) ++{ ++ struct timespec64 t; ++ int retval = sched_rr_get_interval(pid, &t); ++ ++ if (retval == 0) ++ retval = put_old_timespec32(&t, interval); ++ return retval; ++} ++#endif ++ ++void sched_show_task(struct task_struct *p) ++{ ++ unsigned long free = 0; ++ int ppid; ++ ++ if (!try_get_task_stack(p)) ++ return; ++ ++ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); ++ ++ if (p->state == TASK_RUNNING) ++ printk(KERN_CONT " running task "); ++#ifdef CONFIG_DEBUG_STACK_USAGE ++ free = stack_not_used(p); ++#endif ++ ppid = 0; ++ rcu_read_lock(); ++ if (pid_alive(p)) ++ ppid = task_pid_nr(rcu_dereference(p->real_parent)); ++ rcu_read_unlock(); ++ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, ++ task_pid_nr(p), ppid, ++ (unsigned long)task_thread_info(p)->flags); ++ ++ print_worker_info(KERN_INFO, p); ++ show_stack(p, NULL); ++ put_task_stack(p); ++} ++EXPORT_SYMBOL_GPL(sched_show_task); ++ ++static inline bool ++state_filter_match(unsigned long state_filter, struct task_struct *p) ++{ ++ /* no filter, everything matches */ ++ if (!state_filter) ++ return true; ++ ++ /* filter, but doesn't match */ ++ if (!(p->state & state_filter)) ++ return false; ++ ++ /* ++ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows ++ * TASK_KILLABLE). 
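++ * (TASK_IDLE is TASK_UNINTERRUPTIBLE | TASK_NOLOAD, so it would otherwise
++ * match a plain TASK_UNINTERRUPTIBLE filter.)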
++ */ ++ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) ++ return false; ++ ++ return true; ++} ++ ++ ++void show_state_filter(unsigned long state_filter) ++{ ++ struct task_struct *g, *p; ++ ++#if BITS_PER_LONG == 32 ++ printk(KERN_INFO ++ " task PC stack pid father\n"); ++#else ++ printk(KERN_INFO ++ " task PC stack pid father\n"); ++#endif ++ rcu_read_lock(); ++ for_each_process_thread(g, p) { ++ /* ++ * reset the NMI-timeout, listing all files on a slow ++ * console might take a lot of time: ++ * Also, reset softlockup watchdogs on all CPUs, because ++ * another CPU might be blocked waiting for us to process ++ * an IPI. ++ */ ++ touch_nmi_watchdog(); ++ touch_all_softlockup_watchdogs(); ++ if (state_filter_match(state_filter, p)) ++ sched_show_task(p); ++ } ++ ++#ifdef CONFIG_SCHED_DEBUG ++ /* TODO: Alt schedule FW should support this ++ if (!state_filter) ++ sysrq_sched_debug_show(); ++ */ ++#endif ++ rcu_read_unlock(); ++ /* ++ * Only show locks if all tasks are dumped: ++ */ ++ if (!state_filter) ++ debug_show_all_locks(); ++} ++ ++void dump_cpu_task(int cpu) ++{ ++ pr_info("Task dump for CPU %d:\n", cpu); ++ sched_show_task(cpu_curr(cpu)); ++} ++ ++/** ++ * init_idle - set up an idle thread for a given CPU ++ * @idle: task in question ++ * @cpu: CPU the idle task belongs to ++ * ++ * NOTE: this function does not set the idle thread's NEED_RESCHED ++ * flag, to make booting more robust. ++ */ ++void init_idle(struct task_struct *idle, int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ __sched_fork(0, idle); ++ ++ raw_spin_lock_irqsave(&idle->pi_lock, flags); ++ raw_spin_lock(&rq->lock); ++ update_rq_clock(rq); ++ ++ idle->last_ran = rq->clock_task; ++ idle->state = TASK_RUNNING; ++ idle->flags |= PF_IDLE; ++ sched_queue_init_idle(rq, idle); ++ ++ kasan_unpoison_task_stack(idle); ++ ++#ifdef CONFIG_SMP ++ /* ++ * It's possible that init_idle() gets called multiple times on a task, ++ * in that case do_set_cpus_allowed() will not do the right thing. ++ * ++ * And since this is boot we can forgo the serialisation. ++ */ ++ set_cpus_allowed_common(idle, cpumask_of(cpu)); ++#endif ++ ++ /* Silence PROVE_RCU */ ++ rcu_read_lock(); ++ __set_task_cpu(idle, cpu); ++ rcu_read_unlock(); ++ ++ rq->idle = idle; ++ rcu_assign_pointer(rq->curr, idle); ++ idle->on_cpu = 1; ++ ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); ++ ++ /* Set the preempt count _outside_ the spinlocks! */ ++ init_idle_preempt_count(idle, cpu); ++ ++ ftrace_graph_init_idle_task(idle, cpu); ++ vtime_init_idle(idle, cpu); ++#ifdef CONFIG_SMP ++ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); ++#endif ++} ++ ++#ifdef CONFIG_SMP ++ ++int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, ++ const struct cpumask __maybe_unused *trial) ++{ ++ return 1; ++} ++ ++int task_can_attach(struct task_struct *p, ++ const struct cpumask *cs_cpus_allowed) ++{ ++ int ret = 0; ++ ++ /* ++ * Kthreads which disallow setaffinity shouldn't be moved ++ * to a new cpuset; we don't want to change their CPU ++ * affinity and isolating such threads by their set of ++ * allowed nodes is unnecessary. Thus, cpusets are not ++ * applicable for such threads. This prevents checking for ++ * success of set_cpus_allowed_ptr() on all attached tasks ++ * before cpus_mask may be changed. 
++ */ ++ if (p->flags & PF_NO_SETAFFINITY) ++ ret = -EINVAL; ++ ++ return ret; ++} ++ ++bool sched_smp_initialized __read_mostly; ++ ++#ifdef CONFIG_HOTPLUG_CPU ++/* ++ * Ensures that the idle task is using init_mm right before its CPU goes ++ * offline. ++ */ ++void idle_task_exit(void) ++{ ++ struct mm_struct *mm = current->active_mm; ++ ++ BUG_ON(current != this_rq()->idle); ++ ++ if (mm != &init_mm) { ++ switch_mm(mm, &init_mm, current); ++ finish_arch_post_lock_switch(); ++ } ++ ++ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ ++} ++ ++/* ++ * Migrate all tasks from the rq, sleeping tasks will be migrated by ++ * try_to_wake_up()->select_task_rq(). ++ * ++ * Called with rq->lock held even though we'er in stop_machine() and ++ * there's no concurrency possible, we hold the required locks anyway ++ * because of lock validation efforts. ++ */ ++static void migrate_tasks(struct rq *dead_rq) ++{ ++ struct rq *rq = dead_rq; ++ struct task_struct *p, *stop = rq->stop; ++ int count = 0; ++ ++ /* ++ * Fudge the rq selection such that the below task selection loop ++ * doesn't get stuck on the currently eligible stop task. ++ * ++ * We're currently inside stop_machine() and the rq is either stuck ++ * in the stop_machine_cpu_stop() loop, or we're executing this code, ++ * either way we should never end up calling schedule() until we're ++ * done here. ++ */ ++ rq->stop = NULL; ++ ++ p = sched_rq_first_task(rq); ++ while (p != rq->idle) { ++ int dest_cpu; ++ ++ /* skip the running task */ ++ if (task_running(p) || 1 == p->nr_cpus_allowed) { ++ p = sched_rq_next_task(p, rq); ++ continue; ++ } ++ ++ /* ++ * Rules for changing task_struct::cpus_allowed are holding ++ * both pi_lock and rq->lock, such that holding either ++ * stabilizes the mask. ++ * ++ * Drop rq->lock is not quite as disastrous as it usually is ++ * because !cpu_active at this point, which means load-balance ++ * will not interfere. Also, stop-machine. ++ */ ++ raw_spin_unlock(&rq->lock); ++ raw_spin_lock(&p->pi_lock); ++ raw_spin_lock(&rq->lock); ++ ++ /* ++ * Since we're inside stop-machine, _nothing_ should have ++ * changed the task, WARN if weird stuff happened, because in ++ * that case the above rq->lock drop is a fail too. ++ */ ++ if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { ++ raw_spin_unlock(&p->pi_lock); ++ p = sched_rq_next_task(p, rq); ++ continue; ++ } ++ ++ count++; ++ /* Find suitable destination for @next, with force if needed. */ ++ dest_cpu = select_fallback_rq(dead_rq->cpu, p); ++ rq = __migrate_task(rq, p, dest_cpu); ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock(&p->pi_lock); ++ ++ rq = dead_rq; ++ raw_spin_lock(&rq->lock); ++ /* Check queued task all over from the header again */ ++ p = sched_rq_first_task(rq); ++ } ++ ++ rq->stop = stop; ++} ++ ++static void set_rq_offline(struct rq *rq) ++{ ++ if (rq->online) ++ rq->online = false; ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++static void set_rq_online(struct rq *rq) ++{ ++ if (!rq->online) ++ rq->online = true; ++} ++ ++/* ++ * used to mark begin/end of suspend/resume: ++ */ ++static int num_cpus_frozen; ++ ++/* ++ * Update cpusets according to cpu_active mask. If cpusets are ++ * disabled, cpuset_update_active_cpus() becomes a simple wrapper ++ * around partition_sched_domains(). ++ * ++ * If we come here as part of a suspend/resume, don't touch cpusets because we ++ * want to restore it back to its original state upon resume anyway. 
++ */ ++static void cpuset_cpu_active(void) ++{ ++ if (cpuhp_tasks_frozen) { ++ /* ++ * num_cpus_frozen tracks how many CPUs are involved in suspend ++ * resume sequence. As long as this is not the last online ++ * operation in the resume sequence, just build a single sched ++ * domain, ignoring cpusets. ++ */ ++ partition_sched_domains(1, NULL, NULL); ++ if (--num_cpus_frozen) ++ return; ++ /* ++ * This is the last CPU online operation. So fall through and ++ * restore the original sched domains by considering the ++ * cpuset configurations. ++ */ ++ cpuset_force_rebuild(); ++ } ++ ++ cpuset_update_active_cpus(); ++} ++ ++static int cpuset_cpu_inactive(unsigned int cpu) ++{ ++ if (!cpuhp_tasks_frozen) { ++ cpuset_update_active_cpus(); ++ } else { ++ num_cpus_frozen++; ++ partition_sched_domains(1, NULL, NULL); ++ } ++ return 0; ++} ++ ++int sched_cpu_activate(unsigned int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++#ifdef CONFIG_SCHED_SMT ++ /* ++ * When going up, increment the number of cores with SMT present. ++ */ ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) ++ static_branch_inc_cpuslocked(&sched_smt_present); ++#endif ++ set_cpu_active(cpu, true); ++ ++ if (sched_smp_initialized) ++ cpuset_cpu_active(); ++ ++ /* ++ * Put the rq online, if not already. This happens: ++ * ++ * 1) In the early boot process, because we build the real domains ++ * after all cpus have been brought up. ++ * ++ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the ++ * domains. ++ */ ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ set_rq_online(rq); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++ ++ return 0; ++} ++ ++int sched_cpu_deactivate(unsigned int cpu) ++{ ++ int ret; ++ ++ set_cpu_active(cpu, false); ++ /* ++ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU ++ * users of this state to go away such that all new such users will ++ * observe it. ++ * ++ * Do sync before park smpboot threads to take care the rcu boost case. ++ */ ++ synchronize_rcu(); ++ ++#ifdef CONFIG_SCHED_SMT ++ /* ++ * When going down, decrement the number of cores with SMT present. 
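++ * A cpu_smt_mask() weight of 2 means this CPU plus exactly one sibling
++ * are still online, i.e. the core loses SMT once this CPU goes away.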
++ */ ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) { ++ static_branch_dec_cpuslocked(&sched_smt_present); ++ if (!static_branch_likely(&sched_smt_present)) ++ cpumask_clear(&sched_sg_idle_mask); ++ } ++#endif ++ ++ if (!sched_smp_initialized) ++ return 0; ++ ++ ret = cpuset_cpu_inactive(cpu); ++ if (ret) { ++ set_cpu_active(cpu, true); ++ return ret; ++ } ++ return 0; ++} ++ ++static void sched_rq_cpu_starting(unsigned int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ rq->calc_load_update = calc_load_update; ++} ++ ++int sched_cpu_starting(unsigned int cpu) ++{ ++ sched_rq_cpu_starting(cpu); ++ sched_tick_start(cpu); ++ return 0; ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++int sched_cpu_dying(unsigned int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ sched_tick_stop(cpu); ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ set_rq_offline(rq); ++ migrate_tasks(rq); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++ ++ hrtick_clear(rq); ++ return 0; ++} ++#endif ++ ++#ifdef CONFIG_SMP ++static void sched_init_topology_cpumask_early(void) ++{ ++ int cpu, level; ++ cpumask_t *tmp; ++ ++ for_each_possible_cpu(cpu) { ++ for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { ++ tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); ++ cpumask_copy(tmp, cpu_possible_mask); ++ cpumask_clear_cpu(cpu, tmp); ++ } ++ per_cpu(sched_cpu_llc_mask, cpu) = ++ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ per_cpu(sched_cpu_affinity_end_mask, cpu) = ++ &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); ++ per_cpu(sd_llc_id, cpu) = cpu; ++ } ++} ++ ++#define TOPOLOGY_CPUMASK(name, mask, last) \ ++ if (cpumask_and(chk, chk, mask)) \ ++ printk(KERN_INFO "sched: cpu#%02d affinity mask: 0x%08lx - "#name,\ ++ cpu, (chk++)->bits[0]); \ ++ if (!last) \ ++ cpumask_complement(chk, mask) ++ ++static void sched_init_topology_cpumask(void) ++{ ++ int cpu; ++ cpumask_t *chk; ++ ++ for_each_online_cpu(cpu) { ++ /* take chance to reset time slice for idle tasks */ ++ cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; ++ ++ chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ ++ cpumask_complement(chk, cpumask_of(cpu)); ++#ifdef CONFIG_SCHED_SMT ++ TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); ++#endif ++ per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); ++ per_cpu(sched_cpu_llc_mask, cpu) = chk; ++ TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); ++ ++ TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); ++ ++ TOPOLOGY_CPUMASK(others, cpu_online_mask, true); ++ ++ per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; ++ printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", ++ cpu, per_cpu(sd_llc_id, cpu), ++ (int) (per_cpu(sched_cpu_llc_mask, cpu) - ++ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); ++ } ++} ++#endif ++ ++void __init sched_init_smp(void) ++{ ++ /* Move init over to a non-isolated CPU */ ++ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) ++ BUG(); ++ ++ sched_init_topology_cpumask(); ++ ++ sched_smp_initialized = true; ++} ++#else ++void __init sched_init_smp(void) ++{ ++ cpu_rq(0)->idle->time_slice = sched_timeslice_ns; ++} ++#endif /* CONFIG_SMP */ ++ ++int in_sched_functions(unsigned long addr) ++{ ++ return in_lock_functions(addr) || ++ (addr >= (unsigned long)__sched_text_start ++ && addr < (unsigned long)__sched_text_end); ++} ++ ++#ifdef CONFIG_CGROUP_SCHED ++/* task group related information */ ++struct task_group { ++ struct cgroup_subsys_state css; ++ ++ struct rcu_head rcu; ++ struct 
list_head list; ++ ++ struct task_group *parent; ++ struct list_head siblings; ++ struct list_head children; ++}; ++ ++/* ++ * Default task group. ++ * Every task in system belongs to this group at bootup. ++ */ ++struct task_group root_task_group; ++LIST_HEAD(task_groups); ++ ++/* Cacheline aligned slab cache for task_group */ ++static struct kmem_cache *task_group_cache __read_mostly; ++#endif /* CONFIG_CGROUP_SCHED */ ++ ++void __init sched_init(void) ++{ ++ int i; ++ struct rq *rq; ++ ++ printk(KERN_INFO ALT_SCHED_VERSION_MSG); ++ ++ wait_bit_init(); ++ ++#ifdef CONFIG_SMP ++ for (i = 0; i < SCHED_BITS; i++) ++ cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); ++#endif ++ ++#ifdef CONFIG_CGROUP_SCHED ++ task_group_cache = KMEM_CACHE(task_group, 0); ++ ++ list_add(&root_task_group.list, &task_groups); ++ INIT_LIST_HEAD(&root_task_group.children); ++ INIT_LIST_HEAD(&root_task_group.siblings); ++#endif /* CONFIG_CGROUP_SCHED */ ++ for_each_possible_cpu(i) { ++ rq = cpu_rq(i); ++ ++ sched_queue_init(rq); ++ rq->watermark = IDLE_WM; ++ rq->skip = NULL; ++ ++ raw_spin_lock_init(&rq->lock); ++ rq->nr_running = rq->nr_uninterruptible = 0; ++ rq->calc_load_active = 0; ++ rq->calc_load_update = jiffies + LOAD_FREQ; ++#ifdef CONFIG_SMP ++ rq->online = false; ++ rq->cpu = i; ++ ++#ifdef CONFIG_SCHED_SMT ++ rq->active_balance = 0; ++#endif ++#endif ++ rq->nr_switches = 0; ++ atomic_set(&rq->nr_iowait, 0); ++ hrtick_rq_init(rq); ++ } ++#ifdef CONFIG_SMP ++ /* Set rq->online for cpu 0 */ ++ cpu_rq(0)->online = true; ++#endif ++ /* ++ * The boot idle thread does lazy MMU switching as well: ++ */ ++ mmgrab(&init_mm); ++ enter_lazy_tlb(&init_mm, current); ++ ++ /* ++ * Make us the idle thread. Technically, schedule() should not be ++ * called from this thread, however somewhere below it might be, ++ * but because we are the idle thread, we just pick up running again ++ * when this runqueue becomes "idle". ++ */ ++ init_idle(current, smp_processor_id()); ++ ++ calc_load_update = jiffies + LOAD_FREQ; ++ ++#ifdef CONFIG_SMP ++ idle_thread_set_boot_cpu(); ++ ++ sched_init_topology_cpumask_early(); ++#endif /* SMP */ ++ ++ init_schedstats(); ++ ++ psi_init(); ++} ++ ++#ifdef CONFIG_DEBUG_ATOMIC_SLEEP ++static inline int preempt_count_equals(int preempt_offset) ++{ ++ int nested = preempt_count() + rcu_preempt_depth(); ++ ++ return (nested == preempt_offset); ++} ++ ++void __might_sleep(const char *file, int line, int preempt_offset) ++{ ++ /* ++ * Blocking primitives will set (and therefore destroy) current->state, ++ * since we will exit with TASK_RUNNING make sure we enter with it, ++ * otherwise we will destroy state. 
++ */ ++ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, ++ "do not call blocking ops when !TASK_RUNNING; " ++ "state=%lx set at [<%p>] %pS\n", ++ current->state, ++ (void *)current->task_state_change, ++ (void *)current->task_state_change); ++ ++ ___might_sleep(file, line, preempt_offset); ++} ++EXPORT_SYMBOL(__might_sleep); ++ ++void ___might_sleep(const char *file, int line, int preempt_offset) ++{ ++ /* Ratelimiting timestamp: */ ++ static unsigned long prev_jiffy; ++ ++ unsigned long preempt_disable_ip; ++ ++ /* WARN_ON_ONCE() by default, no rate limit required: */ ++ rcu_sleep_check(); ++ ++ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && ++ !is_idle_task(current) && !current->non_block_count) || ++ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || ++ oops_in_progress) ++ return; ++ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) ++ return; ++ prev_jiffy = jiffies; ++ ++ /* Save this before calling printk(), since that will clobber it: */ ++ preempt_disable_ip = get_preempt_disable_ip(current); ++ ++ printk(KERN_ERR ++ "BUG: sleeping function called from invalid context at %s:%d\n", ++ file, line); ++ printk(KERN_ERR ++ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), current->non_block_count, ++ current->pid, current->comm); ++ ++ if (task_stack_end_corrupted(current)) ++ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); ++ ++ debug_show_held_locks(current); ++ if (irqs_disabled()) ++ print_irqtrace_events(current); ++#ifdef CONFIG_DEBUG_PREEMPT ++ if (!preempt_count_equals(preempt_offset)) { ++ pr_err("Preemption disabled at:"); ++ print_ip_sym(preempt_disable_ip); ++ pr_cont("\n"); ++ } ++#endif ++ dump_stack(); ++ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); ++} ++EXPORT_SYMBOL(___might_sleep); ++ ++void __cant_sleep(const char *file, int line, int preempt_offset) ++{ ++ static unsigned long prev_jiffy; ++ ++ if (irqs_disabled()) ++ return; ++ ++ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) ++ return; ++ ++ if (preempt_count() > preempt_offset) ++ return; ++ ++ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) ++ return; ++ prev_jiffy = jiffies; ++ ++ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); ++ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), ++ current->pid, current->comm); ++ ++ debug_show_held_locks(current); ++ dump_stack(); ++ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); ++} ++EXPORT_SYMBOL_GPL(__cant_sleep); ++#endif ++ ++#ifdef CONFIG_MAGIC_SYSRQ ++void normalize_rt_tasks(void) ++{ ++ struct task_struct *g, *p; ++ struct sched_attr attr = { ++ .sched_policy = SCHED_NORMAL, ++ }; ++ ++ read_lock(&tasklist_lock); ++ for_each_process_thread(g, p) { ++ /* ++ * Only normalize user tasks: ++ */ ++ if (p->flags & PF_KTHREAD) ++ continue; ++ ++ if (!rt_task(p)) { ++ /* ++ * Renice negative nice level userspace ++ * tasks back to 0: ++ */ ++ if (task_nice(p) < 0) ++ set_user_nice(p, 0); ++ continue; ++ } ++ ++ __sched_setscheduler(p, &attr, false, false); ++ } ++ read_unlock(&tasklist_lock); ++} ++#endif /* CONFIG_MAGIC_SYSRQ */ ++ ++#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) ++/* ++ * These functions are only useful for the IA64 MCA handling, or kdb. ++ * ++ * They can only be called when the whole system has been ++ * stopped - every CPU needs to be quiescent, and no scheduling ++ * activity can take place. 
Using them for anything else would ++ * be a serious bug, and as a result, they aren't even visible ++ * under any other configuration. ++ */ ++ ++/** ++ * curr_task - return the current task for a given CPU. ++ * @cpu: the processor in question. ++ * ++ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! ++ * ++ * Return: The current task for @cpu. ++ */ ++struct task_struct *curr_task(int cpu) ++{ ++ return cpu_curr(cpu); ++} ++ ++#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ ++ ++#ifdef CONFIG_IA64 ++/** ++ * ia64_set_curr_task - set the current task for a given CPU. ++ * @cpu: the processor in question. ++ * @p: the task pointer to set. ++ * ++ * Description: This function must only be used when non-maskable interrupts ++ * are serviced on a separate stack. It allows the architecture to switch the ++ * notion of the current task on a CPU in a non-blocking manner. This function ++ * must be called with all CPU's synchronised, and interrupts disabled, the ++ * and caller must save the original value of the current task (see ++ * curr_task() above) and restore that value before reenabling interrupts and ++ * re-starting the system. ++ * ++ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! ++ */ ++void ia64_set_curr_task(int cpu, struct task_struct *p) ++{ ++ cpu_curr(cpu) = p; ++} ++ ++#endif ++ ++#ifdef CONFIG_CGROUP_SCHED ++static void sched_free_group(struct task_group *tg) ++{ ++ kmem_cache_free(task_group_cache, tg); ++} ++ ++/* allocate runqueue etc for a new task group */ ++struct task_group *sched_create_group(struct task_group *parent) ++{ ++ struct task_group *tg; ++ ++ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); ++ if (!tg) ++ return ERR_PTR(-ENOMEM); ++ ++ return tg; ++} ++ ++void sched_online_group(struct task_group *tg, struct task_group *parent) ++{ ++} ++ ++/* rcu callback to free various structures associated with a task group */ ++static void sched_free_group_rcu(struct rcu_head *rhp) ++{ ++ /* Now it should be safe to free those cfs_rqs */ ++ sched_free_group(container_of(rhp, struct task_group, rcu)); ++} ++ ++void sched_destroy_group(struct task_group *tg) ++{ ++ /* Wait for possible concurrent references to cfs_rqs complete */ ++ call_rcu(&tg->rcu, sched_free_group_rcu); ++} ++ ++void sched_offline_group(struct task_group *tg) ++{ ++} ++ ++static inline struct task_group *css_tg(struct cgroup_subsys_state *css) ++{ ++ return css ? container_of(css, struct task_group, css) : NULL; ++} ++ ++static struct cgroup_subsys_state * ++cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) ++{ ++ struct task_group *parent = css_tg(parent_css); ++ struct task_group *tg; ++ ++ if (!parent) { ++ /* This is early initialization for the top cgroup */ ++ return &root_task_group.css; ++ } ++ ++ tg = sched_create_group(parent); ++ if (IS_ERR(tg)) ++ return ERR_PTR(-ENOMEM); ++ return &tg->css; ++} ++ ++/* Expose task group only after completing cgroup initialization */ ++static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) ++{ ++ struct task_group *tg = css_tg(css); ++ struct task_group *parent = css_tg(css->parent); ++ ++ if (parent) ++ sched_online_group(tg, parent); ++ return 0; ++} ++ ++static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) ++{ ++ struct task_group *tg = css_tg(css); ++ ++ sched_offline_group(tg); ++} ++ ++static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) ++{ ++ struct task_group *tg = css_tg(css); ++ ++ /* ++ * Relies on the RCU grace period between css_released() and this. 
++ */ ++ sched_free_group(tg); ++} ++ ++static void cpu_cgroup_fork(struct task_struct *task) ++{ ++} ++ ++static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) ++{ ++ return 0; ++} ++ ++static void cpu_cgroup_attach(struct cgroup_taskset *tset) ++{ ++} ++ ++static struct cftype cpu_legacy_files[] = { ++ { } /* Terminate */ ++}; ++ ++ ++static struct cftype cpu_files[] = { ++ { } /* terminate */ ++}; ++ ++static int cpu_extra_stat_show(struct seq_file *sf, ++ struct cgroup_subsys_state *css) ++{ ++ return 0; ++} ++ ++struct cgroup_subsys cpu_cgrp_subsys = { ++ .css_alloc = cpu_cgroup_css_alloc, ++ .css_online = cpu_cgroup_css_online, ++ .css_released = cpu_cgroup_css_released, ++ .css_free = cpu_cgroup_css_free, ++ .css_extra_stat_show = cpu_extra_stat_show, ++ .fork = cpu_cgroup_fork, ++ .can_attach = cpu_cgroup_can_attach, ++ .attach = cpu_cgroup_attach, ++ .legacy_cftypes = cpu_files, ++ .legacy_cftypes = cpu_legacy_files, ++ .dfl_cftypes = cpu_files, ++ .early_init = true, ++ .threaded = true, ++}; ++#endif /* CONFIG_CGROUP_SCHED */ ++ ++#undef CREATE_TRACE_POINTS +diff --git a/kernel/sched/alt_debug.c b/kernel/sched/alt_debug.c +new file mode 100644 +index 000000000000..835e6bb98dda +--- /dev/null ++++ b/kernel/sched/alt_debug.c +@@ -0,0 +1,31 @@ ++/* ++ * kernel/sched/alt_debug.c ++ * ++ * Print the BMQ debugging details ++ * ++ * Author: Alfred Chen ++ * Date : 2020 ++ */ ++#include "sched.h" ++ ++/* ++ * This allows printing both to /proc/sched_debug and ++ * to the console ++ */ ++#define SEQ_printf(m, x...) \ ++ do { \ ++ if (m) \ ++ seq_printf(m, x); \ ++ else \ ++ pr_cont(x); \ ++ } while (0) ++ ++void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, ++ struct seq_file *m) ++{ ++ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), ++ get_nr_threads(p)); ++} ++ ++void proc_sched_set_task(struct task_struct *p) ++{} +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +new file mode 100644 +index 000000000000..0936cf766514 +--- /dev/null ++++ b/kernel/sched/alt_sched.h +@@ -0,0 +1,521 @@ ++#ifndef ALT_SCHED_H ++#define ALT_SCHED_H ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifdef CONFIG_PARAVIRT ++# include ++#endif ++ ++#include "cpupri.h" ++ ++#ifdef CONFIG_SCHED_BMQ ++#include "bmq.h" ++#endif ++ ++/* task_struct::on_rq states: */ ++#define TASK_ON_RQ_QUEUED 1 ++#define TASK_ON_RQ_MIGRATING 2 ++ ++static inline int task_on_rq_queued(struct task_struct *p) ++{ ++ return p->on_rq == TASK_ON_RQ_QUEUED; ++} ++ ++static inline int task_on_rq_migrating(struct task_struct *p) ++{ ++ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; ++} ++ ++/* ++ * wake flags ++ */ ++#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ ++#define WF_FORK 0x02 /* child wakeup after fork */ ++#define WF_MIGRATED 0x04 /* internal use, task got migrated */ ++ ++/* ++ * This is the main, per-CPU runqueue data structure. ++ * This data should only be modified by the local cpu. 
++ */ ++struct rq { ++ /* runqueue lock: */ ++ raw_spinlock_t lock; ++ ++ struct task_struct __rcu *curr; ++ struct task_struct *idle, *stop, *skip; ++ struct mm_struct *prev_mm; ++ ++#ifdef CONFIG_SCHED_BMQ ++ struct bmq queue; ++#endif ++ unsigned long watermark; ++ ++ /* switch count */ ++ u64 nr_switches; ++ ++ atomic_t nr_iowait; ++ ++#ifdef CONFIG_MEMBARRIER ++ int membarrier_state; ++#endif ++ ++#ifdef CONFIG_SMP ++ int cpu; /* cpu of this runqueue */ ++ bool online; ++ ++#ifdef CONFIG_HAVE_SCHED_AVG_IRQ ++ struct sched_avg avg_irq; ++#endif ++ ++#ifdef CONFIG_SCHED_SMT ++ int active_balance; ++ struct cpu_stop_work active_balance_work; ++#endif ++#endif /* CONFIG_SMP */ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ u64 prev_irq_time; ++#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ ++#ifdef CONFIG_PARAVIRT ++ u64 prev_steal_time; ++#endif /* CONFIG_PARAVIRT */ ++#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ++ u64 prev_steal_time_rq; ++#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ ++ ++ /* calc_load related fields */ ++ unsigned long calc_load_update; ++ long calc_load_active; ++ ++ u64 clock, last_tick; ++ u64 last_ts_switch; ++ u64 clock_task; ++ ++ unsigned long nr_running; ++ unsigned long nr_uninterruptible; ++ ++#ifdef CONFIG_SCHED_HRTICK ++#ifdef CONFIG_SMP ++ call_single_data_t hrtick_csd; ++#endif ++ struct hrtimer hrtick_timer; ++#endif ++ ++#ifdef CONFIG_SCHEDSTATS ++ ++ /* latency stats */ ++ struct sched_info rq_sched_info; ++ unsigned long long rq_cpu_time; ++ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ ++ ++ /* sys_sched_yield() stats */ ++ unsigned int yld_count; ++ ++ /* schedule() stats */ ++ unsigned int sched_switch; ++ unsigned int sched_count; ++ unsigned int sched_goidle; ++ ++ /* try_to_wake_up() stats */ ++ unsigned int ttwu_count; ++ unsigned int ttwu_local; ++#endif /* CONFIG_SCHEDSTATS */ ++#ifdef CONFIG_CPU_IDLE ++ /* Must be inspected within a rcu lock section */ ++ struct cpuidle_state *idle_state; ++#endif ++}; ++ ++extern unsigned long calc_load_update; ++extern atomic_long_t calc_load_tasks; ++ ++extern void calc_global_load_tick(struct rq *this_rq); ++extern long calc_load_fold_active(struct rq *this_rq, long adjust); ++ ++DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ++#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) ++#define this_rq() this_cpu_ptr(&runqueues) ++#define task_rq(p) cpu_rq(task_cpu(p)) ++#define cpu_curr(cpu) (cpu_rq(cpu)->curr) ++#define raw_rq() raw_cpu_ptr(&runqueues) ++ ++#ifdef CONFIG_SMP ++#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) ++void register_sched_domain_sysctl(void); ++void unregister_sched_domain_sysctl(void); ++#else ++static inline void register_sched_domain_sysctl(void) ++{ ++} ++static inline void unregister_sched_domain_sysctl(void) ++{ ++} ++#endif ++ ++extern bool sched_smp_initialized; ++ ++enum { ++ BASE_CPU_AFFINITY_CHK_LEVEL = 1, ++#ifdef CONFIG_SCHED_SMT ++ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, ++#endif ++#ifdef CONFIG_SCHED_MC ++ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, ++#endif ++ NR_CPU_AFFINITY_CHK_LEVEL ++}; ++ ++DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); ++ ++static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, ++ const cpumask_t *mask) ++{ ++ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) ++ mask++; ++ return cpu; ++} ++ ++static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) ++{ ++ return cpumask_test_cpu(cpu, cpumask)? 
cpu : ++ __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++#ifndef arch_scale_freq_tick ++static __always_inline ++void arch_scale_freq_tick(void) ++{ ++} ++#endif ++ ++#ifndef arch_scale_freq_capacity ++static __always_inline ++unsigned long arch_scale_freq_capacity(int cpu) ++{ ++ return SCHED_CAPACITY_SCALE; ++} ++#endif ++ ++static inline u64 __rq_clock_broken(struct rq *rq) ++{ ++ return READ_ONCE(rq->clock); ++} ++ ++static inline u64 rq_clock(struct rq *rq) ++{ ++ /* ++ * Relax lockdep_assert_held() checking as in VRQ, call to ++ * sched_info_xxxx() may not held rq->lock ++ * lockdep_assert_held(&rq->lock); ++ */ ++ return rq->clock; ++} ++ ++static inline u64 rq_clock_task(struct rq *rq) ++{ ++ /* ++ * Relax lockdep_assert_held() checking as in VRQ, call to ++ * sched_info_xxxx() may not held rq->lock ++ * lockdep_assert_held(&rq->lock); ++ */ ++ return rq->clock_task; ++} ++ ++/* ++ * {de,en}queue flags: ++ * ++ * DEQUEUE_SLEEP - task is no longer runnable ++ * ENQUEUE_WAKEUP - task just became runnable ++ * ++ */ ++ ++#define DEQUEUE_SLEEP 0x01 ++ ++#define ENQUEUE_WAKEUP 0x01 ++ ++ ++/* ++ * Below are scheduler API which using in other kernel code ++ * It use the dummy rq_flags ++ * ToDo : BMQ need to support these APIs for compatibility with mainline ++ * scheduler code. ++ */ ++struct rq_flags { ++ unsigned long flags; ++}; ++ ++struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(rq->lock); ++ ++struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(p->pi_lock) ++ __acquires(rq->lock); ++ ++static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ raw_spin_unlock(&rq->lock); ++} ++ ++static inline void ++task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) ++ __releases(rq->lock) ++ __releases(p->pi_lock) ++{ ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); ++} ++ ++static inline void ++rq_unlock_irq(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ raw_spin_unlock_irq(&rq->lock); ++} ++ ++static inline struct rq * ++this_rq_lock_irq(struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ struct rq *rq; ++ ++ local_irq_disable(); ++ rq = this_rq(); ++ raw_spin_lock(&rq->lock); ++ ++ return rq; ++} ++ ++static inline int task_current(struct rq *rq, struct task_struct *p) ++{ ++ return rq->curr == p; ++} ++ ++static inline bool task_running(struct task_struct *p) ++{ ++ return p->on_cpu; ++} ++ ++extern struct static_key_false sched_schedstats; ++ ++static inline void sched_ttwu_pending(void) { } ++ ++#ifdef CONFIG_CPU_IDLE ++static inline void idle_set_state(struct rq *rq, ++ struct cpuidle_state *idle_state) ++{ ++ rq->idle_state = idle_state; ++} ++ ++static inline struct cpuidle_state *idle_get_state(struct rq *rq) ++{ ++ WARN_ON(!rcu_read_lock_held()); ++ return rq->idle_state; ++} ++#else ++static inline void idle_set_state(struct rq *rq, ++ struct cpuidle_state *idle_state) ++{ ++} ++ ++static inline struct cpuidle_state *idle_get_state(struct rq *rq) ++{ ++ return NULL; ++} ++#endif ++ ++static inline int cpu_of(const struct rq *rq) ++{ ++#ifdef CONFIG_SMP ++ return rq->cpu; ++#else ++ return 0; ++#endif ++} ++ ++#include "stats.h" ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++struct irqtime { ++ u64 total; ++ u64 tick_delta; ++ u64 irq_start_time; ++ struct u64_stats_sync sync; ++}; ++ ++DECLARE_PER_CPU(struct irqtime, cpu_irqtime); ++ 
++/* ++ * Returns the irqtime minus the softirq time computed by ksoftirqd. ++ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime ++ * and never move forward. ++ */ ++static inline u64 irq_time_read(int cpu) ++{ ++ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); ++ unsigned int seq; ++ u64 total; ++ ++ do { ++ seq = __u64_stats_fetch_begin(&irqtime->sync); ++ total = irqtime->total; ++ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); ++ ++ return total; ++} ++#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ ++ ++#ifdef CONFIG_CPU_FREQ ++DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); ++ ++/** ++ * cpufreq_update_util - Take a note about CPU utilization changes. ++ * @rq: Runqueue to carry out the update for. ++ * @flags: Update reason flags. ++ * ++ * This function is called by the scheduler on the CPU whose utilization is ++ * being updated. ++ * ++ * It can only be called from RCU-sched read-side critical sections. ++ * ++ * The way cpufreq is currently arranged requires it to evaluate the CPU ++ * performance state (frequency/voltage) on a regular basis to prevent it from ++ * being stuck in a completely inadequate performance level for too long. ++ * That is not guaranteed to happen if the updates are only triggered from CFS ++ * and DL, though, because they may not be coming in if only RT tasks are ++ * active all the time (or there are RT tasks only). ++ * ++ * As a workaround for that issue, this function is called periodically by the ++ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, ++ * but that really is a band-aid. Going forward it should be replaced with ++ * solutions targeted more specifically at RT tasks. ++ */ ++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) ++{ ++ struct update_util_data *data; ++ ++ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); ++ if (data) ++ data->func(data, rq_clock(rq), flags); ++} ++#else ++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} ++#endif /* CONFIG_CPU_FREQ */ ++ ++#ifdef CONFIG_NO_HZ_FULL ++extern int __init sched_tick_offload_init(void); ++#else ++static inline int sched_tick_offload_init(void) { return 0; } ++#endif ++ ++#ifdef arch_scale_freq_capacity ++#ifndef arch_scale_freq_invariant ++#define arch_scale_freq_invariant() (true) ++#endif ++#else /* arch_scale_freq_capacity */ ++#define arch_scale_freq_invariant() (false) ++#endif ++ ++extern void schedule_idle(void); ++ ++/* ++ * !! For sched_setattr_nocheck() (kernel) only !! ++ * ++ * This is actually gross. :( ++ * ++ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE ++ * tasks, but still be able to sleep. We need this on platforms that cannot ++ * atomically change clock frequency. Remove once fast switching will be ++ * available on such platforms. ++ * ++ * SUGOV stands for SchedUtil GOVernor. ++ */ ++#define SCHED_FLAG_SUGOV 0x10000000 ++ ++#ifdef CONFIG_MEMBARRIER ++/* ++ * The scheduler provides memory barriers required by membarrier between: ++ * - prior user-space memory accesses and store to rq->membarrier_state, ++ * - store to rq->membarrier_state and following user-space memory accesses. ++ * In the same way it provides those guarantees around store to rq->curr. 
++ */ ++static inline void membarrier_switch_mm(struct rq *rq, ++ struct mm_struct *prev_mm, ++ struct mm_struct *next_mm) ++{ ++ int membarrier_state; ++ ++ if (prev_mm == next_mm) ++ return; ++ ++ membarrier_state = atomic_read(&next_mm->membarrier_state); ++ if (READ_ONCE(rq->membarrier_state) == membarrier_state) ++ return; ++ ++ WRITE_ONCE(rq->membarrier_state, membarrier_state); ++} ++#else ++static inline void membarrier_switch_mm(struct rq *rq, ++ struct mm_struct *prev_mm, ++ struct mm_struct *next_mm) ++{ ++} ++#endif ++ ++static inline int task_running_nice(struct task_struct *p) ++{ ++ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); ++} ++ ++#ifdef CONFIG_NUMA ++extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); ++#else ++static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) ++{ ++ return nr_cpu_ids; ++} ++#endif ++ ++void swake_up_all_locked(struct swait_queue_head *q); ++void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); ++ ++#endif /* ALT_SCHED_H */ +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +new file mode 100644 +index 000000000000..4ce30c30bd3e +--- /dev/null ++++ b/kernel/sched/bmq.h +@@ -0,0 +1,14 @@ ++#ifndef BMQ_H ++#define BMQ_H ++ ++/* bits: ++ * RT(0-99), Low prio adj range, nice width, high prio adj range, cpu idle task */ ++#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH + 2 * MAX_PRIORITY_ADJ + 1) ++#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) ++ ++struct bmq { ++ DECLARE_BITMAP(bitmap, SCHED_BITS); ++ struct list_head heads[SCHED_BITS]; ++}; ++ ++#endif +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +new file mode 100644 +index 000000000000..68313e01356d +--- /dev/null ++++ b/kernel/sched/bmq_imp.h +@@ -0,0 +1,86 @@ ++#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.7-r2 by Alfred Chen.\n" ++ ++static inline void sched_queue_init(struct rq *rq) ++{ ++ struct bmq *q = &rq->queue; ++ int i; ++ ++ bitmap_zero(q->bitmap, SCHED_BITS); ++ for(i = 0; i < SCHED_BITS; i++) ++ INIT_LIST_HEAD(&q->heads[i]); ++} ++ ++static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) ++{ ++ struct bmq *q = &rq->queue; ++ ++ idle->bmq_idx = IDLE_TASK_SCHED_PRIO; ++ INIT_LIST_HEAD(&q->heads[idle->bmq_idx]); ++ list_add(&idle->bmq_node, &q->heads[idle->bmq_idx]); ++ set_bit(idle->bmq_idx, q->bitmap); ++} ++ ++/* ++ * This routine used in bmq scheduler only which assume the idle task in the bmq ++ */ ++static inline struct task_struct *sched_rq_first_task(struct rq *rq) ++{ ++ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ const struct list_head *head = &rq->queue.heads[idx]; ++ ++ return list_first_entry(head, struct task_struct, bmq_node); ++} ++ ++static inline struct task_struct * ++sched_rq_next_task(struct task_struct *p, struct rq *rq) ++{ ++ unsigned long idx = p->bmq_idx; ++ struct list_head *head = &rq->queue.heads[idx]; ++ ++ if (list_is_last(&p->bmq_node, head)) { ++ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); ++ head = &rq->queue.heads[idx]; ++ ++ return list_first_entry(head, struct task_struct, bmq_node); ++ } ++ ++ return list_next_entry(p, bmq_node); ++} ++ ++#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ ++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ ++ sched_info_dequeued(rq, p); \ ++ \ ++ list_del(&p->bmq_node); \ ++ if (list_empty(&rq->queue.heads[p->bmq_idx])) { \ ++ clear_bit(p->bmq_idx, rq->queue.bitmap);\ ++ func; \ ++ } ++ ++#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ 
++ sched_info_queued(rq, p); \ ++ psi_enqueue(p, flags); \ ++ \ ++ p->bmq_idx = task_sched_prio(p); \ ++ list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ ++ set_bit(p->bmq_idx, rq->queue.bitmap) ++ ++static inline void __requeue_task(struct task_struct *p, struct rq *rq) ++{ ++ int idx = task_sched_prio(p); ++ ++ list_del(&p->bmq_node); ++ list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); ++ if (idx != p->bmq_idx) { ++ if (list_empty(&rq->queue.heads[p->bmq_idx])) ++ clear_bit(p->bmq_idx, rq->queue.bitmap); ++ p->bmq_idx = idx; ++ set_bit(p->bmq_idx, rq->queue.bitmap); ++ update_sched_rq_watermark(rq); ++ } ++} ++ ++static inline bool sched_task_need_requeue(struct task_struct *p) ++{ ++ return (task_sched_prio(p) != p->bmq_idx); ++} +diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c +index d9dc9ab3773f..71a25540d65e 100644 +--- a/kernel/sched/build_policy.c ++++ b/kernel/sched/build_policy.c +@@ -42,13 +42,19 @@ + + #include "idle.c" + ++#ifndef CONFIG_SCHED_ALT + #include "rt.c" ++#endif + + #ifdef CONFIG_SMP ++#ifndef CONFIG_SCHED_ALT + # include "cpudeadline.c" ++#endif + # include "pelt.c" + #endif + + #include "cputime.c" +-#include "deadline.c" + ++#ifndef CONFIG_SCHED_ALT ++#include "deadline.c" ++#endif +diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c +index 99bdd96f454f..128a283332f4 100644 +--- a/kernel/sched/build_utility.c ++++ b/kernel/sched/build_utility.c +@@ -69,9 +69,11 @@ + # include "cpufreq_schedutil.c" + #endif + ++#ifdef CONFIG_SCHED_ALT + #ifdef CONFIG_SCHED_DEBUG + # include "debug.c" + #endif ++#endif + + #ifdef CONFIG_SCHEDSTATS + # include "stats.c" +@@ -85,7 +87,9 @@ + + #ifdef CONFIG_SMP + # include "cpupri.c" ++#ifdef CONFIG_SCHED_ALT + # include "stop_task.c" ++#endif + # include "topology.c" + #endif + +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index 3dbf351d12d5..424b1e360af5 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -154,6 +154,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, + return cpufreq_driver_resolve_freq(policy, freq); + } + ++#ifndef CONFIG_SCHED_ALT + static void sugov_get_util(struct sugov_cpu *sg_cpu) + { + struct rq *rq = cpu_rq(sg_cpu->cpu); +@@ -164,6 +165,13 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) + sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max, + FREQUENCY_UTIL, NULL); + } ++#else /* CONFIG_SCHED_ALT */ ++static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) ++{ ++ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); ++ return sg_cpu->max; ++} ++#endif + + /** + * sugov_iowait_reset() - Reset the IO boost status of a CPU. 
+@@ -306,7 +314,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } + */ + static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) + { ++#ifndef CONFIG_SCHED_ALT + if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) ++#endif + sg_cpu->sg_policy->limits_changed = true; + } + +@@ -607,6 +617,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) + } + + ret = sched_setattr_nocheck(thread, &attr); ++ + if (ret) { + kthread_stop(thread); + pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); +@@ -837,6 +848,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) + cpufreq_governor_init(schedutil_gov); + + #ifdef CONFIG_ENERGY_MODEL ++#ifndef CONFIG_SCHED_ALT + static void rebuild_sd_workfn(struct work_struct *work) + { + rebuild_sched_domains_energy(); +@@ -860,4 +872,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, + } + + } ++#else /* CONFIG_SCHED_ALT */ ++void sched_cpufreq_governor_change(struct cpufreq_policy *policy, ++ struct cpufreq_governor *old_gov) ++{ ++} ++#endif + #endif +diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c +index 78a233d43757..b3bbc87d4352 100644 +--- a/kernel/sched/cputime.c ++++ b/kernel/sched/cputime.c +@@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) + p->utime += cputime; + account_group_user_time(p, cputime); + +- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; ++ index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; + + /* Add user time to cpustat. */ + task_group_account_field(p, index, cputime); +@@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) + p->gtime += cputime; + + /* Add guest time to cpustat. */ +- if (task_nice(p) > 0) { ++ if (task_running_nice(p)) { + task_group_account_field(p, CPUTIME_NICE, cputime); + cpustat[CPUTIME_GUEST_NICE] += cputime; + } else { +@@ -269,7 +269,7 @@ static inline u64 account_other_time(u64 max) + #ifdef CONFIG_64BIT + static inline u64 read_sum_exec_runtime(struct task_struct *t) + { +- return t->se.sum_exec_runtime; ++ return tsk_seruntime(t); + } + #else + static u64 read_sum_exec_runtime(struct task_struct *t) +@@ -279,7 +279,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) + struct rq *rq; + + rq = task_rq_lock(t, &rf); +- ns = t->se.sum_exec_runtime; ++ ns = tsk_seruntime(t); + task_rq_unlock(rq, t, &rf); + + return ns; +@@ -611,7 +611,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, + void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) + { + struct task_cputime cputime = { +- .sum_exec_runtime = p->se.sum_exec_runtime, ++ .sum_exec_runtime = tsk_seruntime(p), + }; + + if (task_cputime(p, &cputime.utime, &cputime.stime)) +diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c +index 328cccbee444..aef991facc79 100644 +--- a/kernel/sched/idle.c ++++ b/kernel/sched/idle.c +@@ -400,6 +400,7 @@ void cpu_startup_entry(enum cpuhp_state state) + do_idle(); + } + ++#ifndef CONFIG_SCHED_ALT + /* + * idle-task scheduling class. 
+ */ +@@ -521,3 +522,4 @@ DEFINE_SCHED_CLASS(idle) = { + .switched_to = switched_to_idle, + .update_curr = update_curr_idle, + }; ++#endif +diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c +index 0f310768260c..184918afc89c 100644 +--- a/kernel/sched/pelt.c ++++ b/kernel/sched/pelt.c +@@ -266,6 +266,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) + WRITE_ONCE(sa->util_avg, sa->util_sum / divider); + } + ++#ifndef CONFIG_SCHED_ALT + /* + * sched_entity: + * +@@ -383,6 +384,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) + + return 0; + } ++#endif + + #ifdef CONFIG_SCHED_THERMAL_PRESSURE + /* +diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h +index 4ff2ed4f8fa1..b95f7db44dd8 100644 +--- a/kernel/sched/pelt.h ++++ b/kernel/sched/pelt.h +@@ -1,11 +1,13 @@ + #ifdef CONFIG_SMP + #include "sched-pelt.h" + ++#ifndef CONFIG_SCHED_ALT + int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); + int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); + int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); + int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); + int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); ++#endif + + #ifdef CONFIG_SCHED_THERMAL_PRESSURE + int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); +@@ -44,6 +46,7 @@ static inline u32 get_pelt_divider(struct sched_avg *avg) + return PELT_MIN_DIVIDER + avg->period_contrib; + } + ++#ifndef CONFIG_SCHED_ALT + static inline void cfs_se_util_change(struct sched_avg *avg) + { + unsigned int enqueued; +@@ -155,9 +158,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) + return rq_clock_pelt(rq_of(cfs_rq)); + } + #endif ++#endif /* CONFIG_SCHED_ALT */ + + #else + ++#ifndef CONFIG_SCHED_ALT + static inline int + update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) + { +@@ -175,6 +180,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) + { + return 0; + } ++#endif + + static inline int + update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 47b89a0fc6e5..de2641a32c22 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -5,6 +5,10 @@ + #ifndef _KERNEL_SCHED_SCHED_H + #define _KERNEL_SCHED_SCHED_H + ++#ifdef CONFIG_SCHED_ALT ++#include "alt_sched.h" ++#else ++ + #include + #include + #include +@@ -3116,4 +3120,9 @@ extern int sched_dynamic_mode(const char *str); + extern void sched_dynamic_update(int mode); + #endif + ++static inline int task_running_nice(struct task_struct *p) ++{ ++ return (task_nice(p) > 0); ++} ++#endif /* !CONFIG_SCHED_ALT */ + #endif /* _KERNEL_SCHED_SCHED_H */ +diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c +index 857f837f52cb..5486c63e4790 100644 +--- a/kernel/sched/stats.c ++++ b/kernel/sched/stats.c +@@ -125,8 +125,10 @@ static int show_schedstat(struct seq_file *seq, void *v) + } else { + struct rq *rq; + #ifdef CONFIG_SMP ++#ifndef CONFIG_SCHED_ALT + struct sched_domain *sd; + int dcount = 0; ++#endif + #endif + cpu = (unsigned long)(v - 2); + rq = cpu_rq(cpu); +@@ -143,6 +145,7 @@ static int show_schedstat(struct seq_file *seq, void *v) + seq_printf(seq, "\n"); + + #ifdef CONFIG_SMP ++#ifndef CONFIG_SCHED_ALT + /* domain-specific stats */ + rcu_read_lock(); + for_each_domain(cpu, sd) { +@@ -171,6 +174,7 @@ static int show_schedstat(struct seq_file *seq, void *v) + sd->ttwu_move_balance); + } + rcu_read_unlock(); ++#endif + #endif + } + return 0; +diff --git 
a/kernel/sched/topology.c b/kernel/sched/topology.c +index 05b6c2ad90b9..2c9daf90398f 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -3,6 +3,7 @@ + * Scheduler topology setup/handling methods + */ + ++#ifndef CONFIG_SCHED_ALT + DEFINE_MUTEX(sched_domains_mutex); + + /* Protected by sched_domains_mutex: */ +@@ -1413,8 +1414,10 @@ static void asym_cpu_capacity_scan(void) + */ + + static int default_relax_domain_level = -1; ++#endif /* CONFIG_SCHED_ALT */ + int sched_domain_level_max; + ++#ifndef CONFIG_SCHED_ALT + static int __init setup_relax_domain_level(char *str) + { + if (kstrtoint(str, 0, &default_relax_domain_level)) +@@ -1647,6 +1650,7 @@ sd_init(struct sched_domain_topology_level *tl, + + return sd; + } ++#endif /* CONFIG_SCHED_ALT */ + + /* + * Topology list, bottom-up. +@@ -1683,6 +1687,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl) + sched_domain_topology_saved = NULL; + } + ++#ifndef CONFIG_SCHED_ALT + #ifdef CONFIG_NUMA + + static const struct cpumask *sd_numa_mask(int cpu) +@@ -2638,3 +2643,17 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); + mutex_unlock(&sched_domains_mutex); + } ++#else /* CONFIG_SCHED_ALT */ ++void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ++ struct sched_domain_attr *dattr_new) ++{} ++ ++#ifdef CONFIG_NUMA ++int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; ++ ++int sched_numa_find_closest(const struct cpumask *cpus, int cpu) ++{ ++ return best_mask_cpu(cpu, cpus); ++} ++#endif /* CONFIG_NUMA */ ++#endif +diff --git a/kernel/sysctl.c b/kernel/sysctl.c +index e52b6e372c60..124713423ebc 100644 +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -86,6 +86,10 @@ + + /* Constants used for minimum and maximum */ + ++#ifdef CONFIG_SCHED_ALT ++extern int sched_yield_type; ++#endif ++ + #ifdef CONFIG_PERF_EVENTS + static const int six_hundred_forty_kb = 640 * 1024; + #endif +@@ -1899,6 +1903,17 @@ static struct ctl_table kern_table[] = { + .proc_handler = proc_dointvec, + }, + #endif ++#ifdef CONFIG_SCHED_ALT ++ { ++ .procname = "yield_type", ++ .data = &sched_yield_type, ++ .maxlen = sizeof (int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_TWO, ++ }, ++#endif + #if defined(CONFIG_S390) && defined(CONFIG_SMP) + { + .procname = "spin_retry", +diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c +index 0ea8702eb516..a27a0f3a654d 100644 +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -2088,8 +2088,10 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, + int ret = 0; + u64 slack; + ++#ifndef CONFIG_SCHED_ALT + slack = current->timer_slack_ns; + if (dl_task(current) || rt_task(current)) ++#endif + slack = 0; + + hrtimer_init_sleeper_on_stack(&t, clockid, mode); +diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c +index cb925e8ef9a8..67d823510f5c 100644 +--- a/kernel/time/posix-cpu-timers.c ++++ b/kernel/time/posix-cpu-timers.c +@@ -223,7 +223,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) + u64 stime, utime; + + task_cputime(p, &utime, &stime); +- store_samples(samples, stime, utime, p->se.sum_exec_runtime); ++ store_samples(samples, stime, utime, tsk_seruntime(p)); + } + + static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, +@@ -866,6 +866,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, + } + } + 
++#ifndef CONFIG_SCHED_ALT + static inline void check_dl_overrun(struct task_struct *tsk) + { + if (tsk->dl.dl_overrun) { +@@ -873,6 +874,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) + send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); + } + } ++#endif + + static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) + { +@@ -900,8 +902,10 @@ static void check_thread_timers(struct task_struct *tsk, + u64 samples[CPUCLOCK_MAX]; + unsigned long soft; + ++#ifndef CONFIG_SCHED_ALT + if (dl_task(tsk)) + check_dl_overrun(tsk); ++#endif + + if (expiry_cache_is_inactive(pct)) + return; +@@ -915,7 +919,7 @@ static void check_thread_timers(struct task_struct *tsk, + soft = task_rlimit(tsk, RLIMIT_RTTIME); + if (soft != RLIM_INFINITY) { + /* Task RT timeout is accounted in jiffies. RTTIME is usec */ +- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); ++ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); + unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); + + /* At the hard limit, send SIGKILL. No further action. */ +@@ -1151,8 +1155,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) + return true; + } + ++#ifndef CONFIG_SCHED_ALT + if (dl_task(tsk) && tsk->dl.dl_overrun) + return true; ++#endif + + return false; + } +diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c +index a2d301f58ced..bc131c6d1299 100644 +--- a/kernel/trace/trace_selftest.c ++++ b/kernel/trace/trace_selftest.c +@@ -1143,10 +1143,15 @@ static int trace_wakeup_test_thread(void *data) + { + /* Make this a -deadline thread */ + static const struct sched_attr attr = { ++#ifdef CONFIG_SCHED_ALT ++ /* No deadline on BMQ, use RR */ ++ .sched_policy = SCHED_RR, ++#else + .sched_policy = SCHED_DEADLINE, + .sched_runtime = 100000ULL, + .sched_deadline = 10000000ULL, + .sched_period = 10000000ULL ++#endif + }; + struct wakeup_test_data *x = data; + +-- +2.37.0 + + +From d98eea85062a3b613ec15b7ceba280c0549d6257 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 1 Jul 2020 21:56:45 +0800 +Subject: [PATCH 002/297] sched/alt: Fix compilation issue when + CONFIG_SCHED_TRACER + +--- + include/linux/sched/deadline.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h +index 529e1b2ebd19..da7834c45673 100644 +--- a/include/linux/sched/deadline.h ++++ b/include/linux/sched/deadline.h +@@ -4,6 +4,11 @@ + + #ifdef CONFIG_SCHED_BMQ + #define __tsk_deadline(p) (0UL) ++ ++static inline int dl_task(struct task_struct *p) ++{ ++ return 0; ++} + #endif + + #else +-- +2.37.0 + + +From 2545c2e765185dfa44556e917aae8d99e5cc81b2 Mon Sep 17 00:00:00 2001 +From: Piotr Gorski +Date: Fri, 10 Jul 2020 16:18:57 +0200 +Subject: [PATCH 003/297] sched/alt: Backport update to + "__set_cpus_allowed_ptr()" in sched/core.c + +Signed-off-by: Piotr Gorski +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 09ca47de425c..ab331d814e4e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1473,7 +1473,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + goto out; + } + +- if (cpumask_equal(p->cpus_ptr, new_mask)) ++ if (cpumask_equal(&p->cpus_mask, new_mask)) + goto out; + + dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); +-- +2.37.0 + + +From 0936f2bb4e9964b32c7a6e94ccf75d137a36dfdc Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 
29 Jul 2020 06:54:45 +0800 +Subject: [PATCH 004/297] sched/alt: Instroduce rq wake_list. + +--- + include/linux/sched.h | 3 + + kernel/sched/alt_core.c | 123 ++++++++++++++++++++++++++++++++++++++- + kernel/sched/alt_sched.h | 10 +++- + 3 files changed, 131 insertions(+), 5 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 6907241224a5..af63be15fb5e 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -750,6 +750,9 @@ struct task_struct { + unsigned int flags; + unsigned int ptrace; + ++#if defined(CONFIG_SMP) ++ struct llist_node wake_entry; ++#endif + #if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) + int on_cpu; + #endif +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index ab331d814e4e..48e5fac710bc 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -336,6 +336,20 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) + } + } + ++static inline void ++rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock_irqsave(&rq->lock, rf->flags); ++} ++ ++static inline void ++rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); ++} ++ + /* + * RQ-clock updating methods: + */ +@@ -770,6 +784,34 @@ void wake_up_nohz_cpu(int cpu) + wake_up_idle_cpu(cpu); + } + ++static inline bool got_nohz_idle_kick(void) ++{ ++ int cpu = smp_processor_id(); ++ ++ /* TODO: need to support nohz_flag ++ if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK)) ++ return false; ++ */ ++ ++ if (idle_cpu(cpu) && !need_resched()) ++ return true; ++ ++ /* ++ * We can't run Idle Load Balance on this CPU for this time so we ++ * cancel it and clear NOHZ_BALANCE_KICK ++ */ ++ /* TODO: need to support nohz_flag ++ atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); ++ */ ++ return false; ++} ++ ++#else /* CONFIG_NO_HZ_COMMON */ ++ ++static inline bool got_nohz_idle_kick(void) ++{ ++ return false; ++} + #endif /* CONFIG_NO_HZ_COMMON */ + #endif /* CONFIG_SMP */ + +@@ -1127,6 +1169,12 @@ static int migration_cpu_stop(void *data) + * be on another CPU but it doesn't matter. + */ + local_irq_disable(); ++ /* ++ * We need to explicitly wake pending tasks before running ++ * __migrate_task() such that we will not miss enforcing cpus_ptr ++ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. ++ */ ++ sched_ttwu_pending(); + + raw_spin_lock(&p->pi_lock); + raw_spin_lock(&rq->lock); +@@ -1608,6 +1656,26 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) + } + + #ifdef CONFIG_SMP ++void sched_ttwu_pending(void) ++{ ++ struct rq *rq = this_rq(); ++ struct llist_node *llist = llist_del_all(&rq->wake_list); ++ struct task_struct *p, *t; ++ struct rq_flags rf; ++ ++ if (!llist) ++ return; ++ ++ rq_lock_irqsave(rq, &rf); ++ update_rq_clock(rq); ++ ++ llist_for_each_entry_safe(p, t, llist, wake_entry) ++ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); ++ check_preempt_curr(rq); ++ ++ rq_unlock_irqrestore(rq, &rf); ++} ++ + void scheduler_ipi(void) + { + /* +@@ -1617,13 +1685,38 @@ void scheduler_ipi(void) + */ + preempt_fold_need_resched(); + +- if (!idle_cpu(smp_processor_id()) || need_resched()) ++ if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) + return; + + irq_enter(); ++ sched_ttwu_pending(); ++ ++ /* ++ * Check if someone kicked us for doing the nohz idle load balance. 
++ */ ++ if (unlikely(got_nohz_idle_kick())) { ++ /* TODO need to kick off balance ++ this_rq()->idle_balance = 1; ++ raise_softirq_irqoff(SCHED_SOFTIRQ); ++ */ ++ } + irq_exit(); + } + ++static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); ++ ++ if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { ++ if (!set_nr_if_polling(rq->idle)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++ } ++} ++ + void wake_up_if_idle(int cpu) + { + struct rq *rq = cpu_rq(cpu); +@@ -1658,6 +1751,14 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) + { + struct rq *rq = cpu_rq(cpu); + ++#if defined(CONFIG_SMP) ++ if (!cpus_share_cache(smp_processor_id(), cpu)) { ++ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ ++ ttwu_queue_remote(p, cpu, wake_flags); ++ return; ++ } ++#endif ++ + raw_spin_lock(&rq->lock); + update_rq_clock(rq); + ttwu_do_activate(rq, p, wake_flags); +@@ -3890,7 +3991,20 @@ int task_prio(const struct task_struct *p) + */ + int idle_cpu(int cpu) + { +- return cpu_curr(cpu) == cpu_rq(cpu)->idle; ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (rq->curr != rq->idle) ++ return 0; ++ ++ if (rq->nr_running) ++ return 0; ++ ++#ifdef CONFIG_SMP ++ if (!llist_empty(&rq->wake_list)) ++ return 0; ++#endif ++ ++ return 1; + } + + /** +@@ -5426,6 +5540,9 @@ int sched_cpu_dying(unsigned int cpu) + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + ++ /* Handle pending wakeups and then migrate everything off */ ++ sched_ttwu_pending(); ++ + sched_tick_stop(cpu); + raw_spin_lock_irqsave(&rq->lock, flags); + set_rq_offline(rq); +@@ -5453,7 +5570,7 @@ static void sched_init_topology_cpumask_early(void) + &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); + per_cpu(sched_cpu_affinity_end_mask, cpu) = + &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); +- per_cpu(sd_llc_id, cpu) = cpu; ++ /*per_cpu(sd_llc_id, cpu) = cpu;*/ + } + } + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 0936cf766514..2b66983cce42 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -157,6 +157,11 @@ struct rq { + unsigned int ttwu_count; + unsigned int ttwu_local; + #endif /* CONFIG_SCHEDSTATS */ ++ ++#ifdef CONFIG_SMP ++ struct llist_head wake_list; ++#endif ++ + #ifdef CONFIG_CPU_IDLE + /* Must be inspected within a rcu lock section */ + struct cpuidle_state *idle_state; +@@ -218,6 +223,9 @@ static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) + __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); + } + ++extern void sched_ttwu_pending(void); ++#else /* !CONFIG_SMP */ ++static inline void sched_ttwu_pending(void) { } + #endif /* CONFIG_SMP */ + + #ifndef arch_scale_freq_tick +@@ -337,8 +345,6 @@ static inline bool task_running(struct task_struct *p) + + extern struct static_key_false sched_schedstats; + +-static inline void sched_ttwu_pending(void) { } +- + #ifdef CONFIG_CPU_IDLE + static inline void idle_set_state(struct rq *rq, + struct cpuidle_state *idle_state) +-- +2.37.0 + + +From c7a96fabb29e5b688949e369f0bff15d46fbeefe Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 29 Jul 2020 07:39:05 +0800 +Subject: [PATCH 005/297] Project-C v5.7-r3 + +--- + kernel/sched/bmq_imp.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index 68313e01356d..cb0fc0688a89 100644 +--- a/kernel/sched/bmq_imp.h 
++++ b/kernel/sched/bmq_imp.h +@@ -1,4 +1,4 @@ +-#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.7-r2 by Alfred Chen.\n" ++#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.7-r3 by Alfred Chen.\n" + + static inline void sched_queue_init(struct rq *rq) + { +-- +2.37.0 + + +From 2f12ca6d33d2f9c2a71214b5d59f9e32d548a37e Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 16 Jul 2020 16:48:01 +0800 +Subject: [PATCH 006/297] sched/alt: [Sync] 2beaf3280e57 sched/core: Add + function to sample state of locked-down task + +--- + kernel/sched/alt_core.c | 48 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 48 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 48e5fac710bc..51c2b8d0e89b 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1938,6 +1938,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * + * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in + * __schedule(). See the comment for smp_mb__after_spinlock(). ++ * ++ * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). + */ + smp_rmb(); + if (p->on_rq && ttwu_remote(p, wake_flags)) +@@ -2012,6 +2014,52 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + return success; + } + ++/** ++ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state ++ * @p: Process for which the function is to be invoked. ++ * @func: Function to invoke. ++ * @arg: Argument to function. ++ * ++ * If the specified task can be quickly locked into a definite state ++ * (either sleeping or on a given runqueue), arrange to keep it in that ++ * state while invoking @func(@arg). This function can use ->on_rq and ++ * task_curr() to work out what the state is, if required. Given that ++ * @func can be invoked with a runqueue lock held, it had better be quite ++ * lightweight. ++ * ++ * Returns: ++ * @false if the task slipped out from under the locks. ++ * @true if the task was locked onto a runqueue or is sleeping. ++ * However, @func can override this by returning @false. ++ */ ++bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) ++{ ++ bool ret = false; ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ lockdep_assert_irqs_enabled(); ++ raw_spin_lock_irq(&p->pi_lock); ++ if (p->on_rq) { ++ rq = __task_rq_lock(p, &rf); ++ if (task_rq(p) == rq) ++ ret = func(p, arg); ++ __task_rq_unlock(rq, &rf); ++ } else { ++ switch (p->state) { ++ case TASK_RUNNING: ++ case TASK_WAKING: ++ break; ++ default: ++ smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). ++ if (!p->on_rq) ++ ret = func(p, arg); ++ } ++ } ++ raw_spin_unlock_irq(&p->pi_lock); ++ return ret; ++} ++ + /** + * wake_up_process - Wake up a specific process + * @p: The process to be woken up. 
+-- +2.37.0 + + +From e416d9ae39871ba06d034f1ad70674384d255dac Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 16 Jul 2020 16:51:17 +0800 +Subject: [PATCH 007/297] sched/alt: [Sync] d08b9f0ca660 scs: Add support for + Clang's Shadow Call Stack (SCS) + +--- + kernel/sched/alt_core.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 51c2b8d0e89b..c85e72b38320 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -31,6 +31,7 @@ + #include + + #include ++#include + + #include + +@@ -5281,6 +5282,7 @@ void init_idle(struct task_struct *idle, int cpu) + idle->flags |= PF_IDLE; + sched_queue_init_idle(rq, idle); + ++ scs_task_reset(idle); + kasan_unpoison_task_stack(idle); + + #ifdef CONFIG_SMP +-- +2.37.0 + + +From 9a34c52be25591f34a4fdc74d4ce1f90cb343566 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 16 Jul 2020 16:53:06 +0800 +Subject: [PATCH 008/297] sched/alt: [Sync] 88485be531f4 scs: Move + scs_overflow_check() out of architecture code + +--- + kernel/sched/alt_core.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c85e72b38320..03020c9154fe 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3262,6 +3262,9 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) + #ifdef CONFIG_SCHED_STACK_END_CHECK + if (task_stack_end_corrupted(prev)) + panic("corrupted stack end detected inside scheduler\n"); ++ ++ if (task_scs_end_corrupted(prev)) ++ panic("corrupted shadow stack detected inside scheduler\n"); + #endif + + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP +-- +2.37.0 + + +From a954cc7c6ed77402e00f1a4421d0bafb1573f6a3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 31 Jul 2020 10:14:20 +0800 +Subject: [PATCH 009/297] sched/alt: [Sync] 90b5363acd47 sched: Clean up + scheduler_ipi() + +--- + kernel/sched/alt_core.c | 75 ++++++++++++++++++++++++++++------------ + kernel/sched/alt_sched.h | 7 ++++ + 2 files changed, 59 insertions(+), 23 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 03020c9154fe..97a857fd0fb3 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -807,6 +807,34 @@ static inline bool got_nohz_idle_kick(void) + return false; + } + ++static void nohz_csd_func(void *info) ++{ ++ struct rq *rq = info; ++ ++ if (got_nohz_idle_kick()) { ++ /* TODO need to kick off balance ++ rq->idle_balance = 1; ++ raise_softirq_irqoff(SCHED_SOFTIRQ); ++ */ ++ } ++ /* ++ int cpu = cpu_of(rq); ++ unsigned int flags; ++ */ ++ ++ /* ++ * Release the rq::nohz_csd. ++ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); ++ WARN_ON(!(flags & NOHZ_KICK_MASK)); ++ ++ rq->idle_balance = idle_cpu(cpu); ++ if (rq->idle_balance && !need_resched()) { ++ rq->nohz_idle_balance = flags; ++ raise_softirq_irqoff(SCHED_SOFTIRQ); ++ } ++ */ ++} ++ + #else /* CONFIG_NO_HZ_COMMON */ + + static inline bool got_nohz_idle_kick(void) +@@ -822,6 +850,14 @@ static inline void check_preempt_curr(struct rq *rq) + resched_curr(rq); + } + ++static inline void ++rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func) ++{ ++ csd->flags = 0; ++ csd->func = func; ++ csd->info = rq; ++} ++ + #ifdef CONFIG_SCHED_HRTICK + /* + * Use HR-timers to deliver accurate preemption points. 
+@@ -938,9 +974,7 @@ void hrtick_start(struct rq *rq, u64 delay) + static void hrtick_rq_init(struct rq *rq) + { + #ifdef CONFIG_SMP +- rq->hrtick_csd.flags = 0; +- rq->hrtick_csd.func = __hrtick_start; +- rq->hrtick_csd.info = rq; ++ rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); + #endif + + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); +@@ -1677,6 +1711,11 @@ void sched_ttwu_pending(void) + rq_unlock_irqrestore(rq, &rf); + } + ++static void wake_csd_func(void *info) ++{ ++ sched_ttwu_pending(); ++} ++ + void scheduler_ipi(void) + { + /* +@@ -1685,23 +1724,6 @@ void scheduler_ipi(void) + * this IPI. + */ + preempt_fold_need_resched(); +- +- if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) +- return; +- +- irq_enter(); +- sched_ttwu_pending(); +- +- /* +- * Check if someone kicked us for doing the nohz idle load balance. +- */ +- if (unlikely(got_nohz_idle_kick())) { +- /* TODO need to kick off balance +- this_rq()->idle_balance = 1; +- raise_softirq_irqoff(SCHED_SOFTIRQ); +- */ +- } +- irq_exit(); + } + + static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) +@@ -1710,9 +1732,9 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) + + p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); + +- if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { ++ if (llist_add(&p->wake_entry, &rq->wake_list)) { + if (!set_nr_if_polling(rq->idle)) +- smp_send_reschedule(cpu); ++ smp_call_function_single_async(cpu, &rq->wake_csd); + else + trace_sched_wake_idle_without_ipi(cpu); + } +@@ -5755,8 +5777,15 @@ void __init sched_init(void) + #endif + #endif + rq->nr_switches = 0; +- atomic_set(&rq->nr_iowait, 0); ++ ++#ifdef CONFIG_NO_HZ_COMMON ++ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); ++#endif ++ + hrtick_rq_init(rq); ++ atomic_set(&rq->nr_iowait, 0); ++ ++ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); + } + #ifdef CONFIG_SMP + /* Set rq->online for cpu 0 */ +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 2b66983cce42..b4148dd822b2 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -166,6 +166,13 @@ struct rq { + /* Must be inspected within a rcu lock section */ + struct cpuidle_state *idle_state; + #endif ++ ++#ifdef CONFIG_NO_HZ_COMMON ++#ifdef CONFIG_SMP ++ call_single_data_t nohz_csd; ++#endif ++ atomic_t nohz_flags; ++#endif /* CONFIG_NO_HZ_COMMON */ + }; + + extern unsigned long calc_load_update; +-- +2.37.0 + + +From de95aaaaf7777cb7bf10ac96db43350607242654 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 31 Jul 2020 10:17:21 +0800 +Subject: [PATCH 010/297] sched/alt: [Sync] 2a0a24ebb499 sched: Make + scheduler_ipi inline + +--- + kernel/sched/alt_core.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 97a857fd0fb3..c2301b9f4f55 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1716,16 +1716,6 @@ static void wake_csd_func(void *info) + sched_ttwu_pending(); + } + +-void scheduler_ipi(void) +-{ +- /* +- * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting +- * TIF_NEED_RESCHED remotely (for the first time) will also send +- * this IPI. 
+- */ +- preempt_fold_need_resched(); +-} +- + static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) + { + struct rq *rq = cpu_rq(cpu); +-- +2.37.0 + + +From 0bdea6f1069003f6446267162abe2f94b96ec18b Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 31 Jul 2020 14:50:39 +0800 +Subject: [PATCH 011/297] sched/alt: [Sync] c6e7bd7afaeb sched/core: Optimize + ttwu() spinning on p->on_cpu + +--- + kernel/sched/alt_core.c | 49 ++++++++++++++++++++++++++--------------- + 1 file changed, 31 insertions(+), 18 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c2301b9f4f55..d387ae45f5e8 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1716,7 +1716,7 @@ static void wake_csd_func(void *info) + sched_ttwu_pending(); + } + +-static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) ++static void __ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) + { + struct rq *rq = cpu_rq(cpu); + +@@ -1730,6 +1730,17 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) + } + } + ++static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) ++{ ++ if (!cpus_share_cache(smp_processor_id(), cpu)) { ++ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ ++ __ttwu_queue_remote(p, cpu, wake_flags); ++ return true; ++ } ++ ++ return false; ++} ++ + void wake_up_if_idle(int cpu) + { + struct rq *rq = cpu_rq(cpu); +@@ -1765,11 +1776,8 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) + struct rq *rq = cpu_rq(cpu); + + #if defined(CONFIG_SMP) +- if (!cpus_share_cache(smp_processor_id(), cpu)) { +- sched_clock_cpu(cpu); /* Sync clocks across CPUs */ +- ttwu_queue_remote(p, cpu, wake_flags); ++ if (ttwu_queue_remote(p, cpu, wake_flags)) + return; +- } + #endif + + raw_spin_lock(&rq->lock); +@@ -1958,7 +1966,15 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + if (p->on_rq && ttwu_remote(p, wake_flags)) + goto unlock; + ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } ++ + #ifdef CONFIG_SMP ++ p->sched_contributes_to_load = !!task_contributes_to_load(p); ++ p->state = TASK_WAKING; ++ + /* + * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be + * possible to, falsely, observe p->on_cpu == 0. +@@ -1980,6 +1996,16 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + */ + smp_rmb(); + ++ /* ++ * If the owning (remote) CPU is still in the middle of schedule() with ++ * this task as prev, considering queueing p on the remote CPUs wake_list ++ * which potentially sends an IPI instead of spinning on p->on_cpu to ++ * let the waker make forward progress. This is safe because IRQs are ++ * disabled and the IPI will deliver after on_cpu is cleared. ++ */ ++ if (READ_ONCE(p->on_cpu) && ttwu_queue_remote(p, cpu, wake_flags)) ++ goto unlock; ++ + /* + * If the owning (remote) CPU is still in the middle of schedule() with + * this task as prev, wait until its done referencing the task. 
+@@ -1991,14 +2017,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + */ + smp_cond_load_acquire(&p->on_cpu, !VAL); + +- p->sched_contributes_to_load = !!task_contributes_to_load(p); +- p->state = TASK_WAKING; +- +- if (p->in_iowait) { +- delayacct_blkio_end(p); +- atomic_dec(&task_rq(p)->nr_iowait); +- } +- + if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) + boost_task(p); + +@@ -2009,11 +2027,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + psi_ttwu_dequeue(p); + set_task_cpu(p, cpu); + } +-#else /* CONFIG_SMP */ +- if (p->in_iowait) { +- delayacct_blkio_end(p); +- atomic_dec(&task_rq(p)->nr_iowait); +- } + #endif /* CONFIG_SMP */ + + ttwu_queue(p, cpu, wake_flags); +-- +2.37.0 + + +From 751986f9d045ee7d1277a5351ec0b09f60548dc1 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 08:42:49 +0800 +Subject: [PATCH 012/297] sched/alt: [Sync] 2ebb17717550 sched/core: Offload + wakee task activation if it the wakee is descheduling + +--- + kernel/sched/alt_core.c | 39 +++++++++++++++++++++++++++++++++------ + kernel/sched/alt_sched.h | 1 + + 2 files changed, 34 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index d387ae45f5e8..84cde78423c6 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1716,7 +1716,13 @@ static void wake_csd_func(void *info) + sched_ttwu_pending(); + } + +-static void __ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) ++/* ++ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if ++ * necessary. The wakee CPU on receipt of the IPI will queue the task ++ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost ++ * of the wakeup instead of the waker. ++ */ ++static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) + { + struct rq *rq = cpu_rq(cpu); + +@@ -1730,11 +1736,32 @@ static void __ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) + } + } + +-static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) ++static inline bool ttwu_queue_cond(int cpu, int wake_flags) ++{ ++ /* ++ * If the CPU does not share cache, then queue the task on the ++ * remote rqs wakelist to avoid accessing remote data. ++ */ ++ if (!cpus_share_cache(smp_processor_id(), cpu)) ++ return true; ++ ++ /* ++ * If the task is descheduling and the only running task on the ++ * CPU then use the wakelist to offload the task activation to ++ * the soon-to-be-idle CPU as the current CPU is likely busy. ++ * nr_running is checked to avoid unnecessary task stacking. ++ */ ++ if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1) ++ return true; ++ ++ return false; ++} ++ ++static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) + { +- if (!cpus_share_cache(smp_processor_id(), cpu)) { ++ if (ttwu_queue_cond(cpu, wake_flags)) { + sched_clock_cpu(cpu); /* Sync clocks across CPUs */ +- __ttwu_queue_remote(p, cpu, wake_flags); ++ __ttwu_queue_wakelist(p, cpu, wake_flags); + return true; + } + +@@ -1776,7 +1803,7 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) + struct rq *rq = cpu_rq(cpu); + + #if defined(CONFIG_SMP) +- if (ttwu_queue_remote(p, cpu, wake_flags)) ++ if (ttwu_queue_wakelist(p, cpu, wake_flags)) + return; + #endif + +@@ -2003,7 +2030,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * let the waker make forward progress. 
This is safe because IRQs are + * disabled and the IPI will deliver after on_cpu is cleared. + */ +- if (READ_ONCE(p->on_cpu) && ttwu_queue_remote(p, cpu, wake_flags)) ++ if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ)) + goto unlock; + + /* +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index b4148dd822b2..4684f2d27c7c 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -70,6 +70,7 @@ static inline int task_on_rq_migrating(struct task_struct *p) + #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ + #define WF_FORK 0x02 /* child wakeup after fork */ + #define WF_MIGRATED 0x04 /* internal use, task got migrated */ ++#define WF_ON_RQ 0x08 /* Wakee is on_rq */ + + /* + * This is the main, per-CPU runqueue data structure. +-- +2.37.0 + + +From 4410bc19de7ae9b89b5e76d540547fc1da4ec1f7 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 09:11:04 +0800 +Subject: [PATCH 013/297] sched/alt: [Sync] 19a1f5ec6999 sched: Fix + smp_call_function_single_async() usage for ILB + +--- + kernel/sched/alt_core.c | 38 +------------------------------------- + kernel/sched/alt_sched.h | 7 +++---- + 2 files changed, 4 insertions(+), 41 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 84cde78423c6..929a2654355e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -785,45 +785,15 @@ void wake_up_nohz_cpu(int cpu) + wake_up_idle_cpu(cpu); + } + +-static inline bool got_nohz_idle_kick(void) +-{ +- int cpu = smp_processor_id(); +- +- /* TODO: need to support nohz_flag +- if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK)) +- return false; +- */ +- +- if (idle_cpu(cpu) && !need_resched()) +- return true; +- +- /* +- * We can't run Idle Load Balance on this CPU for this time so we +- * cancel it and clear NOHZ_BALANCE_KICK +- */ +- /* TODO: need to support nohz_flag +- atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); +- */ +- return false; +-} +- + static void nohz_csd_func(void *info) + { + struct rq *rq = info; +- +- if (got_nohz_idle_kick()) { +- /* TODO need to kick off balance +- rq->idle_balance = 1; +- raise_softirq_irqoff(SCHED_SOFTIRQ); +- */ +- } +- /* + int cpu = cpu_of(rq); + unsigned int flags; +- */ + + /* + * Release the rq::nohz_csd. 
++ */ + flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); + WARN_ON(!(flags & NOHZ_KICK_MASK)); + +@@ -835,12 +805,6 @@ static void nohz_csd_func(void *info) + */ + } + +-#else /* CONFIG_NO_HZ_COMMON */ +- +-static inline bool got_nohz_idle_kick(void) +-{ +- return false; +-} + #endif /* CONFIG_NO_HZ_COMMON */ + #endif /* CONFIG_SMP */ + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 4684f2d27c7c..a6e618494b52 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -102,6 +102,9 @@ struct rq { + int cpu; /* cpu of this runqueue */ + bool online; + ++ struct llist_head wake_list; ++ unsigned char nohz_idle_balance; ++ + #ifdef CONFIG_HAVE_SCHED_AVG_IRQ + struct sched_avg avg_irq; + #endif +@@ -159,10 +162,6 @@ struct rq { + unsigned int ttwu_local; + #endif /* CONFIG_SCHEDSTATS */ + +-#ifdef CONFIG_SMP +- struct llist_head wake_list; +-#endif +- + #ifdef CONFIG_CPU_IDLE + /* Must be inspected within a rcu lock section */ + struct cpuidle_state *idle_state; +-- +2.37.0 + + +From 4103b95205040c136c99efc9830d150cfb1500ac Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 09:52:48 +0800 +Subject: [PATCH 014/297] sched/alt: [Sync] b2a02fc43a1f smp: Optimize + send_call_function_single_ipi() + +--- + kernel/sched/alt_core.c | 11 ++++++++++- + kernel/sched/alt_sched.h | 5 ++++- + 2 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 929a2654355e..5b27f920de0b 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -793,7 +793,6 @@ static void nohz_csd_func(void *info) + + /* + * Release the rq::nohz_csd. +- */ + flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); + WARN_ON(!(flags & NOHZ_KICK_MASK)); + +@@ -1680,6 +1679,16 @@ static void wake_csd_func(void *info) + sched_ttwu_pending(); + } + ++void send_call_function_single_ipi(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (!set_nr_if_polling(rq->idle)) ++ arch_send_call_function_single_ipi(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++} ++ + /* + * Queue a task on the target CPUs wake_list and wake the CPU via IPI if + * necessary. 
The wakee CPU on receipt of the IPI will queue the task +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index a6e618494b52..d996bc91e1b2 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -231,9 +231,12 @@ static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) + } + + extern void sched_ttwu_pending(void); ++extern void flush_smp_call_function_from_idle(void); ++ + #else /* !CONFIG_SMP */ ++static inline void flush_smp_call_function_from_idle(void) { } + static inline void sched_ttwu_pending(void) { } +-#endif /* CONFIG_SMP */ ++#endif + + #ifndef arch_scale_freq_tick + static __always_inline +-- +2.37.0 + + +From 1c29c11a7f640df8b8eedb958a8fce35338d1236 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 10:04:51 +0800 +Subject: [PATCH 015/297] sched/alt: [Sync] 126c2092e5c8 sched: Add + rq::ttwu_pending + +--- + kernel/sched/alt_core.c | 13 +++++++++++-- + kernel/sched/alt_sched.h | 1 + + 2 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 5b27f920de0b..9165ad79b363 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1657,13 +1657,21 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) + void sched_ttwu_pending(void) + { + struct rq *rq = this_rq(); +- struct llist_node *llist = llist_del_all(&rq->wake_list); ++ struct llist_node *llist; + struct task_struct *p, *t; + struct rq_flags rf; + ++ llist = llist_del_all(&rq->wake_list); + if (!llist) + return; + ++ /* ++ * rq::ttwu_pending racy indication of out-standing wakeups. ++ * Races such that false-negatives are possible, since they ++ * are shorter lived that false-positives would be. ++ */ ++ WRITE_ONCE(rq->ttwu_pending, 0); ++ + rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); + +@@ -1701,6 +1709,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags + + p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); + ++ WRITE_ONCE(rq->ttwu_pending, 1); + if (llist_add(&p->wake_entry, &rq->wake_list)) { + if (!set_nr_if_polling(rq->idle)) + smp_call_function_single_async(cpu, &rq->wake_csd); +@@ -4077,7 +4086,7 @@ int idle_cpu(int cpu) + return 0; + + #ifdef CONFIG_SMP +- if (!llist_empty(&rq->wake_list)) ++ if (rq->ttwu_pending) + return 0; + #endif + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index d996bc91e1b2..194aae423056 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -102,6 +102,7 @@ struct rq { + int cpu; /* cpu of this runqueue */ + bool online; + ++ unsigned int ttwu_pending; + struct llist_head wake_list; + unsigned char nohz_idle_balance; + +-- +2.37.0 + + +From 994e7926ae4a5806586086c33d43d89d7a93d799 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 11:17:52 +0800 +Subject: [PATCH 016/297] sched/alt: [Sync] a148866489fb sched: Replace + rq::wake_list + +--- + include/linux/sched.h | 3 ++- + kernel/sched/alt_core.c | 25 ++++++++----------------- + kernel/sched/alt_sched.h | 3 --- + 3 files changed, 10 insertions(+), 21 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index af63be15fb5e..4d74a6f6c66d 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -750,8 +750,9 @@ struct task_struct { + unsigned int flags; + unsigned int ptrace; + +-#if defined(CONFIG_SMP) ++#ifdef CONFIG_SMP + struct llist_node wake_entry; ++ unsigned int wake_entry_type; + #endif + #if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) + int 
on_cpu; +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 9165ad79b363..e412ca697c38 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1172,7 +1172,7 @@ static int migration_cpu_stop(void *data) + * __migrate_task() such that we will not miss enforcing cpus_ptr + * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. + */ +- sched_ttwu_pending(); ++ flush_smp_call_function_from_idle(); + + raw_spin_lock(&p->pi_lock); + raw_spin_lock(&rq->lock); +@@ -1654,14 +1654,13 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) + } + + #ifdef CONFIG_SMP +-void sched_ttwu_pending(void) ++void sched_ttwu_pending(void *arg) + { ++ struct llist_node *llist = arg; + struct rq *rq = this_rq(); +- struct llist_node *llist; + struct task_struct *p, *t; + struct rq_flags rf; + +- llist = llist_del_all(&rq->wake_list); + if (!llist) + return; + +@@ -1682,11 +1681,6 @@ void sched_ttwu_pending(void) + rq_unlock_irqrestore(rq, &rf); + } + +-static void wake_csd_func(void *info) +-{ +- sched_ttwu_pending(); +-} +- + void send_call_function_single_ipi(int cpu) + { + struct rq *rq = cpu_rq(cpu); +@@ -1710,12 +1704,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags + p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); + + WRITE_ONCE(rq->ttwu_pending, 1); +- if (llist_add(&p->wake_entry, &rq->wake_list)) { +- if (!set_nr_if_polling(rq->idle)) +- smp_call_function_single_async(cpu, &rq->wake_csd); +- else +- trace_sched_wake_idle_without_ipi(cpu); +- } ++ __smp_call_single_queue(cpu, &p->wake_entry); + } + + static inline bool ttwu_queue_cond(int cpu, int wake_flags) +@@ -2138,6 +2127,9 @@ static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p + #ifdef CONFIG_COMPACTION + p->capture_control = NULL; + #endif ++#ifdef CONFIG_SMP ++ p->wake_entry_type = CSD_TYPE_TTWU; ++#endif + } + + /* +@@ -5628,9 +5620,8 @@ int sched_cpu_dying(unsigned int cpu) + unsigned long flags; + + /* Handle pending wakeups and then migrate everything off */ +- sched_ttwu_pending(); +- + sched_tick_stop(cpu); ++ + raw_spin_lock_irqsave(&rq->lock, flags); + set_rq_offline(rq); + migrate_tasks(rq); +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 194aae423056..2e7fa0fe74fc 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -103,7 +103,6 @@ struct rq { + bool online; + + unsigned int ttwu_pending; +- struct llist_head wake_list; + unsigned char nohz_idle_balance; + + #ifdef CONFIG_HAVE_SCHED_AVG_IRQ +@@ -231,12 +230,10 @@ static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) + __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); + } + +-extern void sched_ttwu_pending(void); + extern void flush_smp_call_function_from_idle(void); + + #else /* !CONFIG_SMP */ + static inline void flush_smp_call_function_from_idle(void) { } +-static inline void sched_ttwu_pending(void) { } + #endif + + #ifndef arch_scale_freq_tick +-- +2.37.0 + + +From 843332026ebdd2baab8e23ff0e21ff97579ba3bf Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 11:43:42 +0800 +Subject: [PATCH 017/297] sched/alt: [Sync] 1f8db4150536 sched/headers: Split + out open-coded prototypes into kernel/sched/smp.h + +--- + kernel/sched/alt_core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index e412ca697c38..4a06d8f6d356 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -40,6 +40,7 @@ + 
#include "../smpboot.h" + + #include "pelt.h" ++#include "smp.h" + + #define CREATE_TRACE_POINTS + #include +-- +2.37.0 + + +From 2482e5722a64bc24893a2388dee534e34503b3fe Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 11:47:14 +0800 +Subject: [PATCH 018/297] sched/alt: [Sync] 2062a4e8ae9f kallsyms/printk: add + loglvl to print_ip_sym() + +--- + kernel/sched/alt_core.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4a06d8f6d356..963bb5d4ab3f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3271,8 +3271,7 @@ static noinline void __schedule_bug(struct task_struct *prev) + if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) + && in_atomic_preempt_off()) { + pr_err("Preemption disabled at:"); +- print_ip_sym(preempt_disable_ip); +- pr_cont("\n"); ++ print_ip_sym(KERN_ERR, preempt_disable_ip); + } + if (panic_on_warn) + panic("scheduling while atomic\n"); +@@ -5887,8 +5886,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset) + #ifdef CONFIG_DEBUG_PREEMPT + if (!preempt_count_equals(preempt_offset)) { + pr_err("Preemption disabled at:"); +- print_ip_sym(preempt_disable_ip); +- pr_cont("\n"); ++ print_ip_sym(KERN_ERR, preempt_disable_ip); + } + #endif + dump_stack(); +-- +2.37.0 + + +From 77916abb42d024c2f852afa55ca034e8db840a8f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 11:59:38 +0800 +Subject: [PATCH 019/297] sched/alt: [Sync] 8ba09b1dc131 sched: print stack + trace with KERN_INFO + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 963bb5d4ab3f..9e0feae7aa60 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5214,7 +5214,7 @@ void sched_show_task(struct task_struct *p) + (unsigned long)task_thread_info(p)->flags); + + print_worker_info(KERN_INFO, p); +- show_stack(p, NULL); ++ show_stack_loglvl(p, NULL, KERN_INFO); + put_task_stack(p); + } + EXPORT_SYMBOL_GPL(sched_show_task); +-- +2.37.0 + + +From 0113ee7acf2f8d4aea16c6d10ba2cd2755120801 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 18:48:11 +0800 +Subject: [PATCH 020/297] sched/alt: [Sync] 9cb8f069deee kernel: rename + show_stack_loglvl() => show_stack() + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 9e0feae7aa60..c47934a24d70 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5214,7 +5214,7 @@ void sched_show_task(struct task_struct *p) + (unsigned long)task_thread_info(p)->flags); + + print_worker_info(KERN_INFO, p); +- show_stack_loglvl(p, NULL, KERN_INFO); ++ show_stack(p, NULL, KERN_INFO); + put_task_stack(p); + } + EXPORT_SYMBOL_GPL(sched_show_task); +-- +2.37.0 + + +From cae34e897581b021f12285da75e46d03c36df448 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 20:22:57 +0800 +Subject: [PATCH 021/297] sched/alt: [Sync] b6e13e85829f sched/core: Fix ttwu() + race + +--- + kernel/sched/alt_core.c | 34 +++++++++++++++++++++++++++++----- + 1 file changed, 29 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c47934a24d70..d5d3111850a8 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1675,8 +1675,16 @@ void sched_ttwu_pending(void *arg) + rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); + +- 
llist_for_each_entry_safe(p, t, llist, wake_entry) ++ llist_for_each_entry_safe(p, t, llist, wake_entry) { ++ if (WARN_ON_ONCE(p->on_cpu)) ++ smp_cond_load_acquire(&p->on_cpu, !VAL); ++ ++ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) ++ set_task_cpu(p, cpu_of(rq)); ++ + ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); ++ } ++ + check_preempt_curr(rq); + + rq_unlock_irqrestore(rq, &rf); +@@ -1732,6 +1740,9 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) + static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) + { + if (ttwu_queue_cond(cpu, wake_flags)) { ++ if (WARN_ON_ONCE(cpu == smp_processor_id())) ++ return false; ++ + sched_clock_cpu(cpu); /* Sync clocks across CPUs */ + __ttwu_queue_wakelist(p, cpu, wake_flags); + return true; +@@ -1915,7 +1926,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + goto out; + + success = 1; +- cpu = task_cpu(p); + trace_sched_waking(p); + p->state = TASK_RUNNING; + trace_sched_wakeup(p); +@@ -1937,7 +1947,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + + /* We're going to change ->state: */ + success = 1; +- cpu = task_cpu(p); + + /* + * Ensure we load p->on_rq _after_ p->state, otherwise it would +@@ -2001,8 +2010,21 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * which potentially sends an IPI instead of spinning on p->on_cpu to + * let the waker make forward progress. This is safe because IRQs are + * disabled and the IPI will deliver after on_cpu is cleared. ++ * ++ * Ensure we load task_cpu(p) after p->on_cpu: ++ * ++ * set_task_cpu(p, cpu); ++ * STORE p->cpu = @cpu ++ * __schedule() (switch to task 'p') ++ * LOCK rq->lock ++ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) ++ * STORE p->on_cpu = 1 LOAD p->cpu ++ * ++ * to ensure we observe the correct CPU on which the task is currently ++ * scheduling. + */ +- if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ)) ++ if (smp_load_acquire(&p->on_cpu) && ++ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_RQ)) + goto unlock; + + /* +@@ -2026,6 +2048,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + psi_ttwu_dequeue(p); + set_task_cpu(p, cpu); + } ++#else ++ cpu = task_cpu(p); + #endif /* CONFIG_SMP */ + + ttwu_queue(p, cpu, wake_flags); +@@ -2033,7 +2057,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + out: + if (success) +- ttwu_stat(p, cpu, wake_flags); ++ ttwu_stat(p, task_cpu(p), wake_flags); + preempt_enable(); + + return success; +-- +2.37.0 + + +From 24c842c7c342175b20b86aae5059a97029a0c5e8 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 20:27:29 +0800 +Subject: [PATCH 022/297] sched/alt: [Sync] 739f70b476cf sched/core: + s/WF_ON_RQ/WQ_ON_CPU/ + +--- + kernel/sched/alt_core.c | 4 ++-- + kernel/sched/alt_sched.h | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index d5d3111850a8..c1664c64222b 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1731,7 +1731,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) + * the soon-to-be-idle CPU as the current CPU is likely busy. + * nr_running is checked to avoid unnecessary task stacking. 
+ */ +- if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1) ++ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) + return true; + + return false; +@@ -2024,7 +2024,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * scheduling. + */ + if (smp_load_acquire(&p->on_cpu) && +- ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_RQ)) ++ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) + goto unlock; + + /* +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 2e7fa0fe74fc..cf4df89f6b50 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -70,7 +70,7 @@ static inline int task_on_rq_migrating(struct task_struct *p) + #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ + #define WF_FORK 0x02 /* child wakeup after fork */ + #define WF_MIGRATED 0x04 /* internal use, task got migrated */ +-#define WF_ON_RQ 0x08 /* Wakee is on_rq */ ++#define WF_ON_CPU 0x08 /* Wakee is on_rq */ + + /* + * This is the main, per-CPU runqueue data structure. +-- +2.37.0 + + +From 51b020d97aa97327a38bcfc6a8b510017c6e27a6 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 2 Aug 2020 23:58:37 +0800 +Subject: [PATCH 023/297] sched/alt: [Sync] 8c4890d1c335 smp, irq_work: + Continue smp_call_function*() and irq_work*() integration + +--- + include/linux/sched.h | 3 +-- + kernel/sched/alt_core.c | 6 +++--- + 2 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 4d74a6f6c66d..20afd23b94d8 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -751,8 +751,7 @@ struct task_struct { + unsigned int ptrace; + + #ifdef CONFIG_SMP +- struct llist_node wake_entry; +- unsigned int wake_entry_type; ++ struct __call_single_node wake_entry; + #endif + #if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) + int on_cpu; +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c1664c64222b..399f9025fe59 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1675,7 +1675,7 @@ void sched_ttwu_pending(void *arg) + rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); + +- llist_for_each_entry_safe(p, t, llist, wake_entry) { ++ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { + if (WARN_ON_ONCE(p->on_cpu)) + smp_cond_load_acquire(&p->on_cpu, !VAL); + +@@ -1713,7 +1713,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags + p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); + + WRITE_ONCE(rq->ttwu_pending, 1); +- __smp_call_single_queue(cpu, &p->wake_entry); ++ __smp_call_single_queue(cpu, &p->wake_entry.llist); + } + + static inline bool ttwu_queue_cond(int cpu, int wake_flags) +@@ -2153,7 +2153,7 @@ static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p + p->capture_control = NULL; + #endif + #ifdef CONFIG_SMP +- p->wake_entry_type = CSD_TYPE_TTWU; ++ p->wake_entry.u_flags = CSD_TYPE_TTWU; + #endif + } + +-- +2.37.0 + + +From 853a5db6560ee53bec0dbdeee92c520a1cd9b397 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 3 Aug 2020 21:41:20 +0800 +Subject: [PATCH 024/297] sched/alt: [Sync] dbfb089d360b sched: Fix loadavg + accounting race + +--- + kernel/sched/alt_core.c | 71 ++++++++++++++++++++++++++++++++--------- + 1 file changed, 56 insertions(+), 15 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 399f9025fe59..d6011a8c4c76 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -993,8 +993,6 @@ 
static int effective_prio(struct task_struct *p) + */ + static void activate_task(struct task_struct *p, struct rq *rq) + { +- if (task_contributes_to_load(p)) +- rq->nr_uninterruptible--; + enqueue_task(p, rq, ENQUEUE_WAKEUP); + p->on_rq = TASK_ON_RQ_QUEUED; + cpufreq_update_util(rq, 0); +@@ -1007,8 +1005,6 @@ static void activate_task(struct task_struct *p, struct rq *rq) + */ + static inline void deactivate_task(struct task_struct *p, struct rq *rq) + { +- if (task_contributes_to_load(p)) +- rq->nr_uninterruptible++; + dequeue_task(p, rq, DEQUEUE_SLEEP); + p->on_rq = 0; + cpufreq_update_util(rq, 0); +@@ -1629,10 +1625,8 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) + static inline void + ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) + { +-#ifdef CONFIG_SMP + if (p->sched_contributes_to_load) + rq->nr_uninterruptible--; +-#endif + + activate_task(p, rq); + ttwu_do_wakeup(rq, p, 0); +@@ -1971,7 +1965,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). + */ + smp_rmb(); +- if (p->on_rq && ttwu_remote(p, wake_flags)) ++ if (READ_ONCE(p->on_rq) && ttwu_remote(p, wake_flags)) + goto unlock; + + if (p->in_iowait) { +@@ -1980,9 +1974,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + } + + #ifdef CONFIG_SMP +- p->sched_contributes_to_load = !!task_contributes_to_load(p); +- p->state = TASK_WAKING; +- + /* + * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be + * possible to, falsely, observe p->on_cpu == 0. +@@ -2001,8 +1992,20 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * + * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in + * __schedule(). See the comment for smp_mb__after_spinlock(). ++ * ++ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure ++ * schedule()'s deactivate_task() has 'happened' and p will no longer ++ * care about it's own p->state. See the comment in __schedule(). + */ +- smp_rmb(); ++ smp_acquire__after_ctrl_dep(); ++ ++ /* ++ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq ++ * == 0), which means we need to do an enqueue, change p->state to ++ * TASK_WAKING such that we can unlock p->pi_lock before doing the ++ * enqueue, such as ttwu_queue_wakelist(). ++ */ ++ p->state = TASK_WAKING; + + /* + * If the owning (remote) CPU is still in the middle of schedule() with +@@ -3528,6 +3531,7 @@ static void __sched notrace __schedule(bool preempt) + { + struct task_struct *prev, *next; + unsigned long *switch_count; ++ unsigned long prev_state; + struct rq *rq; + int cpu; + +@@ -3543,12 +3547,22 @@ static void __sched notrace __schedule(bool preempt) + local_irq_disable(); + rcu_note_context_switch(preempt); + ++ /* See deactivate_task() below. */ ++ prev_state = prev->state; ++ + /* + * Make sure that signal_pending_state()->signal_pending() below + * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) +- * done by the caller to avoid the race with signal_wake_up(). 
++ * done by the caller to avoid the race with signal_wake_up(): ++ * ++ * __set_current_state(@state) signal_wake_up() ++ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) ++ * wake_up_state(p, state) ++ * LOCK rq->lock LOCK p->pi_state ++ * smp_mb__after_spinlock() smp_mb__after_spinlock() ++ * if (signal_pending_state()) if (p->state & @state) + * +- * The membarrier system call requires a full memory barrier ++ * Also, the membarrier system call requires a full memory barrier + * after coming from user-space, before storing to rq->curr. + */ + raw_spin_lock(&rq->lock); +@@ -3557,10 +3571,37 @@ static void __sched notrace __schedule(bool preempt) + update_rq_clock(rq); + + switch_count = &prev->nivcsw; +- if (!preempt && prev->state) { +- if (signal_pending_state(prev->state, prev)) { ++ /* ++ * We must load prev->state once (task_struct::state is volatile), such ++ * that: ++ * ++ * - we form a control dependency vs deactivate_task() below. ++ * - ptrace_{,un}freeze_traced() can change ->state underneath us. ++ */ ++ prev_state = prev->state; ++ if (!preempt && prev_state && prev_state == prev->state) { ++ if (signal_pending_state(prev_state, prev)) { + prev->state = TASK_RUNNING; + } else { ++ prev->sched_contributes_to_load = ++ (prev_state & TASK_UNINTERRUPTIBLE) && ++ !(prev_state & TASK_NOLOAD) && ++ !(prev->flags & PF_FROZEN); ++ ++ if (prev->sched_contributes_to_load) ++ rq->nr_uninterruptible++; ++ ++ /* ++ * __schedule() ttwu() ++ * prev_state = prev->state; if (p->on_rq && ...) ++ * if (prev_state) goto out; ++ * p->on_rq = 0; smp_acquire__after_ctrl_dep(); ++ * p->state = TASK_WAKING ++ * ++ * Where __schedule() and ttwu() have matching control dependencies. ++ * ++ * After this, schedule() must not care about p->state any more. ++ */ + if (rq_switch_time(rq) < boost_threshold(prev)) + boost_task(prev); + deactivate_task(prev, rq); +-- +2.37.0 + + +From f751c772c3c27dbfce66568a0f23fda6eb8daeaa Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 3 Aug 2020 21:49:38 +0800 +Subject: [PATCH 025/297] sched/alt: [Sync] ce3614daabea sched: Fix unreliable + rseq cpu_id for new tasks + +--- + kernel/sched/alt_core.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index d6011a8c4c76..ef73e640af62 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2230,6 +2230,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + } + raw_spin_unlock(&rq->lock); + ++ rseq_migrate(p); + /* + * We're setting the CPU for the first time, we don't migrate, + * so use __set_task_cpu(). +@@ -2341,6 +2342,7 @@ void wake_up_new_task(struct task_struct *p) + + rq = cpu_rq(select_task_rq(p)); + #ifdef CONFIG_SMP ++ rseq_migrate(p); + /* + * Fork balancing, do it here and not earlier because: + * - cpus_ptr can change in the fork path +-- +2.37.0 + + +From 0ee63988b12700f261aa28a588806f4c91ba1949 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 3 Aug 2020 21:52:54 +0800 +Subject: [PATCH 026/297] sched/alt: [Sync] d136122f5845 sched: Fix race + against ptrace_freeze_trace() + +--- + kernel/sched/alt_core.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index ef73e640af62..abe5321fa665 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3549,9 +3549,6 @@ static void __sched notrace __schedule(bool preempt) + local_irq_disable(); + rcu_note_context_switch(preempt); + +- /* See deactivate_task() below. 
*/ +- prev_state = prev->state; +- + /* + * Make sure that signal_pending_state()->signal_pending() below + * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) +-- +2.37.0 + + +From 611fc53384a98f1b89b59fe928c6693ed7d724f0 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 3 Aug 2020 21:58:15 +0800 +Subject: [PATCH 027/297] sched/alt: [Sync] 062d3f95b630 sched: Warn if garbage + is passed to default_wake_function() + +--- + kernel/sched/alt_core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index abe5321fa665..a6b71eba16ab 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3920,6 +3920,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) + int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, + void *key) + { ++ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); + return try_to_wake_up(curr->private, mode, wake_flags); + } + EXPORT_SYMBOL(default_wake_function); +-- +2.37.0 + + +From 8562c5bfbd8fdc4fd796a0260fb26003a66a1a10 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 7 Aug 2020 10:28:07 +0800 +Subject: [PATCH 028/297] sched/alt: Fix/Enabled nohz_csd_func(), but not used. + +--- + kernel/sched/alt_core.c | 12 +++--------- + kernel/sched/alt_sched.h | 19 +++++++++++++++++++ + 2 files changed, 22 insertions(+), 9 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index a6b71eba16ab..2f67a87b4b35 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -701,13 +701,9 @@ void resched_cpu(int cpu) + + #ifdef CONFIG_SMP + #ifdef CONFIG_NO_HZ_COMMON +-void nohz_balance_enter_idle(int cpu) +-{ +-} ++void nohz_balance_enter_idle(int cpu) {} + +-void select_nohz_load_balancer(int stop_tick) +-{ +-} ++void select_nohz_load_balancer(int stop_tick) {} + + void set_cpu_sd_state_idle(void) {} + +@@ -794,6 +790,7 @@ static void nohz_csd_func(void *info) + + /* + * Release the rq::nohz_csd. ++ */ + flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); + WARN_ON(!(flags & NOHZ_KICK_MASK)); + +@@ -802,7 +799,6 @@ static void nohz_csd_func(void *info) + rq->nohz_idle_balance = flags; + raise_softirq_irqoff(SCHED_SOFTIRQ); + } +- */ + } + + #endif /* CONFIG_NO_HZ_COMMON */ +@@ -5852,8 +5848,6 @@ void __init sched_init(void) + + hrtick_rq_init(rq); + atomic_set(&rq->nr_iowait, 0); +- +- rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); + } + #ifdef CONFIG_SMP + /* Set rq->online for cpu 0 */ +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index cf4df89f6b50..d8887f377455 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -104,6 +104,7 @@ struct rq { + + unsigned int ttwu_pending; + unsigned char nohz_idle_balance; ++ unsigned char idle_balance; + + #ifdef CONFIG_HAVE_SCHED_AVG_IRQ + struct sched_avg avg_irq; +@@ -388,6 +389,24 @@ static inline int cpu_of(const struct rq *rq) + + #include "stats.h" + ++#ifdef CONFIG_NO_HZ_COMMON ++#define NOHZ_BALANCE_KICK_BIT 0 ++#define NOHZ_STATS_KICK_BIT 1 ++ ++#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) ++#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) ++ ++#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) ++ ++#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) ++ ++/* TODO: needed? 
++extern void nohz_balance_exit_idle(struct rq *rq); ++#else ++static inline void nohz_balance_exit_idle(struct rq *rq) { } ++*/ ++#endif ++ + #ifdef CONFIG_IRQ_TIME_ACCOUNTING + struct irqtime { + u64 total; +-- +2.37.0 + + +From 0107ae666f6d3b8a02885300ad77051b147cb69f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 7 Aug 2020 23:09:43 +0800 +Subject: [PATCH 029/297] Project-C v5.8-r0 + +--- + kernel/sched/bmq_imp.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index cb0fc0688a89..e581fbf3528b 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -1,4 +1,4 @@ +-#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.7-r3 by Alfred Chen.\n" ++#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.8-r0 by Alfred Chen.\n" + + static inline void sched_queue_init(struct rq *rq) + { +-- +2.37.0 + + +From 1a97ac217cf27a9b059f49ac83890a1b4449fd9c Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 11 Aug 2020 21:58:03 +0800 +Subject: [PATCH 030/297] sched/alt: Fix UP compilation issue. + +--- + kernel/sched/alt_core.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 2f67a87b4b35..5580b600db67 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -141,7 +141,9 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); + + #define IDLE_WM (IDLE_TASK_SCHED_PRIO) + ++#ifdef CONFIG_SCHED_SMT + static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; ++#endif + static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; + + static inline void update_sched_rq_watermark(struct rq *rq) +@@ -5839,12 +5841,12 @@ void __init sched_init(void) + #ifdef CONFIG_SCHED_SMT + rq->active_balance = 0; + #endif +-#endif +- rq->nr_switches = 0; + + #ifdef CONFIG_NO_HZ_COMMON + rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); + #endif ++#endif /* CONFIG_SMP */ ++ rq->nr_switches = 0; + + hrtick_rq_init(rq); + atomic_set(&rq->nr_iowait, 0); +-- +2.37.0 + + +From 257d6f0f7f6e42d1dd6eefb4d2e0382ab7c6635a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 17 Aug 2020 12:31:46 +0800 +Subject: [PATCH 031/297] sched/alt: Fix compilation issue when + CONFIG_SCHED_THERMAL_PRESSURE enabled. 
+ +--- + kernel/sched/pelt.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h +index b95f7db44dd8..226eeed61318 100644 +--- a/kernel/sched/pelt.h ++++ b/kernel/sched/pelt.h +@@ -9,7 +9,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); + int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); + #endif + +-#ifdef CONFIG_SCHED_THERMAL_PRESSURE ++#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) + int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); + + static inline u64 thermal_load_avg(struct rq *rq) +-- +2.37.0 + + +From 1772a32431c6955530b2c93c79170f4f94a7601c Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 17 Aug 2020 13:55:00 +0800 +Subject: [PATCH 032/297] Project-C v5.8-r1 + +--- + kernel/sched/bmq_imp.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index e581fbf3528b..86d496ec23b3 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -1,4 +1,4 @@ +-#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.8-r0 by Alfred Chen.\n" ++#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.8-r1 by Alfred Chen.\n" + + static inline void sched_queue_init(struct rq *rq) + { +-- +2.37.0 + + +From e57a7b15a79162df169b9f5caf3fea94a697d568 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 30 Aug 2020 08:36:33 +0800 +Subject: [PATCH 033/297] sched/alt: Fix compilation issue when + CONFIG_SCHED_THERMAL_PRESSURE enabled, cont. + +--- + kernel/sched/pelt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c +index 184918afc89c..bd38bf738fe9 100644 +--- a/kernel/sched/pelt.c ++++ b/kernel/sched/pelt.c +@@ -386,7 +386,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) + } + #endif + +-#ifdef CONFIG_SCHED_THERMAL_PRESSURE ++#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) + /* + * thermal: + * +-- +2.37.0 + + +From 9bff06ecd831861235d8de28895d952a65fdc520 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 9 Aug 2020 08:49:45 +0800 +Subject: [PATCH 034/297] sched/pds: Port of PDS + +Port PDS from 5.0 to current Project C. 
+--- + include/linux/sched.h | 11 +- + include/linux/sched/deadline.h | 4 + + include/linux/sched/prio.h | 7 +- + include/linux/skip_list.h | 177 ++++++++++++++++++++++++++++ + init/Kconfig | 6 + + init/init_task.c | 7 +- + kernel/locking/rtmutex.c | 8 ++ + kernel/sched/alt_core.c | 110 ++++++------------ + kernel/sched/alt_sched.h | 11 +- + kernel/sched/bmq.h | 6 + + kernel/sched/bmq_imp.h | 127 +++++++++++++++++--- + kernel/sched/pds.h | 14 +++ + kernel/sched/pds_imp.h | 205 +++++++++++++++++++++++++++++++++ + 13 files changed, 594 insertions(+), 99 deletions(-) + create mode 100644 include/linux/skip_list.h + create mode 100644 kernel/sched/pds.h + create mode 100644 kernel/sched/pds_imp.h + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 20afd23b94d8..2bbe9f377c6e 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + #include + + /* task_struct member predeclarations (sorted alphabetically): */ +@@ -782,11 +783,19 @@ struct task_struct { + #ifdef CONFIG_SCHED_ALT + u64 last_ran; + s64 time_slice; +- int boost_prio; + #ifdef CONFIG_SCHED_BMQ ++ int boost_prio; + int bmq_idx; + struct list_head bmq_node; + #endif /* CONFIG_SCHED_BMQ */ ++#ifdef CONFIG_SCHED_PDS ++ u64 deadline; ++ u64 priodl; ++ /* skip list level */ ++ int sl_level; ++ /* skip list node */ ++ struct skiplist_node sl_node; ++#endif /* CONFIG_SCHED_PDS */ + /* sched_clock time spent running */ + u64 sched_time; + #else /* !CONFIG_SCHED_ALT */ +diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h +index da7834c45673..0aa37b0a1676 100644 +--- a/include/linux/sched/deadline.h ++++ b/include/linux/sched/deadline.h +@@ -11,6 +11,10 @@ static inline int dl_task(struct task_struct *p) + } + #endif + ++#ifdef CONFIG_SCHED_PDS ++#define __tsk_deadline(p) ((p)->priodl) ++#endif ++ + #else + + #define __tsk_deadline(p) ((p)->dl.deadline) +diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h +index c28676e431be..4d4f92bffeea 100644 +--- a/include/linux/sched/prio.h ++++ b/include/linux/sched/prio.h +@@ -18,9 +18,12 @@ + #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) + #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) + +-#ifdef CONFIG_SCHED_ALT + /* +/- priority levels from the base priority */ +-#define MAX_PRIORITY_ADJ 4 ++#ifdef CONFIG_SCHED_BMQ ++#define MAX_PRIORITY_ADJ 7 ++#endif ++#ifdef CONFIG_SCHED_PDS ++#define MAX_PRIORITY_ADJ 0 + #endif + + /* +diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h +new file mode 100644 +index 000000000000..47ca955a451d +--- /dev/null ++++ b/include/linux/skip_list.h +@@ -0,0 +1,177 @@ ++/* ++ * Copyright (C) 2016 Alfred Chen. ++ * ++ * Code based on Con Kolivas's skip list implementation for BFS, and ++ * which is based on example originally by William Pugh. ++ * ++ * Skip Lists are a probabilistic alternative to balanced trees, as ++ * described in the June 1990 issue of CACM and were invented by ++ * William Pugh in 1987. ++ * ++ * A couple of comments about this implementation: ++ * ++ * This file only provides a infrastructure of skip list. ++ * ++ * skiplist_node is embedded into container data structure, to get rid ++ * the dependency of kmalloc/kfree operation in scheduler code. ++ * ++ * A customized search function should be defined using DEFINE_SKIPLIST_INSERT ++ * macro and be used for skip list insert operation. 
++ * ++ * Random Level is also not defined in this file, instead, it should be ++ * customized implemented and set to node->level then pass to the customized ++ * skiplist_insert function. ++ * ++ * Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) ++ * ++ * NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, ++ * considering that there will be 256 entries to enable the top level when using ++ * random level p=0.5, and that number is more than enough for a run queue usage ++ * in a scheduler usage. And it also help to reduce the memory usage of the ++ * embedded skip list node in task_struct to about 50%. ++ * ++ * The insertion routine has been implemented so as to use the ++ * dirty hack described in the CACM paper: if a random level is ++ * generated that is more than the current maximum level, the ++ * current maximum level plus one is used instead. ++ * ++ * BFS Notes: In this implementation of skiplists, there are bidirectional ++ * next/prev pointers and the insert function returns a pointer to the actual ++ * node the value is stored. The key here is chosen by the scheduler so as to ++ * sort tasks according to the priority list requirements and is no longer used ++ * by the scheduler after insertion. The scheduler lookup, however, occurs in ++ * O(1) time because it is always the first item in the level 0 linked list. ++ * Since the task struct stores a copy of the node pointer upon skiplist_insert, ++ * it can also remove it much faster than the original implementation with the ++ * aid of prev<->next pointer manipulation and no searching. ++ */ ++#ifndef _LINUX_SKIP_LIST_H ++#define _LINUX_SKIP_LIST_H ++ ++#include ++ ++#define NUM_SKIPLIST_LEVEL (8) ++ ++struct skiplist_node { ++ int level; /* Levels in this node */ ++ struct skiplist_node *next[NUM_SKIPLIST_LEVEL]; ++ struct skiplist_node *prev[NUM_SKIPLIST_LEVEL]; ++}; ++ ++#define SKIPLIST_NODE_INIT(name) { 0,\ ++ {&name, &name, &name, &name,\ ++ &name, &name, &name, &name},\ ++ {&name, &name, &name, &name,\ ++ &name, &name, &name, &name},\ ++ } ++ ++static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) ++{ ++ /* only level 0 ->next matters in skiplist_empty() */ ++ WRITE_ONCE(node->next[0], node); ++} ++ ++/** ++ * FULL_INIT_SKIPLIST_NODE -- fully init a skiplist_node, expecially for header ++ * @node: the skip list node to be inited. ++ */ ++static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) ++{ ++ int i; ++ ++ node->level = 0; ++ for (i = 0; i < NUM_SKIPLIST_LEVEL; i++) { ++ WRITE_ONCE(node->next[i], node); ++ node->prev[i] = node; ++ } ++} ++ ++/** ++ * skiplist_empty - test whether a skip list is empty ++ * @head: the skip list to test. ++ */ ++static inline int skiplist_empty(const struct skiplist_node *head) ++{ ++ return READ_ONCE(head->next[0]) == head; ++} ++ ++/** ++ * skiplist_entry - get the struct for this entry ++ * @ptr: the &struct skiplist_node pointer. ++ * @type: the type of the struct this is embedded in. ++ * @member: the name of the skiplist_node within the struct. 
++ */ ++#define skiplist_entry(ptr, type, member) \ ++ container_of(ptr, type, member) ++ ++/** ++ * DEFINE_SKIPLIST_INSERT_FUNC -- macro to define a customized skip list insert ++ * function, which takes two parameters, first one is the header node of the ++ * skip list, second one is the skip list node to be inserted ++ * @func_name: the customized skip list insert function name ++ * @search_func: the search function to be used, which takes two parameters, ++ * 1st one is the itrator of skiplist_node in the list, the 2nd is the skip list ++ * node to be inserted, the function should return true if search should be ++ * continued, otherwise return false. ++ * Returns 1 if @node is inserted as the first item of skip list at level zero, ++ * otherwise 0 ++ */ ++#define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ ++static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ ++{\ ++ struct skiplist_node *update[NUM_SKIPLIST_LEVEL];\ ++ struct skiplist_node *p, *q;\ ++ int k = head->level;\ ++\ ++ p = head;\ ++ do {\ ++ while (q = p->next[k], q != head && search_func(q, node))\ ++ p = q;\ ++ update[k] = p;\ ++ } while (--k >= 0);\ ++\ ++ k = node->level;\ ++ if (unlikely(k > head->level)) {\ ++ node->level = k = ++head->level;\ ++ update[k] = head;\ ++ }\ ++\ ++ do {\ ++ p = update[k];\ ++ q = p->next[k];\ ++ node->next[k] = q;\ ++ p->next[k] = node;\ ++ node->prev[k] = p;\ ++ q->prev[k] = node;\ ++ } while (--k >= 0);\ ++\ ++ return (p == head);\ ++} ++ ++/** ++ * skiplist_del_init -- delete skip list node from a skip list and reset it's ++ * init state ++ * @head: the header node of the skip list to be deleted from. ++ * @node: the skip list node to be deleted, the caller need to ensure @node is ++ * in skip list which @head represent. ++ * Returns 1 if @node is the first item of skip level at level zero, otherwise 0 ++ */ ++static inline int ++skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) ++{ ++ int l, m = node->level; ++ ++ for (l = 0; l <= m; l++) { ++ node->prev[l]->next[l] = node->next[l]; ++ node->next[l]->prev[l] = node->prev[l]; ++ } ++ if (m == head->level && m > 0) { ++ while (head->next[m] == head && m > 0) ++ m--; ++ head->level = m; ++ } ++ INIT_SKIPLIST_NODE(node); ++ ++ return (node->prev[0] == head); ++} ++#endif /* _LINUX_SKIP_LIST_H */ +diff --git a/init/Kconfig b/init/Kconfig +index 0a817c86c966..8059f9d860db 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -828,6 +828,12 @@ config SCHED_BMQ + responsiveness on the desktop and solid scalability on normal + hardware and commodity servers. + ++config SCHED_PDS ++ bool "PDS CPU scheduler" ++ help ++ The Priority and Deadline based Skip list multiple queue CPU ++ Scheduler. 
++ + endchoice + + endif +diff --git a/init/init_task.c b/init/init_task.c +index 9017276b1a80..fc8fcdbbd07a 100644 +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -95,10 +95,15 @@ struct task_struct init_task + .fn = do_no_restart_syscall, + }, + #ifdef CONFIG_SCHED_ALT +- .boost_prio = 0, + #ifdef CONFIG_SCHED_BMQ ++ .boost_prio = 0, + .bmq_idx = 15, + .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), ++#endif ++#ifdef CONFIG_SCHED_PDS ++ .deadline = 0, ++ .sl_level = 0, ++ .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), + #endif + .time_slice = HZ, + #else +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 4258d1c08c71..5b9893cdfb1b 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -312,6 +312,9 @@ waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) + static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, + struct rt_mutex_waiter *right) + { ++#ifdef CONFIG_SCHED_PDS ++ return (left->deadline < right->deadline); ++#else + if (left->prio < right->prio) + return 1; + +@@ -327,11 +330,15 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, + #endif + + return 0; ++#endif + } + + static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + struct rt_mutex_waiter *right) + { ++#ifdef CONFIG_SCHED_PDS ++ return (left->deadline == right->deadline); ++#else + if (left->prio != right->prio) + return 0; + +@@ -347,6 +354,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + #endif + + return 1; ++#endif + } + + static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 5580b600db67..407bc46de451 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -78,36 +78,6 @@ early_param("sched_timeslice", sched_timeslice); + */ + int sched_yield_type __read_mostly = 1; + +-#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) +-#define boost_threshold(p) (sched_timeslice_ns >>\ +- (10 - MAX_PRIORITY_ADJ - (p)->boost_prio)) +- +-static inline void boost_task(struct task_struct *p) +-{ +- int limit; +- +- switch (p->policy) { +- case SCHED_NORMAL: +- limit = -MAX_PRIORITY_ADJ; +- break; +- case SCHED_BATCH: +- case SCHED_IDLE: +- limit = 0; +- break; +- default: +- return; +- } +- +- if (p->boost_prio > limit) +- p->boost_prio--; +-} +- +-static inline void deboost_task(struct task_struct *p) +-{ +- if (p->boost_prio < MAX_PRIORITY_ADJ) +- p->boost_prio++; +-} +- + #ifdef CONFIG_SMP + static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; + +@@ -146,13 +116,22 @@ static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; + #endif + static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; + ++#ifdef CONFIG_SCHED_BMQ ++#include "bmq_imp.h" ++#endif ++#ifdef CONFIG_SCHED_PDS ++#include "pds_imp.h" ++#endif ++ + static inline void update_sched_rq_watermark(struct rq *rq) + { +- unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ unsigned long watermark = sched_queue_watermark(rq); + unsigned long last_wm = rq->watermark; + unsigned long i; + int cpu; + ++ /*printk(KERN_INFO "sched: watermark(%d) %d, last %d\n", ++ cpu_of(rq), watermark, last_wm);*/ + if (watermark == last_wm) + return; + +@@ -187,13 +166,6 @@ static inline void update_sched_rq_watermark(struct rq *rq) + #endif + } + +-static inline int task_sched_prio(struct task_struct *p) +-{ +- return (p->prio < 
MAX_RT_PRIO)? p->prio : p->prio + p->boost_prio; +-} +- +-#include "bmq_imp.h" +- + static inline struct task_struct *rq_runnable_task(struct rq *rq) + { + struct task_struct *next = sched_rq_first_task(rq); +@@ -456,6 +428,7 @@ static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) + { + lockdep_assert_held(&rq->lock); + ++ /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", + task_cpu(p), cpu_of(rq)); + +@@ -473,6 +446,7 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) + { + lockdep_assert_held(&rq->lock); + ++ /*printk(KERN_INFO "sched: enqueue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", + task_cpu(p), cpu_of(rq)); + +@@ -498,10 +472,11 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) + static inline void requeue_task(struct task_struct *p, struct rq *rq) + { + lockdep_assert_held(&rq->lock); ++ /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", + cpu_of(rq), task_cpu(p)); + +- __requeue_task(p, rq); ++ __SCHED_REQUEUE_TASK(p, rq, update_sched_rq_watermark(rq)); + } + + /* +@@ -1428,7 +1403,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) + return dest_cpu; + } + +-static inline int select_task_rq(struct task_struct *p) ++static inline int select_task_rq(struct task_struct *p, struct rq *rq) + { + cpumask_t chk_mask, tmp; + +@@ -1441,7 +1416,7 @@ static inline int select_task_rq(struct task_struct *p) + #endif + cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || + cpumask_and(&tmp, &chk_mask, +- &sched_rq_watermark[task_sched_prio(p) + 1])) ++ &sched_rq_watermark[task_sched_prio(p, rq) + 1])) + return best_mask_cpu(task_cpu(p), &tmp); + + return best_mask_cpu(task_cpu(p), &chk_mask); +@@ -1573,7 +1548,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + + #else /* CONFIG_SMP */ + +-static inline int select_task_rq(struct task_struct *p) ++static inline int select_task_rq(struct task_struct *p, struct rq *rq) + { + return 0; + } +@@ -2039,10 +2014,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + */ + smp_cond_load_acquire(&p->on_cpu, !VAL); + +- if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) +- boost_task(p); ++ sched_task_ttwu(p); + +- cpu = select_task_rq(p); ++ cpu = select_task_rq(p, this_rq()); + + if (cpu != task_cpu(p)) { + wake_flags |= WF_MIGRATED; +@@ -2198,9 +2172,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + */ + p->sched_reset_on_fork = 0; + } ++ update_task_priodl(p); + +- p->boost_prio = (p->boost_prio < 0) ? 
+- p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; ++ sched_task_fork(p); + /* + * The child is not yet in the pid-hash so no cgroup attach races, + * and the cgroup is pinned to this child due to cgroup_fork() +@@ -2224,6 +2198,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + + if (p->time_slice < RESCHED_NS) { + p->time_slice = sched_timeslice_ns; ++ time_slice_expired(p, rq); + resched_curr(rq); + } + raw_spin_unlock(&rq->lock); +@@ -2338,7 +2313,7 @@ void wake_up_new_task(struct task_struct *p) + + p->state = TASK_RUNNING; + +- rq = cpu_rq(select_task_rq(p)); ++ rq = cpu_rq(select_task_rq(p, this_rq())); + #ifdef CONFIG_SMP + rseq_migrate(p); + /* +@@ -3436,11 +3411,7 @@ static inline void check_curr(struct task_struct *p, struct rq *rq) + + if (p->time_slice < RESCHED_NS) { + p->time_slice = sched_timeslice_ns; +- if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { +- if (SCHED_RR != p->policy) +- deboost_task(p); +- requeue_task(p, rq); +- } ++ time_slice_expired(p, rq); + } + } + +@@ -3476,6 +3447,7 @@ choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) + if (!take_other_rq_tasks(rq, cpu)) { + #endif + schedstat_inc(rq->sched_goidle); ++ /*printk(KERN_INFO "sched: choose_next_task(%d) idle %px\n", cpu, next);*/ + return next; + #ifdef CONFIG_SMP + } +@@ -3485,6 +3457,8 @@ choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) + #ifdef CONFIG_HIGH_RES_TIMERS + hrtick_start(rq, next->time_slice); + #endif ++ /*printk(KERN_INFO "sched: choose_next_task(%d) next %px\n", cpu, ++ * next);*/ + return next; + } + +@@ -3599,8 +3573,7 @@ static void __sched notrace __schedule(bool preempt) + * + * After this, schedule() must not care about p->state any more. + */ +- if (rq_switch_time(rq) < boost_threshold(prev)) +- boost_task(prev); ++ sched_task_deactivate(prev, rq); + deactivate_task(prev, rq); + + if (prev->in_iowait) { +@@ -3926,7 +3899,7 @@ EXPORT_SYMBOL(default_wake_function); + static inline void check_task_changed(struct rq *rq, struct task_struct *p) + { + /* Trigger resched if task sched_prio has been modified. */ +- if (task_on_rq_queued(p) && sched_task_need_requeue(p)) { ++ if (task_on_rq_queued(p) && sched_task_need_requeue(p, rq)) { + requeue_task(p, rq); + check_preempt_curr(rq); + } +@@ -4014,6 +3987,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) + + trace_sched_pi_setprio(p, pi_task); + p->prio = prio; ++ update_task_priodl(p); + + check_task_changed(rq, p); + out_unlock: +@@ -4052,6 +4026,8 @@ void set_user_nice(struct task_struct *p, long nice) + goto out_unlock; + + p->prio = effective_prio(p); ++ update_task_priodl(p); ++ + check_task_changed(rq, p); + out_unlock: + __task_access_unlock(p, lock); +@@ -4109,21 +4085,6 @@ SYSCALL_DEFINE1(nice, int, increment) + + #endif + +-/** +- * task_prio - return the priority value of a given task. +- * @p: the task in question. +- * +- * Return: The priority value as seen by users in /proc. +- * RT tasks are offset by -100. Normal tasks are centered around 1, value goes +- * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). +- */ +-int task_prio(const struct task_struct *p) +-{ +- if (p->prio < MAX_RT_PRIO) +- return (p->prio - MAX_RT_PRIO); +- return (p->prio - MAX_RT_PRIO + p->boost_prio); +-} +- + /** + * idle_cpu - is a given CPU idle currently? + * @cpu: the processor in question. 
+@@ -4215,6 +4176,7 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, + p->prio = normal_prio(p); + if (keep_boost) + p->prio = rt_effective_prio(p, p->prio); ++ update_task_priodl(p); + } + + /* +@@ -4974,10 +4936,8 @@ static void do_sched_yield(void) + schedstat_inc(rq->yld_count); + + if (1 == sched_yield_type) { +- if (!rt_task(current)) { +- current->boost_prio = MAX_PRIORITY_ADJ; +- requeue_task(current, rq); +- } ++ if (!rt_task(current)) ++ do_sched_yield_type_1(current, rq); + } else if (2 == sched_yield_type) { + if (rq->nr_running > 1) + rq->skip = current; +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index d8887f377455..99be2c51c88d 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -49,6 +49,9 @@ + #ifdef CONFIG_SCHED_BMQ + #include "bmq.h" + #endif ++#ifdef CONFIG_SCHED_PDS ++#include "pds.h" ++#endif + + /* task_struct::on_rq states: */ + #define TASK_ON_RQ_QUEUED 1 +@@ -86,6 +89,9 @@ struct rq { + + #ifdef CONFIG_SCHED_BMQ + struct bmq queue; ++#endif ++#ifdef CONFIG_SCHED_PDS ++ struct skiplist_node sl_header; + #endif + unsigned long watermark; + +@@ -534,11 +540,6 @@ static inline void membarrier_switch_mm(struct rq *rq, + } + #endif + +-static inline int task_running_nice(struct task_struct *p) +-{ +- return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); +-} +- + #ifdef CONFIG_NUMA + extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); + #else +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index 4ce30c30bd3e..1364824cfa7d 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -11,4 +11,10 @@ struct bmq { + struct list_head heads[SCHED_BITS]; + }; + ++ ++static inline int task_running_nice(struct task_struct *p) ++{ ++ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); ++} ++ + #endif +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index 86d496ec23b3..d7df1d3f9495 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -1,5 +1,64 @@ + #define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.8-r1 by Alfred Chen.\n" + ++/* ++ * BMQ only routines ++ */ ++#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) ++#define boost_threshold(p) (sched_timeslice_ns >>\ ++ (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) ++ ++static inline void boost_task(struct task_struct *p) ++{ ++ int limit; ++ ++ switch (p->policy) { ++ case SCHED_NORMAL: ++ limit = -MAX_PRIORITY_ADJ; ++ break; ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ limit = 0; ++ break; ++ default: ++ return; ++ } ++ ++ if (p->boost_prio > limit) ++ p->boost_prio--; ++} ++ ++static inline void deboost_task(struct task_struct *p) ++{ ++ if (p->boost_prio < MAX_PRIORITY_ADJ) ++ p->boost_prio++; ++} ++ ++/* ++ * Common interfaces ++ */ ++static inline int task_sched_prio(struct task_struct *p, struct rq *rq) ++{ ++ return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; ++} ++ ++static inline void requeue_task(struct task_struct *p, struct rq *rq); ++ ++static inline void time_slice_expired(struct task_struct *p, struct rq *rq) ++{ ++ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { ++ if (SCHED_RR != p->policy) ++ deboost_task(p); ++ requeue_task(p, rq); ++ } ++} ++ ++static inline void update_task_priodl(struct task_struct *p) {} ++ ++static inline unsigned long sched_queue_watermark(struct rq *rq) ++{ ++ return find_first_bit(rq->queue.bitmap, SCHED_BITS); ++} ++ + static inline void sched_queue_init(struct rq *rq) + { + struct bmq *q = &rq->queue; +@@ -61,26 +120,64 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) + sched_info_queued(rq, p); \ + psi_enqueue(p, flags); \ + \ +- p->bmq_idx = task_sched_prio(p); \ ++ p->bmq_idx = task_sched_prio(p, rq); \ + list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ + set_bit(p->bmq_idx, rq->queue.bitmap) + +-static inline void __requeue_task(struct task_struct *p, struct rq *rq) ++#define __SCHED_REQUEUE_TASK(p, rq, func) \ ++{ \ ++ int idx = task_sched_prio(p, rq); \ ++\ ++ list_del(&p->bmq_node); \ ++ list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); \ ++ if (idx != p->bmq_idx) { \ ++ if (list_empty(&rq->queue.heads[p->bmq_idx])) \ ++ clear_bit(p->bmq_idx, rq->queue.bitmap); \ ++ p->bmq_idx = idx; \ ++ set_bit(p->bmq_idx, rq->queue.bitmap); \ ++ func; \ ++ } \ ++} ++ ++static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + { +- int idx = task_sched_prio(p); +- +- list_del(&p->bmq_node); +- list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); +- if (idx != p->bmq_idx) { +- if (list_empty(&rq->queue.heads[p->bmq_idx])) +- clear_bit(p->bmq_idx, rq->queue.bitmap); +- p->bmq_idx = idx; +- set_bit(p->bmq_idx, rq->queue.bitmap); +- update_sched_rq_watermark(rq); +- } ++ return (task_sched_prio(p, rq) != p->bmq_idx); ++} ++ ++static void sched_task_fork(struct task_struct *p) ++{ ++ p->boost_prio = (p->boost_prio < 0) ? ++ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; ++} ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The priority value as seen by users in /proc. ++ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes ++ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
++ */ ++int task_prio(const struct task_struct *p) ++{ ++ if (p->prio < MAX_RT_PRIO) ++ return (p->prio - MAX_RT_PRIO); ++ return (p->prio - MAX_RT_PRIO + p->boost_prio); ++} ++ ++static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) ++{ ++ p->boost_prio = MAX_PRIORITY_ADJ; ++} ++ ++static void sched_task_ttwu(struct task_struct *p) ++{ ++ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) ++ boost_task(p); + } + +-static inline bool sched_task_need_requeue(struct task_struct *p) ++static void sched_task_deactivate(struct task_struct *p, struct rq *rq) + { +- return (task_sched_prio(p) != p->bmq_idx); ++ if (rq_switch_time(rq) < boost_threshold(p)) ++ boost_task(p); + } +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +new file mode 100644 +index 000000000000..9b9addc205a9 +--- /dev/null ++++ b/kernel/sched/pds.h +@@ -0,0 +1,14 @@ ++#ifndef PDS_H ++#define PDS_H ++ ++/* bits: ++ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ ++#define SCHED_BITS (MAX_RT_PRIO + MAX_PRIORITY_ADJ * 2 + 8 + 1) ++#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) ++ ++static inline int task_running_nice(struct task_struct *p) ++{ ++ return (p->prio > DEFAULT_PRIO); ++} ++ ++#endif +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +new file mode 100644 +index 000000000000..b970879f1d2e +--- /dev/null ++++ b/kernel/sched/pds_imp.h +@@ -0,0 +1,205 @@ ++#define ALT_SCHED_VERSION_MSG "sched/bmq: PDS CPU Scheduler 5.8-r0 by Alfred Chen.\n" ++ ++static const u64 user_prio2deadline[NICE_WIDTH] = { ++/* -20 */ 6291456, 6920601, 7612661, 8373927, 9211319, ++/* -15 */ 10132450, 11145695, 12260264, 13486290, 14834919, ++/* -10 */ 16318410, 17950251, 19745276, 21719803, 23891783, ++/* -5 */ 26280961, 28909057, 31799962, 34979958, 38477953, ++/* 0 */ 42325748, 46558322, 51214154, 56335569, 61969125, ++/* 5 */ 68166037, 74982640, 82480904, 90728994, 99801893, ++/* 10 */ 109782082, 120760290, 132836319, 146119950, 160731945, ++/* 15 */ 176805139, 194485652, 213934217, 235327638, 258860401 ++}; ++ ++static const int dl_level_map[] = { ++/* 0 4 8 12 */ ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, ++/* 16 20 24 28 */ ++ 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 7 ++}; ++ ++static inline int ++task_sched_prio(const struct task_struct *p, const struct rq *rq) ++{ ++ u64 delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 23; ++ ++ delta = min((size_t)delta, ARRAY_SIZE(dl_level_map) - 1); ++ ++ return (p->prio < MAX_RT_PRIO)? p->prio : MAX_RT_PRIO + dl_level_map[delta]; ++} ++ ++static inline void update_task_priodl(struct task_struct *p) ++{ ++ p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); ++} ++ ++static inline void requeue_task(struct task_struct *p, struct rq *rq); ++ ++static inline void time_slice_expired(struct task_struct *p, struct rq *rq) ++{ ++ /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ ++ ++ if (p->prio >= MAX_RT_PRIO) ++ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; ++ update_task_priodl(p); ++ ++ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) ++ requeue_task(p, rq); ++} ++ ++/* ++ * pds_skiplist_task_search -- search function used in PDS run queue skip list ++ * node insert operation. ++ * @it: iterator pointer to the node in the skip list ++ * @node: pointer to the skiplist_node to be inserted ++ * ++ * Returns true if key of @it is less or equal to key value of @node, otherwise ++ * false. 
++ */ ++static inline bool ++pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) ++{ ++ return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= ++ skiplist_entry(node, struct task_struct, sl_node)->priodl); ++} ++ ++/* ++ * Define the skip list insert function for PDS ++ */ ++DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); ++ ++/* ++ * Init the queue structure in rq ++ */ ++static inline void sched_queue_init(struct rq *rq) ++{ ++ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); ++} ++ ++/* ++ * Init idle task and put into queue structure of rq ++ * IMPORTANT: may be called multiple times for a single cpu ++ */ ++static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) ++{ ++ /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ ++ int default_prio = idle->prio; ++ ++ idle->prio = MAX_PRIO; ++ idle->deadline = 0ULL; ++ update_task_priodl(idle); ++ ++ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); ++ ++ idle->sl_node.level = idle->sl_level; ++ pds_skiplist_insert(&rq->sl_header, &idle->sl_node); ++ ++ idle->prio = default_prio; ++} ++ ++/* ++ * This routine assume that the idle task always in queue ++ */ ++static inline struct task_struct *sched_rq_first_task(struct rq *rq) ++{ ++ struct skiplist_node *node = rq->sl_header.next[0]; ++ ++ BUG_ON(node == &rq->sl_header); ++ return skiplist_entry(node, struct task_struct, sl_node); ++} ++ ++static inline struct task_struct * ++sched_rq_next_task(struct task_struct *p, struct rq *rq) ++{ ++ struct skiplist_node *next = p->sl_node.next[0]; ++ ++ BUG_ON(next == &rq->sl_header); ++ return skiplist_entry(next, struct task_struct, sl_node); ++} ++ ++static inline unsigned long sched_queue_watermark(struct rq *rq) ++{ ++ return task_sched_prio(sched_rq_first_task(rq), rq); ++} ++ ++#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ ++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ ++ sched_info_dequeued(rq, p); \ ++ \ ++ if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { \ ++ func; \ ++ } ++ ++#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ ++ sched_info_queued(rq, p); \ ++ psi_enqueue(p, flags); \ ++ \ ++ p->sl_node.level = p->sl_level; \ ++ pds_skiplist_insert(&rq->sl_header, &p->sl_node) ++ ++/* ++ * Requeue a task @p to @rq ++ */ ++#define __SCHED_REQUEUE_TASK(p, rq, func) \ ++{\ ++ bool b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); \ ++\ ++ p->sl_node.level = p->sl_level; \ ++ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { \ ++ func; \ ++ } \ ++} ++ ++static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) ++{ ++ struct skiplist_node *node = p->sl_node.prev[0]; ++ ++ if (node != &rq->sl_header) { ++ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); ++ ++ if (t->priodl > p->priodl) ++ return true; ++ } ++ ++ node = p->sl_node.next[0]; ++ if (node != &rq->sl_header) { ++ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); ++ ++ if (t->priodl < p->priodl) ++ return true; ++ } ++ ++ return false; ++} ++ ++static void sched_task_fork(struct task_struct *p) {} ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The priority value as seen by users in /proc. ++ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes ++ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
++ */ ++int task_prio(const struct task_struct *p) ++{ ++ int ret; ++ ++ if (p->prio < MAX_RT_PRIO) ++ return (p->prio - MAX_RT_PRIO); ++ ++ preempt_disable(); ++ ret = task_sched_prio(p, this_rq()) - MAX_RT_PRIO; ++ preempt_enable(); ++ ++ return ret; ++} ++ ++static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) ++{ ++ time_slice_expired(p, rq); ++} ++ ++static void sched_task_ttwu(struct task_struct *p) {} ++static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} +-- +2.37.0 + + +From 1dafadf5e49f3fbd36f83f35d9d88a3452236a0d Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 2 Sep 2020 08:24:30 +0800 +Subject: [PATCH 035/297] sched/alt: Refill time_slice in time_slice_expired(). + +--- + kernel/sched/alt_core.c | 5 +---- + kernel/sched/bmq_imp.h | 2 ++ + kernel/sched/pds_imp.h | 1 + + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 407bc46de451..5187d23f27df 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2197,7 +2197,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + #endif + + if (p->time_slice < RESCHED_NS) { +- p->time_slice = sched_timeslice_ns; + time_slice_expired(p, rq); + resched_curr(rq); + } +@@ -3409,10 +3408,8 @@ static inline void check_curr(struct task_struct *p, struct rq *rq) + + update_curr(rq, p); + +- if (p->time_slice < RESCHED_NS) { +- p->time_slice = sched_timeslice_ns; ++ if (p->time_slice < RESCHED_NS) + time_slice_expired(p, rq); +- } + } + + static inline struct task_struct * +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index d7df1d3f9495..c9f0c708dd61 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -45,6 +45,8 @@ static inline void requeue_task(struct task_struct *p, struct rq *rq); + + static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + { ++ p->time_slice = sched_timeslice_ns; ++ + if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { + if (SCHED_RR != p->policy) + deboost_task(p); +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index b970879f1d2e..aa7e933f08b8 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -38,6 +38,7 @@ static inline void requeue_task(struct task_struct *p, struct rq *rq); + static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + { + /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ ++ p->time_slice = sched_timeslice_ns; + + if (p->prio >= MAX_RT_PRIO) + p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; +-- +2.37.0 + + +From 4fdcb6d7592ea6c2190af732f82688528ca72758 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 5 Sep 2020 16:25:49 +0800 +Subject: [PATCH 036/297] sched/alt: PDS rework. + +alt: +Rework bmq&pds routine in sched_fork(). +Sync-up mainline implement in sched_exec(), add task pi locking. +Add alt_sched_debug() and control by ALT_SCHED_DEBUG macro. + +pds: +Update user_prio2deadline which now based on default 4ms time slice. +Update dl_level_map which provides 20 levels instead of the original 8 +levels. +Fix issue that task_sched_prio() doesn't return corrent sched prio for +idle task. +Implement sched_task_for() routine. 
+--- + kernel/sched/alt_core.c | 57 ++++++++++++++++++++++++++++------------ + kernel/sched/alt_debug.c | 2 +- + kernel/sched/bmq_imp.h | 2 +- + kernel/sched/pds.h | 2 +- + kernel/sched/pds_imp.h | 52 +++++++++++++++++++++++++----------- + 5 files changed, 79 insertions(+), 36 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 5187d23f27df..091f6919195c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2172,9 +2172,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + */ + p->sched_reset_on_fork = 0; + } +- update_task_priodl(p); + +- sched_task_fork(p); + /* + * The child is not yet in the pid-hash so no cgroup attach races, + * and the cgroup is pinned to this child due to cgroup_fork() +@@ -2190,6 +2188,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + */ + rq = this_rq(); + raw_spin_lock(&rq->lock); ++ + rq->curr->time_slice /= 2; + p->time_slice = rq->curr->time_slice; + #ifdef CONFIG_SCHED_HRTICK +@@ -2197,9 +2196,10 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + #endif + + if (p->time_slice < RESCHED_NS) { +- time_slice_expired(p, rq); ++ p->time_slice = sched_timeslice_ns; + resched_curr(rq); + } ++ sched_task_fork(p, rq); + raw_spin_unlock(&rq->lock); + + rseq_migrate(p); +@@ -2795,25 +2795,29 @@ unsigned long nr_iowait(void) + void sched_exec(void) + { + struct task_struct *p = current; ++ unsigned long flags; + int dest_cpu; ++ struct rq *rq; + +- if (task_rq(p)->nr_running < 2) +- return; ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ rq = this_rq(); + +- dest_cpu = cpumask_any_and(p->cpus_ptr, &sched_rq_watermark[IDLE_WM]); +- if ( dest_cpu < nr_cpu_ids) { +-#ifdef CONFIG_SCHED_SMT +- int smt = cpumask_any_and(p->cpus_ptr, &sched_sg_idle_mask); +- if (smt < nr_cpu_ids) +- dest_cpu = smt; +-#endif +- if (likely(cpu_active(dest_cpu))) { +- struct migration_arg arg = { p, dest_cpu }; ++ if (rq != task_rq(p) || rq->nr_running < 2) ++ goto unlock; + +- stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); +- return; +- } ++ dest_cpu = select_task_rq(p, task_rq(p)); ++ if (dest_cpu == smp_processor_id()) ++ goto unlock; ++ ++ if (likely(cpu_active(dest_cpu))) { ++ struct migration_arg arg = { p, dest_cpu }; ++ ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); ++ return; + } ++unlock: ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); + } + + #endif +@@ -3314,6 +3318,23 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) + schedstat_inc(this_rq()->sched_count); + } + ++/* ++ * Compile time debug macro ++ * #define ALT_SCHED_DEBUG ++ */ ++ ++#ifdef ALT_SCHED_DEBUG ++void alt_sched_debug(void) ++{ ++ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", ++ sched_rq_pending_mask.bits[0], ++ sched_rq_watermark[IDLE_WM].bits[0], ++ sched_sg_idle_mask.bits[0]); ++} ++#else ++inline void alt_sched_debug(void) {} ++#endif ++ + #ifdef CONFIG_SMP + + #define SCHED_RQ_NR_MIGRATION (32UL) +@@ -5153,6 +5174,8 @@ static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) + struct task_struct *p; + int retval; + ++ alt_sched_debug(); ++ + if (pid < 0) + return -EINVAL; + +diff --git a/kernel/sched/alt_debug.c b/kernel/sched/alt_debug.c +index 835e6bb98dda..1212a031700e 100644 +--- a/kernel/sched/alt_debug.c ++++ b/kernel/sched/alt_debug.c +@@ -1,7 +1,7 @@ + /* + * kernel/sched/alt_debug.c + * +- * Print the BMQ debugging details ++ * Print the alt scheduler 
debugging details + * + * Author: Alfred Chen + * Date : 2020 +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index c9f0c708dd61..0e67e00a6020 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -146,7 +146,7 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + return (task_sched_prio(p, rq) != p->bmq_idx); + } + +-static void sched_task_fork(struct task_struct *p) ++static void sched_task_fork(struct task_struct *p, struct rq *rq) + { + p->boost_prio = (p->boost_prio < 0) ? + p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 9b9addc205a9..7fdeace7e8a5 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -3,7 +3,7 @@ + + /* bits: + * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ +-#define SCHED_BITS (MAX_RT_PRIO + MAX_PRIORITY_ADJ * 2 + 8 + 1) ++#define SCHED_BITS (MAX_RT_PRIO + 20 + 1) + #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) + + static inline int task_running_nice(struct task_struct *p) +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index aa7e933f08b8..4a2fc8993229 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -1,31 +1,46 @@ + #define ALT_SCHED_VERSION_MSG "sched/bmq: PDS CPU Scheduler 5.8-r0 by Alfred Chen.\n" + + static const u64 user_prio2deadline[NICE_WIDTH] = { +-/* -20 */ 6291456, 6920601, 7612661, 8373927, 9211319, +-/* -15 */ 10132450, 11145695, 12260264, 13486290, 14834919, +-/* -10 */ 16318410, 17950251, 19745276, 21719803, 23891783, +-/* -5 */ 26280961, 28909057, 31799962, 34979958, 38477953, +-/* 0 */ 42325748, 46558322, 51214154, 56335569, 61969125, +-/* 5 */ 68166037, 74982640, 82480904, 90728994, 99801893, +-/* 10 */ 109782082, 120760290, 132836319, 146119950, 160731945, +-/* 15 */ 176805139, 194485652, 213934217, 235327638, 258860401 ++/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, ++/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, ++/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, ++/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, ++/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, ++/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, ++/* 10 */ 73188017, 80506818, 88557499, 97413248, 107154572, ++/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 + }; + +-static const int dl_level_map[] = { +-/* 0 4 8 12 */ +- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, +-/* 16 20 24 28 */ +- 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 7 ++static const unsigned char dl_level_map[] = { ++/* 0 4 8 12 */ ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, ++/* 16 20 24 28 */ ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, ++/* 32 36 40 44 */ ++ 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, ++/* 48 52 56 60 */ ++ 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, ++/* 64 68 72 76 */ ++ 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 7, 6, 5, 4, 3, 2, ++/* 80 84 88 92 */ ++ 1, 0 + }; + + static inline int + task_sched_prio(const struct task_struct *p, const struct rq *rq) + { +- u64 delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 23; ++ size_t delta; + ++ if (p == rq->idle) ++ return IDLE_TASK_SCHED_PRIO; ++ ++ if (p->prio < MAX_RT_PRIO) ++ return p->prio; ++ ++ delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 21; + delta = min((size_t)delta, ARRAY_SIZE(dl_level_map) - 1); + +- return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO + dl_level_map[delta]; ++ return MAX_RT_PRIO + dl_level_map[delta]; + } + + static inline void update_task_priodl(struct task_struct *p) +@@ -173,7 +188,12 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + return false; + } + +-static void sched_task_fork(struct task_struct *p) {} ++static void sched_task_fork(struct task_struct *p, struct rq *rq) ++{ ++ if (p->prio >= MAX_RT_PRIO) ++ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; ++ update_task_priodl(p); ++} + + /** + * task_prio - return the priority value of a given task. +-- +2.37.0 + + +From c258f4da07a61595f9ff450dfab0e23bf3cbe3d8 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 6 Sep 2020 10:26:05 +0800 +Subject: [PATCH 037/297] sched/alt: Documentation and comments updates. + +--- + Documentation/admin-guide/kernel-parameters.txt | 2 +- + Documentation/admin-guide/sysctl/kernel.rst | 4 ++-- + kernel/trace/trace_selftest.c | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 11e0b608c57d..d243405aa3bf 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5274,7 +5274,7 @@ + See drivers/net/irda/sa1100_ir.c. + + sched_timeslice= +- [KNL] Time slice in us for BMQ scheduler. ++ [KNL] Time slice in us for BMQ/PDS scheduler. + Format: (must be >= 1000) + Default: 4000 + See Documentation/scheduler/sched-BMQ.txt +diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst +index 8fd3bfa2ecd9..e24781970a3d 100644 +--- a/Documentation/admin-guide/sysctl/kernel.rst ++++ b/Documentation/admin-guide/sysctl/kernel.rst +@@ -1528,8 +1528,8 @@ tunable to zero will disable lockup detection altogether. + yield_type: + =========== + +-BMQ CPU scheduler only. This determines what type of yield calls to +-sched_yield will perform. ++BMQ/PDS CPU scheduler only. This determines what type of yield calls ++to sched_yield will perform. + + 0 - No yield. + 1 - Deboost and requeue task. (default) +diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c +index bc131c6d1299..2ccdede8585c 100644 +--- a/kernel/trace/trace_selftest.c ++++ b/kernel/trace/trace_selftest.c +@@ -1144,7 +1144,7 @@ static int trace_wakeup_test_thread(void *data) + /* Make this a -deadline thread */ + static const struct sched_attr attr = { + #ifdef CONFIG_SCHED_ALT +- /* No deadline on BMQ, use RR */ ++ /* No deadline on BMQ/PDS, use RR */ + .sched_policy = SCHED_RR, + #else + .sched_policy = SCHED_DEADLINE, +-- +2.37.0 + + +From 2b65a7b2dff1584dea7e9a82fb907ae4b10398d0 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 6 Sep 2020 10:28:26 +0800 +Subject: [PATCH 038/297] sched/alt: Revert NORMAL_PRIO defination for powerpc + cell. + +--- + arch/powerpc/platforms/cell/spufs/sched.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c +index 525372fcd0f2..99bd027a7f7c 100644 +--- a/arch/powerpc/platforms/cell/spufs/sched.c ++++ b/arch/powerpc/platforms/cell/spufs/sched.c +@@ -51,6 +51,11 @@ static struct task_struct *spusched_task; + static struct timer_list spusched_timer; + static struct timer_list spuloadavg_timer; + ++/* ++ * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). 
++ */ ++#define NORMAL_PRIO 120 ++ + /* + * Frequency of the spu scheduler tick. By default we do one SPU scheduler + * tick for every 10 CPU scheduler ticks. +-- +2.37.0 + + +From fbc1e5869bff61dcb09bc4b2cca15c5a34d0a0e7 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 31 Aug 2020 15:40:14 +0800 +Subject: [PATCH 039/297] sched/alt: Add ALT_SCHED_VERSION micro. + +--- + kernel/sched/alt_core.c | 2 ++ + kernel/sched/bmq_imp.h | 2 +- + kernel/sched/pds_imp.h | 2 +- + 3 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 091f6919195c..76f72292e28a 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -45,6 +45,8 @@ + #define CREATE_TRACE_POINTS + #include + ++#define ALT_SCHED_VERSION "v5.8-r2" ++ + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) + #define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR) +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index 0e67e00a6020..ad9a7c448da7 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -1,4 +1,4 @@ +-#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.8-r1 by Alfred Chen.\n" ++#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" + + /* + * BMQ only routines +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index 4a2fc8993229..041827b92910 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -1,4 +1,4 @@ +-#define ALT_SCHED_VERSION_MSG "sched/bmq: PDS CPU Scheduler 5.8-r0 by Alfred Chen.\n" ++#define ALT_SCHED_VERSION_MSG "sched/bmq: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" + + static const u64 user_prio2deadline[NICE_WIDTH] = { + /* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, +-- +2.37.0 + + +From 1dadb0fdb024f924062679231d67959715381f05 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 6 Sep 2020 20:02:47 +0800 +Subject: [PATCH 040/297] sched/pds: Fix compilation issue with + CONFIG_SCHED_TRACER. + +--- + include/linux/sched/deadline.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h +index 0aa37b0a1676..9516a98cf160 100644 +--- a/include/linux/sched/deadline.h ++++ b/include/linux/sched/deadline.h +@@ -2,13 +2,13 @@ + + #ifdef CONFIG_SCHED_ALT + +-#ifdef CONFIG_SCHED_BMQ +-#define __tsk_deadline(p) (0UL) +- + static inline int dl_task(struct task_struct *p) + { + return 0; + } ++ ++#ifdef CONFIG_SCHED_BMQ ++#define __tsk_deadline(p) (0UL) + #endif + + #ifdef CONFIG_SCHED_PDS +-- +2.37.0 + + +From c228f0d8e54cc76d0311b102e28ac359bc56d324 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 7 Sep 2020 06:56:27 +0800 +Subject: [PATCH 041/297] sched/alt: Disable UCLAMP_TASK, NUMA_BALANCING, + SCHED_AUTOGROUP, FAIR_GROUP_SCHED for Project C. + +--- + init/Kconfig | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/init/Kconfig b/init/Kconfig +index 8059f9d860db..c4acd49edf94 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -841,7 +841,7 @@ endif + config UCLAMP_TASK + bool "Enable utilization clamping for RT/FAIR tasks" + depends on CPU_FREQ_GOV_SCHEDUTIL +- depends on !SCHED_BMQ ++ depends on !SCHED_ALT + help + This feature enables the scheduler to track the clamped utilization + of each CPU based on RUNNABLE tasks scheduled on that CPU. 
+@@ -941,7 +941,7 @@ config NUMA_BALANCING + depends on ARCH_SUPPORTS_NUMA_BALANCING + depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY + depends on SMP && NUMA && MIGRATION && !PREEMPT_RT +- depends on !SCHED_BMQ ++ depends on !SCHED_ALT + help + This option adds support for automatic NUMA aware memory/task placement. + The mechanism is quite primitive and is based on migrating memory when +@@ -1028,7 +1028,7 @@ menuconfig CGROUP_SCHED + bandwidth allocation to such task groups. It uses cgroups to group + tasks. + +-if CGROUP_SCHED && !SCHED_BMQ ++if CGROUP_SCHED && !SCHED_ALT + config FAIR_GROUP_SCHED + bool "Group scheduling for SCHED_OTHER" + depends on CGROUP_SCHED +@@ -1299,7 +1299,7 @@ config CHECKPOINT_RESTORE + + config SCHED_AUTOGROUP + bool "Automatic process group scheduling" +- depends on !SCHED_BMQ ++ depends on !SCHED_ALT + select CGROUPS + select CGROUP_SCHED + select FAIR_GROUP_SCHED +-- +2.37.0 + + +From 2e3bfcb22ff66e9e23696835c6a7bd4be5a5a2c1 Mon Sep 17 00:00:00 2001 +From: Piotr Gorski +Date: Mon, 7 Sep 2020 20:42:07 +0200 +Subject: [PATCH 042/297] sched/pds: Update ALT_SCHED_VERSION_MSG + +Signed-off-by: Piotr Gorski +--- + kernel/sched/pds_imp.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index 041827b92910..66dc16218444 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -1,4 +1,4 @@ +-#define ALT_SCHED_VERSION_MSG "sched/bmq: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" ++#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" + + static const u64 user_prio2deadline[NICE_WIDTH] = { + /* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, +-- +2.37.0 + + +From 520577995688bf0b0afa44e8288446cef9d45866 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 10 Sep 2020 22:46:11 +0800 +Subject: [PATCH 043/297] sched/alt: Call check_preempt_curr() in + ttwu_do_wakeup(). + +Sync-up this like what it does in mainline scheduler code. +--- + kernel/sched/alt_core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 76f72292e28a..1dd4972beda4 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1593,6 +1593,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) + static inline void + ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) + { ++ check_preempt_curr(rq); + p->state = TASK_RUNNING; + trace_sched_wakeup(p); + } +@@ -1615,6 +1616,8 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) + + rq = __task_access_lock(p, &lock); + if (task_on_rq_queued(p)) { ++ /* check_preempt_curr() may use rq clock */ ++ update_rq_clock(rq); + ttwu_do_wakeup(rq, p, wake_flags); + ret = 1; + } +@@ -1654,8 +1657,6 @@ void sched_ttwu_pending(void *arg) + ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); + } + +- check_preempt_curr(rq); +- + rq_unlock_irqrestore(rq, &rf); + } + +@@ -1762,7 +1763,6 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) + raw_spin_lock(&rq->lock); + update_rq_clock(rq); + ttwu_do_activate(rq, p, wake_flags); +- check_preempt_curr(rq); + raw_spin_unlock(&rq->lock); + } + +-- +2.37.0 + + +From d27a0663866bed49f43523c4022aa64a2af78cde Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 15 Sep 2020 10:31:09 +0800 +Subject: [PATCH 044/297] sched/pds: Sync-up pds_skiplist_random_level() from + original PDS. 
+ +Forgot to pick up this function from original pds, this make tasks +sl_level always be 0. +--- + kernel/sched/pds_imp.h | 31 +++++++++++++++++++++++++++++++ + 1 file changed, 31 insertions(+) + +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index 66dc16218444..6baee5e961b9 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -188,8 +188,39 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + return false; + } + ++/* ++ * pds_skiplist_random_level -- Returns a pseudo-random level number for skip ++ * list node which is used in PDS run queue. ++ * ++ * In current implementation, based on testing, the first 8 bits in microseconds ++ * of niffies are suitable for random level population. ++ * find_first_bit() is used to satisfy p = 0.5 between each levels, and there ++ * should be platform hardware supported instruction(known as ctz/clz) to speed ++ * up this function. ++ * The skiplist level for a task is populated when task is created and doesn't ++ * change in task's life time. When task is being inserted into run queue, this ++ * skiplist level is set to task's sl_node->level, the skiplist insert function ++ * may change it based on current level of the skip lsit. ++ */ ++static inline int pds_skiplist_random_level(const struct task_struct *p) ++{ ++ long unsigned int randseed; ++ ++ /* ++ * 1. Some architectures don't have better than microsecond resolution ++ * so mask out ~microseconds as a factor of the random seed for skiplist ++ * insertion. ++ * 2. Use address of task structure pointer as another factor of the ++ * random seed for task burst forking scenario. ++ */ ++ randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; ++ ++ return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); ++} ++ + static void sched_task_fork(struct task_struct *p, struct rq *rq) + { ++ p->sl_level = pds_skiplist_random_level(p); + if (p->prio >= MAX_RT_PRIO) + p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; + update_task_priodl(p); +-- +2.37.0 + + +From 950f9789e0010a17644a6d226d195ad3aa72c43e Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 15 Sep 2020 11:07:43 +0800 +Subject: [PATCH 045/297] Project-C v5.8-r3 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 1dd4972beda4..b469c9488d18 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -45,7 +45,7 @@ + #define CREATE_TRACE_POINTS + #include + +-#define ALT_SCHED_VERSION "v5.8-r2" ++#define ALT_SCHED_VERSION "v5.8-r3" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From e786fd1dae544fb4e3ea3b8f228eb0081a2d0bf6 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 18 Sep 2020 21:37:34 +0800 +Subject: [PATCH 046/297] sched/alt: [Sync] 58877d347b58 sched: Better document + ttwu() + +--- + kernel/sched/alt_core.c | 188 +++++++++++++++++++++++++++++++++++----- + 1 file changed, 168 insertions(+), 20 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b469c9488d18..994dce2a7070 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -178,6 +178,99 @@ static inline struct task_struct *rq_runnable_task(struct rq *rq) + return next; + } + ++/* ++ * Serialization rules: ++ * ++ * Lock order: ++ * ++ * p->pi_lock ++ * rq->lock ++ * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls) ++ * ++ * rq1->lock 
++ * rq2->lock where: rq1 < rq2 ++ * ++ * Regular state: ++ * ++ * Normal scheduling state is serialized by rq->lock. __schedule() takes the ++ * local CPU's rq->lock, it optionally removes the task from the runqueue and ++ * always looks at the local rq data structures to find the most elegible task ++ * to run next. ++ * ++ * Task enqueue is also under rq->lock, possibly taken from another CPU. ++ * Wakeups from another LLC domain might use an IPI to transfer the enqueue to ++ * the local CPU to avoid bouncing the runqueue state around [ see ++ * ttwu_queue_wakelist() ] ++ * ++ * Task wakeup, specifically wakeups that involve migration, are horribly ++ * complicated to avoid having to take two rq->locks. ++ * ++ * Special state: ++ * ++ * System-calls and anything external will use task_rq_lock() which acquires ++ * both p->pi_lock and rq->lock. As a consequence the state they change is ++ * stable while holding either lock: ++ * ++ * - sched_setaffinity()/ ++ * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed ++ * - set_user_nice(): p->se.load, p->*prio ++ * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio, ++ * p->se.load, p->rt_priority, ++ * p->dl.dl_{runtime, deadline, period, flags, bw, density} ++ * - sched_setnuma(): p->numa_preferred_nid ++ * - sched_move_task()/ ++ * cpu_cgroup_fork(): p->sched_task_group ++ * - uclamp_update_active() p->uclamp* ++ * ++ * p->state <- TASK_*: ++ * ++ * is changed locklessly using set_current_state(), __set_current_state() or ++ * set_special_state(), see their respective comments, or by ++ * try_to_wake_up(). This latter uses p->pi_lock to serialize against ++ * concurrent self. ++ * ++ * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }: ++ * ++ * is set by activate_task() and cleared by deactivate_task(), under ++ * rq->lock. Non-zero indicates the task is runnable, the special ++ * ON_RQ_MIGRATING state is used for migration without holding both ++ * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock(). ++ * ++ * p->on_cpu <- { 0, 1 }: ++ * ++ * is set by prepare_task() and cleared by finish_task() such that it will be ++ * set before p is scheduled-in and cleared after p is scheduled-out, both ++ * under rq->lock. Non-zero indicates the task is running on its CPU. ++ * ++ * [ The astute reader will observe that it is possible for two tasks on one ++ * CPU to have ->on_cpu = 1 at the same time. ] ++ * ++ * task_cpu(p): is changed by set_task_cpu(), the rules are: ++ * ++ * - Don't call set_task_cpu() on a blocked task: ++ * ++ * We don't care what CPU we're not running on, this simplifies hotplug, ++ * the CPU assignment of blocked tasks isn't required to be valid. ++ * ++ * - for try_to_wake_up(), called under p->pi_lock: ++ * ++ * This allows try_to_wake_up() to only take one rq->lock, see its comment. 
++ * ++ * - for migration called under rq->lock: ++ * [ see task_on_rq_migrating() in task_rq_lock() ] ++ * ++ * o move_queued_task() ++ * o detach_task() ++ * ++ * - for migration called under double_rq_lock(): ++ * ++ * o __migrate_swap_task() ++ * o push_rt_task() / pull_rt_task() ++ * o push_dl_task() / pull_dl_task() ++ * o dl_task_offline_migration() ++ * ++ */ ++ + /* + * Context: p->pi_lock + */ +@@ -1608,7 +1701,32 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) + ttwu_do_wakeup(rq, p, 0); + } + +-static int ttwu_remote(struct task_struct *p, int wake_flags) ++/* ++ * Consider @p being inside a wait loop: ++ * ++ * for (;;) { ++ * set_current_state(TASK_UNINTERRUPTIBLE); ++ * ++ * if (CONDITION) ++ * break; ++ * ++ * schedule(); ++ * } ++ * __set_current_state(TASK_RUNNING); ++ * ++ * between set_current_state() and schedule(). In this case @p is still ++ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in ++ * an atomic manner. ++ * ++ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq ++ * then schedule() must still happen and p->state can be changed to ++ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we ++ * need to do a full wakeup with enqueue. ++ * ++ * Returns: %true when the wakeup is done, ++ * %false otherwise. ++ */ ++static int ttwu_runnable(struct task_struct *p, int wake_flags) + { + struct rq *rq; + raw_spinlock_t *lock; +@@ -1749,16 +1867,21 @@ bool cpus_share_cache(int this_cpu, int that_cpu) + { + return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); + } ++#else /* !CONFIG_SMP */ ++ ++static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) ++{ ++ return false; ++} ++ + #endif /* CONFIG_SMP */ + + static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) + { + struct rq *rq = cpu_rq(cpu); + +-#if defined(CONFIG_SMP) + if (ttwu_queue_wakelist(p, cpu, wake_flags)) + return; +-#endif + + raw_spin_lock(&rq->lock); + update_rq_clock(rq); +@@ -1815,8 +1938,8 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) + * migration. However the means are completely different as there is no lock + * chain to provide order. Instead we do: + * +- * 1) smp_store_release(X->on_cpu, 0) +- * 2) smp_cond_load_acquire(!X->on_cpu) ++ * 1) smp_store_release(X->on_cpu, 0) -- finish_task() ++ * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up() + * + * Example: + * +@@ -1857,20 +1980,42 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) + * + */ + +-/*** ++/** + * try_to_wake_up - wake up a thread + * @p: the thread to be awakened + * @state: the mask of task states that can be woken + * @wake_flags: wake modifier flags (WF_*) + * +- * Put it on the run-queue if it's not already there. The "current" +- * thread is always on the run-queue (except when the actual +- * re-schedule is in progress), and as such you're allowed to do +- * the simpler "current->state = TASK_RUNNING" to mark yourself +- * runnable without the overhead of this. ++ * Conceptually does: ++ * ++ * If (@state & @p->state) @p->state = TASK_RUNNING. ++ * ++ * If the task was not queued/runnable, also place it back on a runqueue. ++ * ++ * This function is atomic against schedule() which would dequeue the task. + * +- * Return: %true if @p was woken up, %false if it was already running. +- * or @state didn't match @p's state. 
++ * It issues a full memory barrier before accessing @p->state, see the comment ++ * with set_current_state(). ++ * ++ * Uses p->pi_lock to serialize against concurrent wake-ups. ++ * ++ * Relies on p->pi_lock stabilizing: ++ * - p->sched_class ++ * - p->cpus_ptr ++ * - p->sched_task_group ++ * in order to do migration, see its use of select_task_rq()/set_task_cpu(). ++ * ++ * Tries really hard to only take one task_rq(p)->lock for performance. ++ * Takes rq->lock in: ++ * - ttwu_runnable() -- old rq, unavoidable, see comment there; ++ * - ttwu_queue() -- new rq, for enqueue of the task; ++ * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. ++ * ++ * As a consequence we race really badly with just about everything. See the ++ * many memory barriers and their comments for details. ++ * ++ * Return: %true if @p->state changes (an actual wakeup was done), ++ * %false otherwise. + */ + static int try_to_wake_up(struct task_struct *p, unsigned int state, + int wake_flags) +@@ -1883,7 +2028,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + /* + * We're waking current, this means 'p->on_rq' and 'task_cpu(p) + * == smp_processor_id()'. Together this means we can special +- * case the whole 'p->on_rq && ttwu_remote()' case below ++ * case the whole 'p->on_rq && ttwu_runnable()' case below + * without taking any locks. + * + * In particular: +@@ -1904,8 +2049,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + /* + * If we are going to wake up a thread waiting for CONDITION we + * need to ensure that CONDITION=1 done by the caller can not be +- * reordered with p->state check below. This pairs with mb() in +- * set_current_state() the waiting thread does. ++ * reordered with p->state check below. This pairs with smp_store_mb() ++ * in set_current_state() that the waiting thread does. + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); +@@ -1940,7 +2085,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). + */ + smp_rmb(); +- if (READ_ONCE(p->on_rq) && ttwu_remote(p, wake_flags)) ++ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) + goto unlock; + + if (p->in_iowait) { +@@ -2430,16 +2575,19 @@ static inline void prepare_task(struct task_struct *next) + /* + * Claim the task as running, we do this before switching to it + * such that any running task will have this set. ++ * ++ * See the ttwu() WF_ON_CPU case and its ordering comment. + */ +- next->on_cpu = 1; ++ WRITE_ONCE(next->on_cpu, 1); + } + + static inline void finish_task(struct task_struct *prev) + { + #ifdef CONFIG_SMP + /* +- * After ->on_cpu is cleared, the task can be moved to a different CPU. +- * We must ensure this doesn't happen until the switch is completely ++ * This must be the very last reference to @prev from this CPU. After ++ * p->on_cpu is cleared, the task can be moved to a different CPU. We ++ * must ensure this doesn't happen until the switch is completely + * finished. 
+ * + * In particular, the load of prev->state in finish_task_switch() must +-- +2.37.0 + + +From f08ec8a64a1e4153f744102ff8d7cd95cc763759 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 18 Sep 2020 21:38:29 +0800 +Subject: [PATCH 047/297] sched/alt: [Sync] 25980c7a79af arch_topology, + sched/core: Cleanup thermal pressure definition + +--- + kernel/sched/alt_core.c | 11 ----------- + 1 file changed, 11 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 994dce2a7070..3cc8d7a8d3fe 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3033,17 +3033,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) + return ns; + } + +-DEFINE_PER_CPU(unsigned long, thermal_pressure); +- +-void arch_set_thermal_pressure(struct cpumask *cpus, +- unsigned long th_pressure) +-{ +- int cpu; +- +- for_each_cpu(cpu, cpus) +- WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); +-} +- + /* This manages tasks that have run out of timeslice during a scheduler_tick */ + static inline void scheduler_task_tick(struct rq *rq) + { +-- +2.37.0 + + +From 3583d699907a64f362492df2b3a5c6f307d79bdb Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 18 Sep 2020 21:45:53 +0800 +Subject: [PATCH 048/297] sched/alt: [Sync] 7318d4cc14c8 sched: Provide + sched_set_fifo() + +--- + kernel/sched/alt_core.c | 47 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 47 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3cc8d7a8d3fe..d5c6df27b2ca 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4563,6 +4563,8 @@ static int _sched_setscheduler(struct task_struct *p, int policy, + * @policy: new policy. + * @param: structure containing the new RT priority. + * ++ * Use sched_set_fifo(), read its comment. ++ * + * Return: 0 on success. An error code otherwise. + * + * NOTE that the task may be already dead. +@@ -4606,6 +4608,51 @@ int sched_setscheduler_nocheck(struct task_struct *p, int policy, + } + EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); + ++/* ++ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally ++ * incapable of resource management, which is the one thing an OS really should ++ * be doing. ++ * ++ * This is of course the reason it is limited to privileged users only. ++ * ++ * Worse still; it is fundamentally impossible to compose static priority ++ * workloads. You cannot take two correctly working static prio workloads ++ * and smash them together and still expect them to work. ++ * ++ * For this reason 'all' FIFO tasks the kernel creates are basically at: ++ * ++ * MAX_RT_PRIO / 2 ++ * ++ * The administrator _MUST_ configure the system, the kernel simply doesn't ++ * know enough information to make a sensible choice. ++ */ ++void sched_set_fifo(struct task_struct *p) ++{ ++ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; ++ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); ++} ++EXPORT_SYMBOL_GPL(sched_set_fifo); ++ ++/* ++ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. 
++ */ ++void sched_set_fifo_low(struct task_struct *p) ++{ ++ struct sched_param sp = { .sched_priority = 1 }; ++ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); ++} ++EXPORT_SYMBOL_GPL(sched_set_fifo_low); ++ ++void sched_set_normal(struct task_struct *p, int nice) ++{ ++ struct sched_attr attr = { ++ .sched_policy = SCHED_NORMAL, ++ .sched_nice = nice, ++ }; ++ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); ++} ++EXPORT_SYMBOL_GPL(sched_set_normal); ++ + static int + do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) + { +-- +2.37.0 + + +From 12657c8a61232ebb0b955ef1207d7ad75c1ee776 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 18 Sep 2020 21:48:06 +0800 +Subject: [PATCH 049/297] sched/alt: [Sync] 616d91b68cd5 sched: Remove + sched_setscheduler*() EXPORTs + +--- + kernel/sched/alt_core.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index d5c6df27b2ca..c265c8fbc8d6 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4575,13 +4575,10 @@ int sched_setscheduler(struct task_struct *p, int policy, + return _sched_setscheduler(p, policy, param, true); + } + +-EXPORT_SYMBOL_GPL(sched_setscheduler); +- + int sched_setattr(struct task_struct *p, const struct sched_attr *attr) + { + return __sched_setscheduler(p, attr, true, true); + } +-EXPORT_SYMBOL_GPL(sched_setattr); + + int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) + { +@@ -4606,7 +4603,6 @@ int sched_setscheduler_nocheck(struct task_struct *p, int policy, + { + return _sched_setscheduler(p, policy, param, false); + } +-EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); + + /* + * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally +-- +2.37.0 + + +From daeb45651d2337f4f99fbc3d94d7c54dbd3fe869 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 18 Sep 2020 21:53:47 +0800 +Subject: [PATCH 050/297] sched/alt: [Sync] cc172ff301d8 sched/debug: Fix the + alignment of the show-state debug output + +--- + kernel/sched/alt_core.c | 15 ++++----------- + 1 file changed, 4 insertions(+), 11 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c265c8fbc8d6..4bd60dd264f3 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5422,10 +5422,10 @@ void sched_show_task(struct task_struct *p) + if (!try_get_task_stack(p)) + return; + +- printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); ++ pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); + + if (p->state == TASK_RUNNING) +- printk(KERN_CONT " running task "); ++ pr_cont(" running task "); + #ifdef CONFIG_DEBUG_STACK_USAGE + free = stack_not_used(p); + #endif +@@ -5434,8 +5434,8 @@ void sched_show_task(struct task_struct *p) + if (pid_alive(p)) + ppid = task_pid_nr(rcu_dereference(p->real_parent)); + rcu_read_unlock(); +- printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, +- task_pid_nr(p), ppid, ++ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", ++ free, task_pid_nr(p), ppid, + (unsigned long)task_thread_info(p)->flags); + + print_worker_info(KERN_INFO, p); +@@ -5470,13 +5470,6 @@ void show_state_filter(unsigned long state_filter) + { + struct task_struct *g, *p; + +-#if BITS_PER_LONG == 32 +- printk(KERN_INFO +- " task PC stack pid father\n"); +-#else +- printk(KERN_INFO +- " task PC stack pid father\n"); +-#endif + rcu_read_lock(); + for_each_process_thread(g, p) { + /* +-- +2.37.0 + + +From 
172d673b3df9e3bb928a20619bf923bba67c2d9e Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 18 Sep 2020 21:55:19 +0800 +Subject: [PATCH 051/297] sched/alt: [Sync] df561f6688fe treewide: Use + fallthrough pseudo-keyword + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4bd60dd264f3..754f1950da32 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1470,7 +1470,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) + state = possible; + break; + } +- /* Fall-through */ ++ fallthrough; + case possible: + do_set_cpus_allowed(p, cpu_possible_mask); + state = fail; +-- +2.37.0 + + +From 9488ef06c74c519a7f255f9a3c6db43b447e94d7 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 18 Sep 2020 22:01:00 +0800 +Subject: [PATCH 052/297] sched/alt: [Sync] 21a6ee14a8f2 sched: Remove + duplicated tick_nohz_full_enabled() check + +--- + kernel/sched/alt_core.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 754f1950da32..377fed0cdfda 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -496,12 +496,7 @@ static inline void update_rq_clock(struct rq *rq) + */ + static inline void sched_update_tick_dependency(struct rq *rq) + { +- int cpu; +- +- if (!tick_nohz_full_enabled()) +- return; +- +- cpu = cpu_of(rq); ++ int cpu = cpu_of(rq); + + if (!tick_nohz_full_cpu(cpu)) + return; +-- +2.37.0 + + +From 80981e8996c806841919b1c35480942875ab6841 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 19 Sep 2020 07:14:35 +0800 +Subject: [PATCH 053/297] sched/alt: [Sync] 13685c4a08fc sched/uclamp: Add a + new sysctl to control RT default boost value + +--- + kernel/sched/alt_core.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 377fed0cdfda..02a9a194f533 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2361,6 +2361,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + return 0; + } + ++void sched_post_fork(struct task_struct *p) {} ++ + #ifdef CONFIG_SCHEDSTATS + + DEFINE_STATIC_KEY_FALSE(sched_schedstats); +-- +2.37.0 + + +From f175857e1c2e8d756d5fbbe1b087be6766c2b170 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 19 Sep 2020 09:58:04 +0800 +Subject: [PATCH 054/297] sched/alt: Sync-up mainline *need_resched() changes. + +--- + kernel/sched/alt_core.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 02a9a194f533..f5380178227e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -818,11 +818,15 @@ int get_nohz_timer_target(void) + */ + static inline void wake_up_idle_cpu(int cpu) + { ++ struct rq *rq = cpu_rq(cpu); ++ + if (cpu == smp_processor_id()) + return; + +- set_tsk_need_resched(cpu_rq(cpu)->idle); +- smp_send_reschedule(cpu); ++ if (set_nr_and_not_polling(rq->idle)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); + } + + static inline bool wake_up_full_nohz_cpu(int cpu) +@@ -833,6 +837,8 @@ static inline bool wake_up_full_nohz_cpu(int cpu) + * If needed we can still optimize that later with an + * empty IRQ. + */ ++ if (cpu_is_offline(cpu)) ++ return true; /* Don't try to wake offline CPUs. 
*/ + if (tick_nohz_full_cpu(cpu)) { + if (cpu != smp_processor_id() || + tick_nohz_tick_stopped()) +@@ -845,7 +851,7 @@ static inline bool wake_up_full_nohz_cpu(int cpu) + + void wake_up_nohz_cpu(int cpu) + { +- if (cpu_online(cpu) && !wake_up_full_nohz_cpu(cpu)) ++ if (!wake_up_full_nohz_cpu(cpu)) + wake_up_idle_cpu(cpu); + } + +@@ -3738,12 +3744,12 @@ static void __sched notrace __schedule(bool preempt) + switch_count = &prev->nvcsw; + } + +- clear_tsk_need_resched(prev); +- clear_preempt_need_resched(); +- + check_curr(prev, rq); + + next = choose_next_task(rq, cpu, prev); ++ clear_tsk_need_resched(prev); ++ clear_preempt_need_resched(); ++ + + if (likely(prev != next)) { + next->last_ran = rq->clock_task; +-- +2.37.0 + + +From 65d9b114066a24e17411041c7cd8f02f9b7a78bb Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 9 Oct 2020 11:21:03 +0800 +Subject: [PATCH 055/297] sched/alt: Disable ttwu queue code path by default. + +Based on user's feedback and benchmark, ttwu queue(wakelist) code path +is disabled by default. +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index f5380178227e..3c5eba046ed6 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1828,7 +1828,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) + + static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) + { +- if (ttwu_queue_cond(cpu, wake_flags)) { ++ if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { + if (WARN_ON_ONCE(cpu == smp_processor_id())) + return false; + +-- +2.37.0 + + +From c03a2be38e690a43fce3165ed8041a1216dbf0f9 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 19 Sep 2020 10:00:49 +0800 +Subject: [PATCH 056/297] Project-C v5.9-r0 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3c5eba046ed6..f36264fea75c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -45,7 +45,7 @@ + #define CREATE_TRACE_POINTS + #include + +-#define ALT_SCHED_VERSION "v5.8-r3" ++#define ALT_SCHED_VERSION "v5.9-r0" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From b794c7f66b550db5eb4d44acc7314dbae5f6d15c Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 12 Oct 2020 20:43:53 +0800 +Subject: [PATCH 057/297] sched/alt: Fix compilation erro in pelt.c + +--- + kernel/sched/alt_core.c | 11 +++++++++-- + kernel/sched/alt_sched.h | 4 ++++ + 2 files changed, 13 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index f36264fea75c..d43ca62fd00f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -11,6 +11,10 @@ + * scheduler by Alfred Chen. + * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. + */ ++#define CREATE_TRACE_POINTS ++#include ++#undef CREATE_TRACE_POINTS ++ + #include "sched.h" + + #include +@@ -42,8 +46,11 @@ + #include "pelt.h" + #include "smp.h" + +-#define CREATE_TRACE_POINTS +-#include ++/* ++ * Export tracepoints that act as a bare tracehook (ie: have no trace event ++ * associated with them) to allow external modules to probe them. 
++ */ ++EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + + #define ALT_SCHED_VERSION "v5.9-r0" + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 99be2c51c88d..03f8b8b1aa27 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -46,6 +46,8 @@ + + #include "cpupri.h" + ++#include ++ + #ifdef CONFIG_SCHED_BMQ + #include "bmq.h" + #endif +@@ -496,6 +498,8 @@ static inline int sched_tick_offload_init(void) { return 0; } + + extern void schedule_idle(void); + ++#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) ++ + /* + * !! For sched_setattr_nocheck() (kernel) only !! + * +-- +2.37.0 + + +From 15718ef8415324b6407ead6535988d1cb6132c14 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 29 Sep 2020 20:44:57 +0800 +Subject: [PATCH 058/297] sched/pds: Refine skiplist implementation. + +--- + include/linux/skip_list.h | 82 +++++++++++++++++++-------------------- + kernel/sched/pds_imp.h | 4 +- + 2 files changed, 43 insertions(+), 43 deletions(-) + +diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h +index 47ca955a451d..5f76038e0320 100644 +--- a/include/linux/skip_list.h ++++ b/include/linux/skip_list.h +@@ -65,17 +65,11 @@ struct skiplist_node { + &name, &name, &name, &name},\ + } + +-static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) +-{ +- /* only level 0 ->next matters in skiplist_empty() */ +- WRITE_ONCE(node->next[0], node); +-} +- + /** +- * FULL_INIT_SKIPLIST_NODE -- fully init a skiplist_node, expecially for header ++ * INIT_SKIPLIST_NODE -- init a skiplist_node, expecially for header + * @node: the skip list node to be inited. + */ +-static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) ++static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) + { + int i; + +@@ -86,15 +80,6 @@ static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) + } + } + +-/** +- * skiplist_empty - test whether a skip list is empty +- * @head: the skip list to test. +- */ +-static inline int skiplist_empty(const struct skiplist_node *head) +-{ +- return READ_ONCE(head->next[0]) == head; +-} +- + /** + * skiplist_entry - get the struct for this entry + * @ptr: the &struct skiplist_node pointer. 
+@@ -119,31 +104,47 @@ static inline int skiplist_empty(const struct skiplist_node *head) + #define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ + static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ + {\ +- struct skiplist_node *update[NUM_SKIPLIST_LEVEL];\ + struct skiplist_node *p, *q;\ +- int k = head->level;\ ++ unsigned int k = head->level;\ ++ unsigned int l = node->level;\ + \ + p = head;\ +- do {\ ++ if (l > k) {\ ++ l = node->level = ++head->level;\ ++\ ++ node->next[l] = head;\ ++ node->prev[l] = head;\ ++ head->next[l] = node;\ ++ head->prev[l] = node;\ ++\ ++ do {\ ++ while (q = p->next[k], q != head && search_func(q, node))\ ++ p = q;\ ++\ ++ node->prev[k] = p;\ ++ node->next[k] = q;\ ++ q->prev[k] = node;\ ++ p->next[k] = node;\ ++ } while (k--);\ ++\ ++ return (p == head);\ ++ }\ ++\ ++ while (k > l) {\ + while (q = p->next[k], q != head && search_func(q, node))\ + p = q;\ +- update[k] = p;\ +- } while (--k >= 0);\ +-\ +- k = node->level;\ +- if (unlikely(k > head->level)) {\ +- node->level = k = ++head->level;\ +- update[k] = head;\ ++ k--;\ + }\ + \ + do {\ +- p = update[k];\ +- q = p->next[k];\ +- node->next[k] = q;\ +- p->next[k] = node;\ ++ while (q = p->next[k], q != head && search_func(q, node))\ ++ p = q;\ ++\ + node->prev[k] = p;\ ++ node->next[k] = q;\ + q->prev[k] = node;\ +- } while (--k >= 0);\ ++ p->next[k] = node;\ ++ } while (k--);\ + \ + return (p == head);\ + } +@@ -159,18 +160,17 @@ static inline int func_name(struct skiplist_node *head, struct skiplist_node *no + static inline int + skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) + { +- int l, m = node->level; ++ unsigned int i, level = node->level; + +- for (l = 0; l <= m; l++) { +- node->prev[l]->next[l] = node->next[l]; +- node->next[l]->prev[l] = node->prev[l]; ++ for (i = 0; i <= level; i++) { ++ node->prev[i]->next[i] = node->next[i]; ++ node->next[i]->prev[i] = node->prev[i]; + } +- if (m == head->level && m > 0) { +- while (head->next[m] == head && m > 0) +- m--; +- head->level = m; ++ if (level == head->level && level) { ++ while (head->next[level] == head && level) ++ level--; ++ head->level = level; + } +- INIT_SKIPLIST_NODE(node); + + return (node->prev[0] == head); + } +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index 6baee5e961b9..f93ac2fa3dc3 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -89,7 +89,7 @@ DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); + */ + static inline void sched_queue_init(struct rq *rq) + { +- FULL_INIT_SKIPLIST_NODE(&rq->sl_header); ++ INIT_SKIPLIST_NODE(&rq->sl_header); + } + + /* +@@ -105,7 +105,7 @@ static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle + idle->deadline = 0ULL; + update_task_priodl(idle); + +- FULL_INIT_SKIPLIST_NODE(&rq->sl_header); ++ INIT_SKIPLIST_NODE(&rq->sl_header); + + idle->sl_node.level = idle->sl_level; + pds_skiplist_insert(&rq->sl_header, &idle->sl_node); +-- +2.37.0 + + +From 638c507f60658678e16caef7de5864b5825e7ecf Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 1 Oct 2020 00:20:50 +0800 +Subject: [PATCH 059/297] sched/pds: Rework pds_skiplist_random_level(). 
+ +--- + kernel/sched/pds_imp.h | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index f93ac2fa3dc3..ef17fec4ec25 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -192,11 +192,9 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + * pds_skiplist_random_level -- Returns a pseudo-random level number for skip + * list node which is used in PDS run queue. + * +- * In current implementation, based on testing, the first 8 bits in microseconds +- * of niffies are suitable for random level population. +- * find_first_bit() is used to satisfy p = 0.5 between each levels, and there +- * should be platform hardware supported instruction(known as ctz/clz) to speed +- * up this function. ++ * __ffs() is used to satisfy p = 0.5 between each levels, and there should be ++ * platform instruction(known as ctz/clz) for acceleration. ++ * + * The skiplist level for a task is populated when task is created and doesn't + * change in task's life time. When task is being inserted into run queue, this + * skiplist level is set to task's sl_node->level, the skiplist insert function +@@ -204,8 +202,6 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + */ + static inline int pds_skiplist_random_level(const struct task_struct *p) + { +- long unsigned int randseed; +- + /* + * 1. Some architectures don't have better than microsecond resolution + * so mask out ~microseconds as a factor of the random seed for skiplist +@@ -213,9 +209,13 @@ static inline int pds_skiplist_random_level(const struct task_struct *p) + * 2. Use address of task structure pointer as another factor of the + * random seed for task burst forking scenario. + */ +- randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; ++ unsigned long randseed = (task_rq(p)->clock ^ (unsigned long)p) >> 10; ++ ++ randseed &= __GENMASK(NUM_SKIPLIST_LEVEL - 1, 0); ++ if (randseed) ++ return __ffs(randseed); + +- return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); ++ return (NUM_SKIPLIST_LEVEL - 1); + } + + static void sched_task_fork(struct task_struct *p, struct rq *rq) +-- +2.37.0 + + +From e9d8f798e5f97c0a6c14bd1c18540aedd0dfd066 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 11 Oct 2020 09:22:57 +0800 +Subject: [PATCH 060/297] sched/alt: Rework best cpu selection. + +Based on testing, selecting first set CPU provide better performance +than current CPU affinity based best_mask_cpu(). + +Macro SCHED_CPUMASK_FIRST_BIT() and routine sched_cpumask_first_and() +are introduced to reduce overhead calling cpumask_xxxx() routines when +NR_CPUS <= 64. +--- + kernel/sched/alt_core.c | 36 ++++++++++++++++++++++++++++-------- + 1 file changed, 28 insertions(+), 8 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index d43ca62fd00f..f6d5c9768701 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -105,6 +105,29 @@ EXPORT_SYMBOL_GPL(sched_smt_present); + * domain, see cpus_share_cache(). 
+ */ + DEFINE_PER_CPU(int, sd_llc_id); ++ ++#if NR_CPUS <= 64 ++#define SCHED_CPUMASK_FIRST_BIT(mask) (__ffs((mask).bits[0])) ++ ++static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, ++ const struct cpumask *andp) ++{ ++ unsigned long t = srcp->bits[0] & andp->bits[0]; ++ ++ if (t) ++ return __ffs(t); ++ ++ return nr_cpu_ids; ++} ++#else ++#define SCHED_CPUMASK_FIRST_BIT(mask) (cpumask_fist_bit(&(mask))) ++static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, ++ const struct cpumask *andp) ++{ ++ return cpumask_first_and(srcp, andp); ++} ++#endif ++ + #endif /* CONFIG_SMP */ + + static DEFINE_MUTEX(sched_hotcpu_mutex); +@@ -1520,9 +1543,9 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) + cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || + cpumask_and(&tmp, &chk_mask, + &sched_rq_watermark[task_sched_prio(p, rq) + 1])) +- return best_mask_cpu(task_cpu(p), &tmp); ++ return SCHED_CPUMASK_FIRST_BIT(tmp); + +- return best_mask_cpu(task_cpu(p), &chk_mask); ++ return SCHED_CPUMASK_FIRST_BIT(chk_mask); + } + + void sched_set_stop_task(int cpu, struct task_struct *stop) +@@ -3094,8 +3117,8 @@ static inline int active_load_balance_cpu_stop(void *data) + { + struct rq *rq = this_rq(); + struct task_struct *p = data; +- cpumask_t tmp; + unsigned long flags; ++ int dcpu; + + local_irq_save(flags); + +@@ -3105,12 +3128,9 @@ static inline int active_load_balance_cpu_stop(void *data) + rq->active_balance = 0; + /* _something_ may have changed the task, double check again */ + if (task_on_rq_queued(p) && task_rq(p) == rq && +- cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { +- int cpu = cpu_of(rq); +- int dcpu = __best_mask_cpu(cpu, &tmp, +- per_cpu(sched_cpu_llc_mask, cpu)); ++ (dcpu = sched_cpumask_first_and(p->cpus_ptr, &sched_sg_idle_mask)) < ++ nr_cpu_ids) + rq = move_queued_task(rq, p, dcpu); +- } + + raw_spin_unlock(&rq->lock); + raw_spin_unlock(&p->pi_lock); +-- +2.37.0 + + +From 1265bd5059a7bbd33d7e3f357c253bce68545c1a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 11 Oct 2020 11:15:21 +0800 +Subject: [PATCH 061/297] sched/alt: Remove unused sched_cpu_llc_mask. 
+ +--- + kernel/sched/alt_core.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index f6d5c9768701..40b059846496 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -92,7 +92,6 @@ static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; + + DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); + DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); +-DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); + + #ifdef CONFIG_SCHED_SMT + DEFINE_STATIC_KEY_FALSE(sched_smt_present); +@@ -5892,8 +5891,6 @@ static void sched_init_topology_cpumask_early(void) + cpumask_copy(tmp, cpu_possible_mask); + cpumask_clear_cpu(cpu, tmp); + } +- per_cpu(sched_cpu_llc_mask, cpu) = +- &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); + per_cpu(sched_cpu_affinity_end_mask, cpu) = + &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); + /*per_cpu(sd_llc_id, cpu) = cpu;*/ +@@ -5923,7 +5920,6 @@ static void sched_init_topology_cpumask(void) + TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); + #endif + per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); +- per_cpu(sched_cpu_llc_mask, cpu) = chk; + TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); + + TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); +@@ -5931,10 +5927,8 @@ static void sched_init_topology_cpumask(void) + TOPOLOGY_CPUMASK(others, cpu_online_mask, true); + + per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; +- printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", +- cpu, per_cpu(sd_llc_id, cpu), +- (int) (per_cpu(sched_cpu_llc_mask, cpu) - +- &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); ++ printk(KERN_INFO "sched: cpu#%02d llc_id = %d\n", ++ cpu, per_cpu(sd_llc_id, cpu)); + } + } + #endif +-- +2.37.0 + + +From 3af34a65f5b0831d955097aa5080d2aa2d862819 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 16 Oct 2020 15:06:09 +0800 +Subject: [PATCH 062/297] sched/alt: Introduce sched_best_cpu(). + +Introduce new framework which currently only work for LLC. Can be expend +for IMIT or BIG.little in the furture. 
+--- + kernel/sched/alt_core.c | 51 ++++++++++++++++++++++++++++++++++------ + kernel/sched/alt_sched.h | 14 ----------- + kernel/sched/topology.c | 10 +++++++- + 3 files changed, 53 insertions(+), 22 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 40b059846496..cec61ca0abb2 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -105,9 +105,17 @@ EXPORT_SYMBOL_GPL(sched_smt_present); + */ + DEFINE_PER_CPU(int, sd_llc_id); + +-#if NR_CPUS <= 64 +-#define SCHED_CPUMASK_FIRST_BIT(mask) (__ffs((mask).bits[0])) ++enum { ++ LLC_LEVEL = 1, ++ NR_BEST_CPU_LEVEL ++}; ++ ++#define NR_BEST_CPU_MASK (1 << (NR_BEST_CPU_LEVEL - 1)) + ++static cpumask_t ++sched_best_cpu_masks[NR_CPUS][NR_BEST_CPU_MASK] ____cacheline_aligned_in_smp; ++ ++#if NR_CPUS <= 64 + static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, + const struct cpumask *andp) + { +@@ -118,13 +126,35 @@ static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, + + return nr_cpu_ids; + } ++ ++static inline unsigned int sched_best_cpu(const unsigned int cpu, ++ const struct cpumask *m) ++{ ++ cpumask_t *chk = sched_best_cpu_masks[cpu]; ++ unsigned long t; ++ ++ while ((t = chk->bits[0] & m->bits[0]) == 0UL) ++ chk++; ++ ++ return __ffs(t); ++} + #else +-#define SCHED_CPUMASK_FIRST_BIT(mask) (cpumask_fist_bit(&(mask))) + static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, + const struct cpumask *andp) + { + return cpumask_first_and(srcp, andp); + } ++ ++static inline unsigned int sched_best_cpu(const unsigned int cpu, ++ const struct cpumask *m) ++{ ++ cpumask_t t, *chk = sched_best_cpu_masks[cpu]; ++ ++ while (!cpumask_and(&t, chk, m)) ++ chk++; ++ ++ return cpumask_any(t); ++} + #endif + + #endif /* CONFIG_SMP */ +@@ -822,7 +852,7 @@ int get_nohz_timer_target(void) + default_cpu = cpu; + } + +- for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ for (mask = per_cpu(sched_cpu_affinity_masks, cpu); + mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) + for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) + if (!idle_cpu(i)) +@@ -1542,9 +1572,9 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) + cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || + cpumask_and(&tmp, &chk_mask, + &sched_rq_watermark[task_sched_prio(p, rq) + 1])) +- return SCHED_CPUMASK_FIRST_BIT(tmp); ++ return sched_best_cpu(task_cpu(p), &tmp); + +- return SCHED_CPUMASK_FIRST_BIT(chk_mask); ++ return sched_best_cpu(task_cpu(p), &chk_mask); + } + + void sched_set_stop_task(int cpu, struct task_struct *stop) +@@ -3543,7 +3573,7 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) + if (cpumask_empty(&sched_rq_pending_mask)) + return 0; + +- affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ affinity_mask = per_cpu(sched_cpu_affinity_masks, cpu); + end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); + do { + int i; +@@ -5894,6 +5924,10 @@ static void sched_init_topology_cpumask_early(void) + per_cpu(sched_cpu_affinity_end_mask, cpu) = + &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); + /*per_cpu(sd_llc_id, cpu) = cpu;*/ ++ ++ for (level = 0; level < NR_BEST_CPU_MASK; level++) ++ cpumask_copy(&sched_best_cpu_masks[cpu][level], ++ cpu_possible_mask); + } + } + +@@ -5929,6 +5963,9 @@ static void sched_init_topology_cpumask(void) + per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; + printk(KERN_INFO "sched: cpu#%02d llc_id = %d\n", + cpu, per_cpu(sd_llc_id, cpu)); ++ ++ 
cpumask_copy(sched_best_cpu_masks[cpu], ++ cpu_coregroup_mask(cpu)); + } + } + #endif +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 03f8b8b1aa27..fee65eeb1405 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -225,20 +225,6 @@ enum { + + DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); + +-static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, +- const cpumask_t *mask) +-{ +- while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) +- mask++; +- return cpu; +-} +- +-static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) +-{ +- return cpumask_test_cpu(cpu, cpumask)? cpu : +- __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); +-} +- + extern void flush_smp_call_function_from_idle(void); + + #else /* !CONFIG_SMP */ +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 2c9daf90398f..3e9d4820dce7 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -2653,7 +2653,15 @@ int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; + + int sched_numa_find_closest(const struct cpumask *cpus, int cpu) + { +- return best_mask_cpu(cpu, cpus); ++ const cpumask_t *mask; ++ ++ if (cpumask_test_cpu(cpu, cpus)) ++ return cpu; ++ ++ mask = per_cpu(sched_cpu_affinity_masks, cpu); ++ while ((cpu = cpumask_any_and(cpus, mask)) >= nr_cpu_ids) ++ mask++; ++ return cpu; + } + #endif /* CONFIG_NUMA */ + #endif +-- +2.37.0 + + +From 2ecd48ff957ce766f54b9077964983ed4b718ec2 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 19 Oct 2020 17:07:00 +0800 +Subject: [PATCH 063/297] Project-C v5.9-r1 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index cec61ca0abb2..fa0ba0d55503 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.9-r0" ++#define ALT_SCHED_VERSION "v5.9-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 296bd263b86305c7668226c75bf8acbdb29b284d Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 20 Oct 2020 07:10:30 +0800 +Subject: [PATCH 064/297] sched/alt: Fix compilation when NR_CPUS > 64 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index fa0ba0d55503..edba089affc0 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -153,7 +153,7 @@ static inline unsigned int sched_best_cpu(const unsigned int cpu, + while (!cpumask_and(&t, chk, m)) + chk++; + +- return cpumask_any(t); ++ return cpumask_any(&t); + } + #endif + +-- +2.37.0 + + +From ed9808f489f64b22b889f5e329d280fad96023ec Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 26 Oct 2020 13:37:09 +0800 +Subject: [PATCH 065/297] sched/alt: Reduce NUM_SKIPLIST_LEVEL to 4. 
+ +--- + include/linux/skip_list.h | 8 +++----- + kernel/sched/pds_imp.h | 21 ++++++++------------- + 2 files changed, 11 insertions(+), 18 deletions(-) + +diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h +index 5f76038e0320..637c83ecbd6b 100644 +--- a/include/linux/skip_list.h ++++ b/include/linux/skip_list.h +@@ -50,7 +50,7 @@ + + #include + +-#define NUM_SKIPLIST_LEVEL (8) ++#define NUM_SKIPLIST_LEVEL (4) + + struct skiplist_node { + int level; /* Levels in this node */ +@@ -59,10 +59,8 @@ struct skiplist_node { + }; + + #define SKIPLIST_NODE_INIT(name) { 0,\ +- {&name, &name, &name, &name,\ +- &name, &name, &name, &name},\ +- {&name, &name, &name, &name,\ +- &name, &name, &name, &name},\ ++ {&name, &name, &name, &name},\ ++ {&name, &name, &name, &name},\ + } + + /** +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index ef17fec4ec25..73fe42b84fc0 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -168,22 +168,17 @@ static inline unsigned long sched_queue_watermark(struct rq *rq) + + static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + { +- struct skiplist_node *node = p->sl_node.prev[0]; ++ struct skiplist_node *node; + +- if (node != &rq->sl_header) { +- struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); +- +- if (t->priodl > p->priodl) +- return true; +- } ++ node = p->sl_node.prev[0]; ++ if (node != &rq->sl_header && ++ skiplist_entry(node, struct task_struct, sl_node)->priodl > p->priodl) ++ return true; + + node = p->sl_node.next[0]; +- if (node != &rq->sl_header) { +- struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); +- +- if (t->priodl < p->priodl) +- return true; +- } ++ if (node != &rq->sl_header && ++ skiplist_entry(node, struct task_struct, sl_node)->priodl < p->priodl) ++ return true; + + return false; + } +-- +2.37.0 + + +From e4062774baf84edf6a5df6952f96cf66f14edae3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 3 Nov 2020 22:13:13 +0800 +Subject: [PATCH 066/297] Revert "sched/alt: Fix compilation when NR_CPUS > 64" + +This reverts commit 9a879be8808af904d6faf63b6a9247e76a3b9d7e. +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index edba089affc0..fa0ba0d55503 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -153,7 +153,7 @@ static inline unsigned int sched_best_cpu(const unsigned int cpu, + while (!cpumask_and(&t, chk, m)) + chk++; + +- return cpumask_any(&t); ++ return cpumask_any(t); + } + #endif + +-- +2.37.0 + + +From 7115fdcdcea39c530369d5c60f709d99665fab1d Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 3 Nov 2020 22:13:28 +0800 +Subject: [PATCH 067/297] Revert "sched/alt: Introduce sched_best_cpu()." + +This reverts commit 7e6b0567a19b1f9b8beb97255bf3ffee5a287f01. 
+--- + kernel/sched/alt_core.c | 51 ++++++---------------------------------- + kernel/sched/alt_sched.h | 14 +++++++++++ + kernel/sched/topology.c | 10 +------- + 3 files changed, 22 insertions(+), 53 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index fa0ba0d55503..57d10ccf39b8 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -105,17 +105,9 @@ EXPORT_SYMBOL_GPL(sched_smt_present); + */ + DEFINE_PER_CPU(int, sd_llc_id); + +-enum { +- LLC_LEVEL = 1, +- NR_BEST_CPU_LEVEL +-}; +- +-#define NR_BEST_CPU_MASK (1 << (NR_BEST_CPU_LEVEL - 1)) +- +-static cpumask_t +-sched_best_cpu_masks[NR_CPUS][NR_BEST_CPU_MASK] ____cacheline_aligned_in_smp; +- + #if NR_CPUS <= 64 ++#define SCHED_CPUMASK_FIRST_BIT(mask) (__ffs((mask).bits[0])) ++ + static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, + const struct cpumask *andp) + { +@@ -126,35 +118,13 @@ static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, + + return nr_cpu_ids; + } +- +-static inline unsigned int sched_best_cpu(const unsigned int cpu, +- const struct cpumask *m) +-{ +- cpumask_t *chk = sched_best_cpu_masks[cpu]; +- unsigned long t; +- +- while ((t = chk->bits[0] & m->bits[0]) == 0UL) +- chk++; +- +- return __ffs(t); +-} + #else ++#define SCHED_CPUMASK_FIRST_BIT(mask) (cpumask_fist_bit(&(mask))) + static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, + const struct cpumask *andp) + { + return cpumask_first_and(srcp, andp); + } +- +-static inline unsigned int sched_best_cpu(const unsigned int cpu, +- const struct cpumask *m) +-{ +- cpumask_t t, *chk = sched_best_cpu_masks[cpu]; +- +- while (!cpumask_and(&t, chk, m)) +- chk++; +- +- return cpumask_any(t); +-} + #endif + + #endif /* CONFIG_SMP */ +@@ -852,7 +822,7 @@ int get_nohz_timer_target(void) + default_cpu = cpu; + } + +- for (mask = per_cpu(sched_cpu_affinity_masks, cpu); ++ for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); + mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) + for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) + if (!idle_cpu(i)) +@@ -1572,9 +1542,9 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) + cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || + cpumask_and(&tmp, &chk_mask, + &sched_rq_watermark[task_sched_prio(p, rq) + 1])) +- return sched_best_cpu(task_cpu(p), &tmp); ++ return SCHED_CPUMASK_FIRST_BIT(tmp); + +- return sched_best_cpu(task_cpu(p), &chk_mask); ++ return SCHED_CPUMASK_FIRST_BIT(chk_mask); + } + + void sched_set_stop_task(int cpu, struct task_struct *stop) +@@ -3573,7 +3543,7 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) + if (cpumask_empty(&sched_rq_pending_mask)) + return 0; + +- affinity_mask = per_cpu(sched_cpu_affinity_masks, cpu); ++ affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); + end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); + do { + int i; +@@ -5924,10 +5894,6 @@ static void sched_init_topology_cpumask_early(void) + per_cpu(sched_cpu_affinity_end_mask, cpu) = + &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); + /*per_cpu(sd_llc_id, cpu) = cpu;*/ +- +- for (level = 0; level < NR_BEST_CPU_MASK; level++) +- cpumask_copy(&sched_best_cpu_masks[cpu][level], +- cpu_possible_mask); + } + } + +@@ -5963,9 +5929,6 @@ static void sched_init_topology_cpumask(void) + per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; + printk(KERN_INFO "sched: cpu#%02d llc_id = %d\n", + cpu, per_cpu(sd_llc_id, cpu)); +- +- 
cpumask_copy(sched_best_cpu_masks[cpu], +- cpu_coregroup_mask(cpu)); + } + } + #endif +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index fee65eeb1405..03f8b8b1aa27 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -225,6 +225,20 @@ enum { + + DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); + ++static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, ++ const cpumask_t *mask) ++{ ++ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) ++ mask++; ++ return cpu; ++} ++ ++static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) ++{ ++ return cpumask_test_cpu(cpu, cpumask)? cpu : ++ __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); ++} ++ + extern void flush_smp_call_function_from_idle(void); + + #else /* !CONFIG_SMP */ +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 3e9d4820dce7..2c9daf90398f 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -2653,15 +2653,7 @@ int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; + + int sched_numa_find_closest(const struct cpumask *cpus, int cpu) + { +- const cpumask_t *mask; +- +- if (cpumask_test_cpu(cpu, cpus)) +- return cpu; +- +- mask = per_cpu(sched_cpu_affinity_masks, cpu); +- while ((cpu = cpumask_any_and(cpus, mask)) >= nr_cpu_ids) +- mask++; +- return cpu; ++ return best_mask_cpu(cpu, cpus); + } + #endif /* CONFIG_NUMA */ + #endif +-- +2.37.0 + + +From 54b15bb3e55454ca70be55d86b6439ade2f36949 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 3 Nov 2020 22:13:44 +0800 +Subject: [PATCH 068/297] Revert "sched/alt: Remove unused sched_cpu_llc_mask." + +This reverts commit d18994d3d143830fe250b9a27e76f3c1b51459d7. +--- + kernel/sched/alt_core.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 57d10ccf39b8..1e2adb3d6a7b 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -92,6 +92,7 @@ static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; + + DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); + DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); ++DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); + + #ifdef CONFIG_SCHED_SMT + DEFINE_STATIC_KEY_FALSE(sched_smt_present); +@@ -5891,6 +5892,8 @@ static void sched_init_topology_cpumask_early(void) + cpumask_copy(tmp, cpu_possible_mask); + cpumask_clear_cpu(cpu, tmp); + } ++ per_cpu(sched_cpu_llc_mask, cpu) = ++ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); + per_cpu(sched_cpu_affinity_end_mask, cpu) = + &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); + /*per_cpu(sd_llc_id, cpu) = cpu;*/ +@@ -5920,6 +5923,7 @@ static void sched_init_topology_cpumask(void) + TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); + #endif + per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); ++ per_cpu(sched_cpu_llc_mask, cpu) = chk; + TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); + + TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); +@@ -5927,8 +5931,10 @@ static void sched_init_topology_cpumask(void) + TOPOLOGY_CPUMASK(others, cpu_online_mask, true); + + per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; +- printk(KERN_INFO "sched: cpu#%02d llc_id = %d\n", +- cpu, per_cpu(sd_llc_id, cpu)); ++ printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", ++ cpu, per_cpu(sd_llc_id, cpu), ++ (int) (per_cpu(sched_cpu_llc_mask, cpu) - 
++ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); + } + } + #endif +-- +2.37.0 + + +From df8313744bcccdcdf6c67655ca021e90fed6a5d3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 3 Nov 2020 22:13:59 +0800 +Subject: [PATCH 069/297] Revert "sched/alt: Rework best cpu selection." + +This reverts commit 173014cfa89544d02216612e812b950a31246c6d. +--- + kernel/sched/alt_core.c | 36 ++++++++---------------------------- + 1 file changed, 8 insertions(+), 28 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 1e2adb3d6a7b..7cb0edc7fe8c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -105,29 +105,6 @@ EXPORT_SYMBOL_GPL(sched_smt_present); + * domain, see cpus_share_cache(). + */ + DEFINE_PER_CPU(int, sd_llc_id); +- +-#if NR_CPUS <= 64 +-#define SCHED_CPUMASK_FIRST_BIT(mask) (__ffs((mask).bits[0])) +- +-static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, +- const struct cpumask *andp) +-{ +- unsigned long t = srcp->bits[0] & andp->bits[0]; +- +- if (t) +- return __ffs(t); +- +- return nr_cpu_ids; +-} +-#else +-#define SCHED_CPUMASK_FIRST_BIT(mask) (cpumask_fist_bit(&(mask))) +-static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, +- const struct cpumask *andp) +-{ +- return cpumask_first_and(srcp, andp); +-} +-#endif +- + #endif /* CONFIG_SMP */ + + static DEFINE_MUTEX(sched_hotcpu_mutex); +@@ -1543,9 +1520,9 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) + cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || + cpumask_and(&tmp, &chk_mask, + &sched_rq_watermark[task_sched_prio(p, rq) + 1])) +- return SCHED_CPUMASK_FIRST_BIT(tmp); ++ return best_mask_cpu(task_cpu(p), &tmp); + +- return SCHED_CPUMASK_FIRST_BIT(chk_mask); ++ return best_mask_cpu(task_cpu(p), &chk_mask); + } + + void sched_set_stop_task(int cpu, struct task_struct *stop) +@@ -3117,8 +3094,8 @@ static inline int active_load_balance_cpu_stop(void *data) + { + struct rq *rq = this_rq(); + struct task_struct *p = data; ++ cpumask_t tmp; + unsigned long flags; +- int dcpu; + + local_irq_save(flags); + +@@ -3128,9 +3105,12 @@ static inline int active_load_balance_cpu_stop(void *data) + rq->active_balance = 0; + /* _something_ may have changed the task, double check again */ + if (task_on_rq_queued(p) && task_rq(p) == rq && +- (dcpu = sched_cpumask_first_and(p->cpus_ptr, &sched_sg_idle_mask)) < +- nr_cpu_ids) ++ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { ++ int cpu = cpu_of(rq); ++ int dcpu = __best_mask_cpu(cpu, &tmp, ++ per_cpu(sched_cpu_llc_mask, cpu)); + rq = move_queued_task(rq, p, dcpu); ++ } + + raw_spin_unlock(&rq->lock); + raw_spin_unlock(&p->pi_lock); +-- +2.37.0 + + +From b8e67a31e27773a5b57f2bcf4c16b9d6a9882102 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 9 Nov 2020 11:08:36 +0800 +Subject: [PATCH 070/297] sched/alt: Enhance best_mask_cpu() for better + performance. + +Enhance best_mask_cpu() performance when NR_CPUS <= 64. 
+--- + kernel/sched/alt_core.c | 6 ++++-- + kernel/sched/alt_sched.h | 31 ++++++++++++++++++++++--------- + 2 files changed, 26 insertions(+), 11 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 7cb0edc7fe8c..3a4281ba65e6 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -90,7 +90,7 @@ int sched_yield_type __read_mostly = 1; + #ifdef CONFIG_SMP + static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; + +-DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); ++DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_affinity_masks); + DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); + DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); + +@@ -5867,7 +5867,7 @@ static void sched_init_topology_cpumask_early(void) + cpumask_t *tmp; + + for_each_possible_cpu(cpu) { +- for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { ++ for (level = 0; level < NR_CPU_AFFINITY_LEVELS; level++) { + tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); + cpumask_copy(tmp, cpu_possible_mask); + cpumask_clear_cpu(cpu, tmp); +@@ -5898,6 +5898,8 @@ static void sched_init_topology_cpumask(void) + + chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); + ++ cpumask_copy(chk++, cpumask_of(cpu)); ++ + cpumask_complement(chk, cpumask_of(cpu)); + #ifdef CONFIG_SCHED_SMT + TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 03f8b8b1aa27..4698d6d16a2d 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -213,30 +213,43 @@ static inline void unregister_sched_domain_sysctl(void) + extern bool sched_smp_initialized; + + enum { +- BASE_CPU_AFFINITY_CHK_LEVEL = 1, ++ ITSELF_LEVEL_SPACE_HOLDER, + #ifdef CONFIG_SCHED_SMT +- SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, ++ SMT_LEVEL_SPACE_HOLDER, + #endif +-#ifdef CONFIG_SCHED_MC +- MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, +-#endif +- NR_CPU_AFFINITY_CHK_LEVEL ++ COREGROUP_LEVEL_SPACE_HOLDER, ++ CORE_LEVEL_SPACE_HOLDER, ++ OTHER_LEVEL_SPACE_HOLDER, ++ NR_CPU_AFFINITY_LEVELS + }; + +-DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); ++DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_affinity_masks); + + static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, + const cpumask_t *mask) + { ++#if NR_CPUS <= 64 ++ unsigned long t; ++ ++ while ((t = cpumask->bits[0] & mask->bits[0]) == 0UL) ++ mask++; ++ ++ return __ffs(t); ++#else + while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) + mask++; + return cpu; ++#endif + } + + static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) + { +- return cpumask_test_cpu(cpu, cpumask)? cpu : +- __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); ++#if NR_CPUS <= 64 ++ return __best_mask_cpu(cpu, cpumask, per_cpu(sched_cpu_affinity_masks, cpu)); ++#else ++ return cpumask_test_cpu(cpu, cpumask) ? 
cpu: ++ __best_mask_cpu(cpu, cpumask, per_cpu(sched_cpu_affinity_masks, cpu) + 1); ++#endif + } + + extern void flush_smp_call_function_from_idle(void); +-- +2.37.0 + + +From a8577b8726041fea13ca9e51472b1b5b253ced35 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 20 Nov 2020 10:26:13 +0800 +Subject: [PATCH 071/297] Project-C v5.9-r2 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3a4281ba65e6..e485c76b1668 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.9-r1" ++#define ALT_SCHED_VERSION "v5.9-r2" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 455a4ecb35ae5d682811f4ad1e83ec21f16e8b67 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 4 Dec 2020 10:55:21 +0800 +Subject: [PATCH 072/297] sched/alt: Minor improvement for + sched_cpu_affinity_masks. + +--- + kernel/sched/alt_core.c | 29 +++++++++++++---------------- + 1 file changed, 13 insertions(+), 16 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index e485c76b1668..58a432fffd53 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -800,7 +800,7 @@ int get_nohz_timer_target(void) + default_cpu = cpu; + } + +- for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ for (mask = per_cpu(sched_cpu_affinity_masks, cpu) + 1; + mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) + for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) + if (!idle_cpu(i)) +@@ -3524,7 +3524,7 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) + if (cpumask_empty(&sched_rq_pending_mask)) + return 0; + +- affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ affinity_mask = per_cpu(sched_cpu_affinity_masks, cpu) + 1; + end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); + do { + int i; +@@ -5863,19 +5863,18 @@ int sched_cpu_dying(unsigned int cpu) + #ifdef CONFIG_SMP + static void sched_init_topology_cpumask_early(void) + { +- int cpu, level; ++ int cpu; + cpumask_t *tmp; + + for_each_possible_cpu(cpu) { +- for (level = 0; level < NR_CPU_AFFINITY_LEVELS; level++) { +- tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); +- cpumask_copy(tmp, cpu_possible_mask); +- cpumask_clear_cpu(cpu, tmp); +- } +- per_cpu(sched_cpu_llc_mask, cpu) = +- &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); +- per_cpu(sched_cpu_affinity_end_mask, cpu) = +- &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); ++ tmp = per_cpu(sched_cpu_affinity_masks, cpu); ++ ++ cpumask_copy(tmp, cpumask_of(cpu)); ++ tmp++; ++ cpumask_copy(tmp, cpu_possible_mask); ++ cpumask_clear_cpu(cpu, tmp); ++ per_cpu(sched_cpu_llc_mask, cpu) = tmp; ++ per_cpu(sched_cpu_affinity_end_mask, cpu) = ++tmp; + /*per_cpu(sd_llc_id, cpu) = cpu;*/ + } + } +@@ -5896,9 +5895,7 @@ static void sched_init_topology_cpumask(void) + /* take chance to reset time slice for idle tasks */ + cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; + +- chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); +- +- cpumask_copy(chk++, cpumask_of(cpu)); ++ chk = per_cpu(sched_cpu_affinity_masks, cpu) + 1; + + cpumask_complement(chk, cpumask_of(cpu)); + #ifdef CONFIG_SCHED_SMT +@@ -5916,7 +5913,7 @@ static void sched_init_topology_cpumask(void) + printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", + cpu, per_cpu(sd_llc_id, cpu), 
+ (int) (per_cpu(sched_cpu_llc_mask, cpu) - +- &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); ++ per_cpu(sched_cpu_affinity_masks, cpu))); + } + } + #endif +-- +2.37.0 + + +From e0fc06cd1ee65246bafcc6c2edce7bfc59963e4a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 6 Dec 2020 19:26:20 +0800 +Subject: [PATCH 073/297] sched/pds: Fix for low priority NORMAL policy task. + +Task priodl is not properly set, which cause low priority task freezed. +--- + init/init_task.c | 7 ++++++- + kernel/sched/alt_core.c | 8 -------- + kernel/sched/bmq_imp.h | 8 ++++++++ + kernel/sched/pds_imp.h | 8 ++++++++ + 4 files changed, 22 insertions(+), 9 deletions(-) + +diff --git a/init/init_task.c b/init/init_task.c +index fc8fcdbbd07a..a98a65334c15 100644 +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -75,10 +75,15 @@ struct task_struct init_task + .stack = init_stack, + .usage = REFCOUNT_INIT(2), + .flags = PF_KTHREAD, +-#ifdef CONFIG_SCHED_ALT ++#ifdef CONFIG_SCHED_BMQ + .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, + .static_prio = DEFAULT_PRIO, + .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, ++#endif ++#ifdef CONFIG_SCHED_PDS ++ .prio = MAX_USER_RT_PRIO, ++ .static_prio = DEFAULT_PRIO, ++ .normal_prio = MAX_USER_RT_PRIO, + #else + .prio = MAX_PRIO - 20, + .static_prio = MAX_PRIO - 20, +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 58a432fffd53..3808507c44c1 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1035,14 +1035,6 @@ static inline void hrtick_rq_init(struct rq *rq) + } + #endif /* CONFIG_SCHED_HRTICK */ + +-static inline int normal_prio(struct task_struct *p) +-{ +- if (task_has_rt_policy(p)) +- return MAX_RT_PRIO - 1 - p->rt_priority; +- +- return p->static_prio + MAX_PRIORITY_ADJ; +-} +- + /* + * Calculate the current priority, i.e. the priority + * taken into account by the scheduler. This value might +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index ad9a7c448da7..e213e82475ab 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -36,6 +36,14 @@ static inline void deboost_task(struct task_struct *p) + /* + * Common interfaces + */ ++static inline int normal_prio(struct task_struct *p) ++{ ++ if (task_has_rt_policy(p)) ++ return MAX_RT_PRIO - 1 - p->rt_priority; ++ ++ return p->static_prio + MAX_PRIORITY_ADJ; ++} ++ + static inline int task_sched_prio(struct task_struct *p, struct rq *rq) + { + return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index 73fe42b84fc0..5a5de14598d4 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -26,6 +26,14 @@ static const unsigned char dl_level_map[] = { + 1, 0 + }; + ++static inline int normal_prio(struct task_struct *p) ++{ ++ if (task_has_rt_policy(p)) ++ return MAX_RT_PRIO - 1 - p->rt_priority; ++ ++ return MAX_USER_RT_PRIO; ++} ++ + static inline int + task_sched_prio(const struct task_struct *p, const struct rq *rq) + { +-- +2.37.0 + + +From 13914b2781c16df8a9098f6f3b37c09904e98092 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 7 Dec 2020 10:19:13 +0800 +Subject: [PATCH 074/297] Project-C v5.9-r3 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3808507c44c1..a9c82fffef59 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.9-r2" ++#define ALT_SCHED_VERSION "v5.9-r3" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From f841dde802c5d228276d2d82a2d77cafc39a7386 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 7 Dec 2020 14:06:53 +0800 +Subject: [PATCH 075/297] sched/alt: [Sync] c1cecf884ad7 sched: Cache + task_struct::flags in sched_submit_work() + +--- + kernel/sched/alt_core.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index a9c82fffef59..e96b385722cc 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3808,9 +3808,12 @@ void __noreturn do_task_dead(void) + + static inline void sched_submit_work(struct task_struct *tsk) + { ++ unsigned int task_flags; ++ + if (!tsk->state) + return; + ++ task_flags = tsk->flags; + /* + * If a worker went to sleep, notify and ask workqueue whether + * it wants to wake up a task to maintain concurrency. +@@ -3819,9 +3822,9 @@ static inline void sched_submit_work(struct task_struct *tsk) + * in the possible wakeup of a kworker and because wq_worker_sleeping() + * requires it. 
+ */ +- if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { ++ if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) { + preempt_disable(); +- if (tsk->flags & PF_WQ_WORKER) ++ if (task_flags & PF_WQ_WORKER) + wq_worker_sleeping(tsk); + else + io_wq_worker_sleeping(tsk); +-- +2.37.0 + + +From 819e09cc68cafc81a5c2a5a470470be85130ac41 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 7 Dec 2020 14:29:25 +0800 +Subject: [PATCH 076/297] sched/alt: [Sync] ec618b84f6e1 sched: Fix + rq->nr_iowait ordering + +--- + kernel/sched/alt_core.c | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index e96b385722cc..68a7c1d95263 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1697,6 +1697,15 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) + if (p->sched_contributes_to_load) + rq->nr_uninterruptible--; + ++ if ( ++#ifdef CONFIG_SMP ++ !(wake_flags & WF_MIGRATED) && ++#endif ++ p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } ++ + activate_task(p, rq); + ttwu_do_wakeup(rq, p, 0); + } +@@ -2088,11 +2097,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) + goto unlock; + +- if (p->in_iowait) { +- delayacct_blkio_end(p); +- atomic_dec(&task_rq(p)->nr_iowait); +- } +- + #ifdef CONFIG_SMP + /* + * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be +@@ -2166,6 +2170,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + cpu = select_task_rq(p, this_rq()); + + if (cpu != task_cpu(p)) { ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } ++ + wake_flags |= WF_MIGRATED; + psi_ttwu_dequeue(p); + set_task_cpu(p, cpu); +-- +2.37.0 + + +From f55492a89b263df7277ed5725070cc58857ee2a7 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 8 Dec 2020 15:01:33 +0800 +Subject: [PATCH 077/297] sched/pds: Fix PDS nice accounting. 
+ +--- + kernel/sched/alt_sched.h | 2 ++ + kernel/sched/bmq.h | 6 ------ + kernel/sched/bmq_imp.h | 5 +++++ + kernel/sched/pds.h | 7 +------ + kernel/sched/pds_imp.h | 12 ++++++++++++ + 5 files changed, 20 insertions(+), 12 deletions(-) + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 4698d6d16a2d..fd75b7895469 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -373,6 +373,8 @@ static inline bool task_running(struct task_struct *p) + return p->on_cpu; + } + ++extern int task_running_nice(struct task_struct *p); ++ + extern struct static_key_false sched_schedstats; + + #ifdef CONFIG_CPU_IDLE +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index 1364824cfa7d..4ce30c30bd3e 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -11,10 +11,4 @@ struct bmq { + struct list_head heads[SCHED_BITS]; + }; + +- +-static inline int task_running_nice(struct task_struct *p) +-{ +- return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); +-} +- + #endif +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index e213e82475ab..83c2d019c446 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -62,6 +62,11 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + } + } + ++static inline int task_running_nice(struct task_struct *p) ++{ ++ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); ++} ++ + static inline void update_task_priodl(struct task_struct *p) {} + + static inline unsigned long sched_queue_watermark(struct rq *rq) +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 7fdeace7e8a5..623908cf4380 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -3,12 +3,7 @@ + + /* bits: + * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ +-#define SCHED_BITS (MAX_RT_PRIO + 20 + 1) ++#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) + #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) + +-static inline int task_running_nice(struct task_struct *p) +-{ +- return (p->prio > DEFAULT_PRIO); +-} +- + #endif +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index 5a5de14598d4..6b2140f0a69e 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -26,6 +26,13 @@ static const unsigned char dl_level_map[] = { + 1, 0 + }; + ++/* DEFAULT_SCHED_PRIO: ++ * dl_level_map[(user_prio2deadline[39] - user_prio2deadline[0]) >> 21] = ++ * dl_level_map[68] = ++ * 10 ++ */ ++#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) ++ + static inline int normal_prio(struct task_struct *p) + { + if (task_has_rt_policy(p)) +@@ -51,6 +58,11 @@ task_sched_prio(const struct task_struct *p, const struct rq *rq) + return MAX_RT_PRIO + dl_level_map[delta]; + } + ++int task_running_nice(struct task_struct *p) ++{ ++ return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; ++} ++ + static inline void update_task_priodl(struct task_struct *p) + { + p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); +-- +2.37.0 + + +From f4b910e9af3a7b8895978a7977e9197d2edc7255 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 14 Dec 2020 14:03:30 +0800 +Subject: [PATCH 078/297] Project-C v5.10-r0 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 68a7c1d95263..1a857d7e230b 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define 
ALT_SCHED_VERSION "v5.9-r3" ++#define ALT_SCHED_VERSION "v5.10-r0" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 044a6f7ec01500fc42d979242bf1f1c9bbefb36f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 15 Dec 2020 11:19:05 +0800 +Subject: [PATCH 079/297] sched/bmq: Fix compilation issue. + +--- + kernel/sched/bmq_imp.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index 83c2d019c446..3faba5f9bb69 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -62,7 +62,7 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + } + } + +-static inline int task_running_nice(struct task_struct *p) ++inline int task_running_nice(struct task_struct *p) + { + return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); + } +-- +2.37.0 + + +From cd88ed10b08fe8511bdb2a66458983d82472850a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 22 Dec 2020 11:08:31 +0800 +Subject: [PATCH 080/297] sched/alt: rcu_read_xxx() put_task_xxxx() sync up. + +--- + kernel/sched/alt_core.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 1a857d7e230b..982562808cc7 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4786,10 +4786,15 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, + rcu_read_lock(); + retval = -ESRCH; + p = find_process_by_pid(pid); +- if (p != NULL) +- retval = sched_setattr(p, &attr); ++ if (likely(p)) ++ get_task_struct(p); + rcu_read_unlock(); + ++ if (likely(p)) { ++ retval = sched_setattr(p, &attr); ++ put_task_struct(p); ++ } ++ + return retval; + } + +@@ -4961,13 +4966,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) + struct task_struct *p; + int retval; + +- get_online_cpus(); + rcu_read_lock(); + + p = find_process_by_pid(pid); + if (!p) { + rcu_read_unlock(); +- put_online_cpus(); + return -ESRCH; + } + +@@ -4992,17 +4995,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) + rcu_read_lock(); + if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { + rcu_read_unlock(); +- goto out_unlock; ++ goto out_free_new_mask; + } + rcu_read_unlock(); + } + + retval = security_task_setscheduler(p); + if (retval) +- goto out_unlock; ++ goto out_free_new_mask; + + cpuset_cpus_allowed(p, cpus_allowed); + cpumask_and(new_mask, in_mask, cpus_allowed); ++ + again: + retval = __set_cpus_allowed_ptr(p, new_mask, true); + +@@ -5018,13 +5022,12 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) + goto again; + } + } +-out_unlock: ++out_free_new_mask: + free_cpumask_var(new_mask); + out_free_cpus_allowed: + free_cpumask_var(cpus_allowed); + out_put_task: + put_task_struct(p); +- put_online_cpus(); + return retval; + } + +-- +2.37.0 + + +From a210c246c4b5a9cdfbdd8f049aed32af47897e02 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Dec 2020 09:30:03 +0800 +Subject: [PATCH 081/297] sched/alt: Introduce sched_cpu_topo_masks. + +Introduce sched_cpu_topo_masks and rework best_mask_cpu(), which help to +prefered cpu implementation later. 
+--- + kernel/sched/alt_core.c | 26 ++++++++++++++++------ + kernel/sched/alt_sched.h | 48 +++++++++++++++++++++++++++++++++++----- + 2 files changed, 62 insertions(+), 12 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 982562808cc7..4c008d3cd0db 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -92,6 +92,8 @@ static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; + + DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_affinity_masks); + DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); ++ ++DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); + DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); + + #ifdef CONFIG_SCHED_SMT +@@ -5874,42 +5876,52 @@ static void sched_init_topology_cpumask_early(void) + cpumask_t *tmp; + + for_each_possible_cpu(cpu) { ++ /* init affinity masks */ + tmp = per_cpu(sched_cpu_affinity_masks, cpu); + + cpumask_copy(tmp, cpumask_of(cpu)); + tmp++; + cpumask_copy(tmp, cpu_possible_mask); + cpumask_clear_cpu(cpu, tmp); +- per_cpu(sched_cpu_llc_mask, cpu) = tmp; + per_cpu(sched_cpu_affinity_end_mask, cpu) = ++tmp; ++ /* init topo masks */ ++ tmp = per_cpu(sched_cpu_topo_masks, cpu); ++ ++ cpumask_copy(tmp, cpumask_of(cpu)); ++ tmp++; ++ cpumask_copy(tmp, cpu_possible_mask); ++ per_cpu(sched_cpu_llc_mask, cpu) = tmp; + /*per_cpu(sd_llc_id, cpu) = cpu;*/ + } + } + + #define TOPOLOGY_CPUMASK(name, mask, last) \ +- if (cpumask_and(chk, chk, mask)) \ +- printk(KERN_INFO "sched: cpu#%02d affinity mask: 0x%08lx - "#name,\ +- cpu, (chk++)->bits[0]); \ ++ if (cpumask_and(chk, chk, mask)) { \ ++ cpumask_copy(topo, mask); \ ++ printk(KERN_INFO "sched: cpu#%02d affinity: 0x%08lx topo: 0x%08lx - "#name,\ ++ cpu, (chk++)->bits[0], (topo++)->bits[0]); \ ++ } \ + if (!last) \ + cpumask_complement(chk, mask) + + static void sched_init_topology_cpumask(void) + { + int cpu; +- cpumask_t *chk; ++ cpumask_t *chk, *topo; + + for_each_online_cpu(cpu) { + /* take chance to reset time slice for idle tasks */ + cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; + + chk = per_cpu(sched_cpu_affinity_masks, cpu) + 1; ++ topo = per_cpu(sched_cpu_topo_masks, cpu) + 1; + + cpumask_complement(chk, cpumask_of(cpu)); + #ifdef CONFIG_SCHED_SMT + TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); + #endif + per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); +- per_cpu(sched_cpu_llc_mask, cpu) = chk; ++ per_cpu(sched_cpu_llc_mask, cpu) = topo; + TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); + + TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); +@@ -5920,7 +5932,7 @@ static void sched_init_topology_cpumask(void) + printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", + cpu, per_cpu(sd_llc_id, cpu), + (int) (per_cpu(sched_cpu_llc_mask, cpu) - +- per_cpu(sched_cpu_affinity_masks, cpu))); ++ per_cpu(sched_cpu_topo_masks, cpu))); + } + } + #endif +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index fd75b7895469..5d6ee22875b9 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -223,7 +223,8 @@ enum { + NR_CPU_AFFINITY_LEVELS + }; + +-DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_affinity_masks); ++DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); ++DECLARE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); + + static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, + const cpumask_t *mask) +@@ -242,13 +243,50 @@ static inline int __best_mask_cpu(int 
cpu, const cpumask_t *cpumask, + #endif + } + +-static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) ++static inline int best_mask_cpu(int cpu, cpumask_t *mask) + { + #if NR_CPUS <= 64 +- return __best_mask_cpu(cpu, cpumask, per_cpu(sched_cpu_affinity_masks, cpu)); ++ unsigned long llc_match; ++ cpumask_t *chk = per_cpu(sched_cpu_llc_mask, cpu); ++ ++ if ((llc_match = mask->bits[0] & chk->bits[0])) { ++ unsigned long match; ++ ++ chk = per_cpu(sched_cpu_topo_masks, cpu); ++ if (mask->bits[0] & chk->bits[0]) ++ return cpu; ++ ++#ifdef CONFIG_SCHED_SMT ++ chk++; ++ if ((match = mask->bits[0] & chk->bits[0])) ++ return __ffs(match); ++#endif ++ ++ return __ffs(llc_match); ++ } ++ ++ return __best_mask_cpu(cpu, mask, chk + 1); + #else +- return cpumask_test_cpu(cpu, cpumask) ? cpu: +- __best_mask_cpu(cpu, cpumask, per_cpu(sched_cpu_affinity_masks, cpu) + 1); ++ cpumask_t llc_match; ++ cpumask_t *chk = per_cpu(sched_cpu_llc_mask, cpu); ++ ++ if (cpumask_and(&llc_match, mask, chk)) { ++ cpumask_t tmp; ++ ++ chk = per_cpu(sched_cpu_topo_masks, cpu); ++ if (cpumask_test_cpu(cpu, mask)) ++ return cpu; ++ ++#ifdef CONFIG_SCHED_SMT ++ chk++; ++ if (cpumask_and(&tmp, mask, chk)) ++ return cpumask_any(&tmp); ++#endif ++ ++ return cpumask_any(&llc_match); ++ } ++ ++ return __best_mask_cpu(cpu, mask, chk + 1); + #endif + } + +-- +2.37.0 + + +From e5a5786ad8751c3a56dca50133a52d580e320b2e Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Dec 2020 09:33:25 +0800 +Subject: [PATCH 082/297] Project-C v5.10-r1 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4c008d3cd0db..9880d9b50f7e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.10-r0" ++#define ALT_SCHED_VERSION "v5.10-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 75c6638ed1d2ce05c482ad7ebadd2ef9fb7873e3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Dec 2020 11:33:48 +0800 +Subject: [PATCH 083/297] sched/alt: Fix UP compilation warning. 
+ +--- + kernel/sched/bmq_imp.h | 2 ++ + kernel/sched/pds_imp.h | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index 3faba5f9bb69..13eda4b26b6a 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -185,11 +185,13 @@ static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) + p->boost_prio = MAX_PRIORITY_ADJ; + } + ++#ifdef CONFIG_SMP + static void sched_task_ttwu(struct task_struct *p) + { + if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) + boost_task(p); + } ++#endif + + static void sched_task_deactivate(struct task_struct *p, struct rq *rq) + { +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index 6b2140f0a69e..b1ad3d0b0430 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -268,5 +268,7 @@ static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) + time_slice_expired(p, rq); + } + ++#ifdef CONFIG_SMP + static void sched_task_ttwu(struct task_struct *p) {} ++#endif + static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} +-- +2.37.0 + + +From def8da4761d52dd7b1311d7a5835549c28158883 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 31 Dec 2020 08:50:38 +0800 +Subject: [PATCH 084/297] sched/alt: Fix compilation error in + sched_numa_find_closest(). + +--- + kernel/sched/alt_sched.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 5d6ee22875b9..d979b4b1aac2 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -243,7 +243,7 @@ static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, + #endif + } + +-static inline int best_mask_cpu(int cpu, cpumask_t *mask) ++static inline int best_mask_cpu(int cpu, const cpumask_t *mask) + { + #if NR_CPUS <= 64 + unsigned long llc_match; +-- +2.37.0 + + +From 830d14b33ababafb52f415b4f5c5c6fe97a3fc2a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 31 Dec 2020 08:56:11 +0800 +Subject: [PATCH 085/297] Project-C v5.10-r2 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 9880d9b50f7e..be766515662c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.10-r1" ++#define ALT_SCHED_VERSION "v5.10-r2" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 9194a598ec866367b2981b4bae8809d150dac913 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 4 Jan 2021 14:41:27 +0800 +Subject: [PATCH 086/297] sched/alt: Enable sched_schedstats sysctl interface. 
+ +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index be766515662c..148c2fc477af 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1667,7 +1667,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) + if (!schedstat_enabled()) + return; + +- rq= this_rq(); ++ rq = this_rq(); + + #ifdef CONFIG_SMP + if (cpu == rq->cpu) +-- +2.37.0 + + +From f50db27e9b2c96280175daf255b4e90b1baaaad4 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 26 Jan 2021 14:04:46 +0800 +Subject: [PATCH 087/297] sched/alt: [Sync] 9f68b5b74c48 sched: Detect call to + schedule from critical entry code + +--- + kernel/sched/alt_core.c | 1 + + kernel/sched/alt_sched.h | 6 ++++++ + 2 files changed, 7 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 148c2fc477af..86c1ca67b675 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3464,6 +3464,7 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) + preempt_count_set(PREEMPT_DISABLED); + } + rcu_sleep_check(); ++ SCHED_WARN_ON(ct_state() == CONTEXT_USER); + + profile_hit(SCHED_PROFILING, __builtin_return_address(0)); + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index d979b4b1aac2..a157800317e9 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -55,6 +55,12 @@ + #include "pds.h" + #endif + ++#ifdef CONFIG_SCHED_DEBUG ++# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) ++#else ++# define SCHED_WARN_ON(x) ({ (void)(x), 0; }) ++#endif ++ + /* task_struct::on_rq states: */ + #define TASK_ON_RQ_QUEUED 1 + #define TASK_ON_RQ_MIGRATING 2 +-- +2.37.0 + + +From cf4cee8f6ff06dfd916491cc30c1ff95228cfabf Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 26 Jan 2021 14:07:25 +0800 +Subject: [PATCH 088/297] sched/alt: [Sync] 6775de4984ea context_tracking: Only + define schedule_user() on !HAVE_CONTEXT_TRACKING_OFFSTACK archs + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 86c1ca67b675..2782c97e939b 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3903,7 +3903,7 @@ void __sched schedule_idle(void) + } while (need_resched()); + } + +-#ifdef CONFIG_CONTEXT_TRACKING ++#if defined(CONFIG_CONTEXT_TRACKING) && !defined(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) + asmlinkage __visible void __sched schedule_user(void) + { + /* +-- +2.37.0 + + +From d3b3d1475e598040c854177270df039504c9332d Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 26 Jan 2021 14:10:53 +0800 +Subject: [PATCH 089/297] sched/alt: [Sync] 345a957fcc95 sched: Reenable + interrupts in do_sched_yield() + +--- + kernel/sched/alt_core.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 2782c97e939b..47c9144839bb 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5163,12 +5163,8 @@ static void do_sched_yield(void) + rq->skip = current; + } + +- /* +- * Since we are going to call schedule() anyway, there's +- * no need to preempt or enable interrupts: +- */ + preempt_disable(); +- raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irq(&rq->lock); + sched_preempt_enable_no_resched(); + + schedule(); +-- +2.37.0 + + +From fc856815179659d31e0e54e626652373f33e83b0 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 26 Jan 2021 
14:13:18 +0800 +Subject: [PATCH 090/297] sched/alt: [Sync] a8b62fd08505 stop_machine: Add + function and caller debug info + +--- + kernel/sched/alt_core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 47c9144839bb..77319f8657f7 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5455,6 +5455,7 @@ void sched_show_task(struct task_struct *p) + (unsigned long)task_thread_info(p)->flags); + + print_worker_info(KERN_INFO, p); ++ print_stop_info(KERN_INFO, p); + show_stack(p, NULL, KERN_INFO); + put_task_stack(p); + } +-- +2.37.0 + + +From 5bfa362be6f639d6db7a9d7b9503261192fb1a01 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 2 Feb 2021 23:17:30 +0800 +Subject: [PATCH 091/297] sched/alt: [Sync] 545b8c8df41f smp: Cleanup + smp_call_function*() + +--- + kernel/sched/alt_core.c | 12 ++---------- + 1 file changed, 2 insertions(+), 10 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 77319f8657f7..32254626d29d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -892,14 +892,6 @@ static inline void check_preempt_curr(struct rq *rq) + resched_curr(rq); + } + +-static inline void +-rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func) +-{ +- csd->flags = 0; +- csd->func = func; +- csd->info = rq; +-} +- + #ifdef CONFIG_SCHED_HRTICK + /* + * Use HR-timers to deliver accurate preemption points. +@@ -1016,7 +1008,7 @@ void hrtick_start(struct rq *rq, u64 delay) + static void hrtick_rq_init(struct rq *rq) + { + #ifdef CONFIG_SMP +- rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); ++ INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq); + #endif + + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); +@@ -6024,7 +6016,7 @@ void __init sched_init(void) + #endif + + #ifdef CONFIG_NO_HZ_COMMON +- rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); ++ INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); + #endif + #endif /* CONFIG_SMP */ + rq->nr_switches = 0; +-- +2.37.0 + + +From 38e2b3f7621d87de4a62f1855004531ba5a4d575 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 3 Feb 2021 14:23:17 +0800 +Subject: [PATCH 092/297] sched/alt: Sync up missing new interfaces. + +--- + kernel/sched/alt_core.c | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 32254626d29d..1feed396d704 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1090,7 +1090,18 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) + #endif + } + ++void migrate_disable(void) ++{ ++} ++EXPORT_SYMBOL_GPL(migrate_disable); ++ ++void migrate_enable(void) ++{ ++} ++EXPORT_SYMBOL_GPL(migrate_enable); ++ + #ifdef CONFIG_SMP ++ + void set_task_cpu(struct task_struct *p, unsigned int new_cpu) + { + #ifdef CONFIG_SCHED_DEBUG +@@ -5841,6 +5852,23 @@ int sched_cpu_starting(unsigned int cpu) + } + + #ifdef CONFIG_HOTPLUG_CPU ++ ++/* ++ * Invoked immediately before the stopper thread is invoked to bring the ++ * CPU down completely. At this point all per CPU kthreads except the ++ * hotplug thread (current) and the stopper thread (inactive) have been ++ * either parked or have been unbound from the outgoing CPU. Ensure that ++ * any of those which might be on the way out are gone. ++ * ++ * If after this point a bound task is being woken on this CPU then the ++ * responsible hotplug callback has failed to do it's job. 
++ * sched_cpu_dying() will catch it with the appropriate fireworks. ++ */ ++int sched_cpu_wait_empty(unsigned int cpu) ++{ ++ return 0; ++} ++ + int sched_cpu_dying(unsigned int cpu) + { + struct rq *rq = cpu_rq(cpu); +@@ -6155,6 +6183,13 @@ void __cant_sleep(const char *file, int line, int preempt_offset) + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); + } + EXPORT_SYMBOL_GPL(__cant_sleep); ++ ++#ifdef CONFIG_SMP ++void __cant_migrate(const char *file, int line) ++{ ++} ++EXPORT_SYMBOL_GPL(__cant_migrate); ++#endif + #endif + + #ifdef CONFIG_MAGIC_SYSRQ +-- +2.37.0 + + +From 887fdbe843b181de0e0f0eb3379209ff278a7bdb Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 4 Feb 2021 14:50:24 +0800 +Subject: [PATCH 093/297] sched/alt: [Sync] 5fbda3ecd14a sched: highmem: Store + local kmaps in task struct + +--- + kernel/sched/alt_core.c | 37 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 37 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 1feed396d704..60a888c99006 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2644,6 +2644,34 @@ static inline void finish_lock_switch(struct rq *rq) + raw_spin_unlock_irq(&rq->lock); + } + ++/* ++ * NOP if the arch has not defined these: ++ */ ++ ++#ifndef prepare_arch_switch ++# define prepare_arch_switch(next) do { } while (0) ++#endif ++ ++#ifndef finish_arch_post_lock_switch ++# define finish_arch_post_lock_switch() do { } while (0) ++#endif ++ ++static inline void kmap_local_sched_out(void) ++{ ++#ifdef CONFIG_KMAP_LOCAL ++ if (unlikely(current->kmap_ctrl.idx)) ++ __kmap_local_sched_out(); ++#endif ++} ++ ++static inline void kmap_local_sched_in(void) ++{ ++#ifdef CONFIG_KMAP_LOCAL ++ if (unlikely(current->kmap_ctrl.idx)) ++ __kmap_local_sched_in(); ++#endif ++} ++ + /** + * prepare_task_switch - prepare to switch tasks + * @rq: the runqueue preparing to switch +@@ -2665,6 +2693,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, + perf_event_task_sched_out(prev, next); + rseq_preempt(prev); + fire_sched_out_preempt_notifiers(prev, next); ++ kmap_local_sched_out(); + prepare_task(next); + prepare_arch_switch(next); + } +@@ -2732,6 +2761,14 @@ static struct rq *finish_task_switch(struct task_struct *prev) + finish_lock_switch(rq); + finish_arch_post_lock_switch(); + kcov_finish_switch(current); ++ /* ++ * kmap_local_sched_out() is invoked with rq::lock held and ++ * interrupts disabled. There is no requirement for that, but the ++ * sched out code does not have an interrupt enabled section. ++ * Restoring the maps on sched in does not require interrupts being ++ * disabled either. 
++ */ ++ kmap_local_sched_in(); + + fire_sched_in_preempt_notifiers(current); + /* +-- +2.37.0 + + +From a089b7c2d2f1e1217abeea00e6d11d5b6f412b29 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 29 Jan 2021 23:53:02 +0800 +Subject: [PATCH 094/297] sched/alt: [Sync] 565790d28b1e sched: Fix + balance_callback() + +--- + kernel/sched/alt_core.c | 85 ++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 84 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 60a888c99006..920911a23150 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2617,6 +2617,76 @@ static inline void finish_task(struct task_struct *prev) + #endif + } + ++#ifdef CONFIG_SMP ++ ++static void do_balance_callbacks(struct rq *rq, struct callback_head *head) ++{ ++ void (*func)(struct rq *rq); ++ struct callback_head *next; ++ ++ lockdep_assert_held(&rq->lock); ++ ++ while (head) { ++ func = (void (*)(struct rq *))head->func; ++ next = head->next; ++ head->next = NULL; ++ head = next; ++ ++ func(rq); ++ } ++} ++ ++static void balance_push(struct rq *rq); ++ ++struct callback_head balance_push_callback = { ++ .next = NULL, ++ .func = (void (*)(struct callback_head *))balance_push, ++}; ++ ++static inline struct callback_head *splice_balance_callbacks(struct rq *rq) ++{ ++ struct callback_head *head = rq->balance_callback; ++ ++ lockdep_assert_held(&rq->lock); ++ if (head) ++ rq->balance_callback = NULL; ++ ++ return head; ++} ++ ++static void __balance_callbacks(struct rq *rq) ++{ ++ do_balance_callbacks(rq, splice_balance_callbacks(rq)); ++} ++ ++static inline void balance_callbacks(struct rq *rq, struct callback_head *head) ++{ ++ unsigned long flags; ++ ++ if (unlikely(head)) { ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ do_balance_callbacks(rq, head); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++ } ++} ++ ++#else ++ ++static inline void __balance_callbacks(struct rq *rq) ++{ ++} ++ ++static inline struct callback_head *splice_balance_callbacks(struct rq *rq) ++{ ++ return NULL; ++} ++ ++static inline void balance_callbacks(struct rq *rq, struct callback_head *head) ++{ ++} ++ ++#endif ++ + static inline void + prepare_lock_switch(struct rq *rq, struct task_struct *next) + { +@@ -2641,6 +2711,7 @@ static inline void finish_lock_switch(struct rq *rq) + * prev into current: + */ + spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); ++ __balance_callbacks(rq); + raw_spin_unlock_irq(&rq->lock); + } + +@@ -3834,8 +3905,10 @@ static void __sched notrace __schedule(bool preempt) + + /* Also unlocks the rq: */ + rq = context_switch(rq, prev, next); +- } else ++ } else { ++ __balance_callbacks(rq); + raw_spin_unlock_irq(&rq->lock); ++ } + + #ifdef CONFIG_SCHED_SMT + sg_balance_check(rq); +@@ -4205,7 +4278,13 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) + + check_task_changed(rq, p); + out_unlock: ++ /* Avoid rq from going away on us: */ ++ preempt_disable(); ++ ++ __balance_callbacks(rq); + __task_access_unlock(p, lock); ++ ++ preempt_enable(); + } + #else + static inline int rt_effective_prio(struct task_struct *p, int prio) +@@ -4422,6 +4501,7 @@ static int __sched_setscheduler(struct task_struct *p, + int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; + int retval, oldpolicy = -1; + int policy = attr->sched_policy; ++ struct callback_head *head; + unsigned long flags; + struct rq *rq; + int reset_on_fork; +@@ -4575,6 +4655,7 @@ static int __sched_setscheduler(struct task_struct *p, + + /* Avoid rq from going 
away on us: */ + preempt_disable(); ++ head = splice_balance_callbacks(rq); + __task_access_unlock(p, lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + +@@ -4583,6 +4664,8 @@ static int __sched_setscheduler(struct task_struct *p, + rt_mutex_adjust_pi(p); + } + ++ /* Run balance callbacks after we've adjusted the PI chain: */ ++ balance_callbacks(rq, head); + preempt_enable(); + + return 0; +-- +2.37.0 + + +From f93e650c480cf25219a3fb9104105b4479385bcf Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 30 Jan 2021 09:04:03 +0800 +Subject: [PATCH 095/297] sched/alt: Sync rq->balance_callback + +--- + kernel/sched/alt_core.c | 1 + + kernel/sched/alt_sched.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 920911a23150..eefc424a00e9 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6166,6 +6166,7 @@ void __init sched_init(void) + #ifdef CONFIG_NO_HZ_COMMON + INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); + #endif ++ rq->balance_callback = NULL; + #endif /* CONFIG_SMP */ + rq->nr_switches = 0; + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index a157800317e9..879aa6313dc7 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -128,6 +128,7 @@ struct rq { + int active_balance; + struct cpu_stop_work active_balance_work; + #endif ++ struct callback_head *balance_callback; + #endif /* CONFIG_SMP */ + #ifdef CONFIG_IRQ_TIME_ACCOUNTING + u64 prev_irq_time; +-- +2.37.0 + + +From 9ed0f5739bfd597a8413adbfe4356dc2e070dddb Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 30 Jan 2021 11:28:33 +0800 +Subject: [PATCH 096/297] sched/alt: [Sync] f2469a1fb43f sched/core: Wait for + tasks being pushed away on hotplug + +--- + kernel/sched/alt_core.c | 162 +++++++++++++++++++++++++++++++++++++++ + kernel/sched/alt_sched.h | 22 +++++- + 2 files changed, 182 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index eefc424a00e9..7c4552821c3f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3419,6 +3419,139 @@ static void sched_tick_stop(int cpu) + twork = per_cpu_ptr(tick_work_cpu, cpu); + cancel_delayed_work_sync(&twork->work); + } ++ ++static int __balance_push_cpu_stop(void *arg) ++{ ++ struct task_struct *p = arg; ++ struct rq *rq = this_rq(); ++ struct rq_flags rf; ++ int cpu; ++ ++ raw_spin_lock_irq(&p->pi_lock); ++ rq_lock(rq, &rf); ++ ++ update_rq_clock(rq); ++ ++ if (task_rq(p) == rq && task_on_rq_queued(p)) { ++ cpu = select_fallback_rq(rq->cpu, p); ++ rq = __migrate_task(rq, p, cpu); ++ } ++ ++ rq_unlock(rq, &rf); ++ raw_spin_unlock_irq(&p->pi_lock); ++ ++ put_task_struct(p); ++ ++ return 0; ++} ++ ++static DEFINE_PER_CPU(struct cpu_stop_work, push_work); ++ ++/* ++ * Ensure we only run per-cpu kthreads once the CPU goes !active. ++ */ ++static void balance_push(struct rq *rq) ++{ ++ struct task_struct *push_task = rq->curr; ++ ++ lockdep_assert_held(&rq->lock); ++ SCHED_WARN_ON(rq->cpu != smp_processor_id()); ++ /* ++ * Ensure the thing is persistent until balance_push_set(.on = false); ++ */ ++ rq->balance_callback = &balance_push_callback; ++ ++ /* ++ * Both the cpu-hotplug and stop task are in this case and are ++ * required to complete the hotplug process. ++ * ++ * XXX: the idle task does not match kthread_is_per_cpu() due to ++ * histerical raisins. 
++ */ ++ if (rq->idle == push_task || ++ ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || ++ is_migration_disabled(push_task)) { ++ ++ /* ++ * If this is the idle task on the outgoing CPU try to wake ++ * up the hotplug control thread which might wait for the ++ * last task to vanish. The rcuwait_active() check is ++ * accurate here because the waiter is pinned on this CPU ++ * and can't obviously be running in parallel. ++ * ++ * On RT kernels this also has to check whether there are ++ * pinned and scheduled out tasks on the runqueue. They ++ * need to leave the migrate disabled section first. ++ */ ++ if (!rq->nr_running && !rq_has_pinned_tasks(rq) && ++ rcuwait_active(&rq->hotplug_wait)) { ++ raw_spin_unlock(&rq->lock); ++ rcuwait_wake_up(&rq->hotplug_wait); ++ raw_spin_lock(&rq->lock); ++ } ++ return; ++ } ++ ++ get_task_struct(push_task); ++ /* ++ * Temporarily drop rq->lock such that we can wake-up the stop task. ++ * Both preemption and IRQs are still disabled. ++ */ ++ raw_spin_unlock(&rq->lock); ++ stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, ++ this_cpu_ptr(&push_work)); ++ /* ++ * At this point need_resched() is true and we'll take the loop in ++ * schedule(). The next pick is obviously going to be the stop task ++ * which kthread_is_per_cpu() and will push this task away. ++ */ ++ raw_spin_lock(&rq->lock); ++} ++ ++static void balance_push_set(int cpu, bool on) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++ rq_lock_irqsave(rq, &rf); ++ rq->balance_push = on; ++ if (on) { ++ WARN_ON_ONCE(rq->balance_callback); ++ rq->balance_callback = &balance_push_callback; ++ } else if (rq->balance_callback == &balance_push_callback) { ++ rq->balance_callback = NULL; ++ } ++ rq_unlock_irqrestore(rq, &rf); ++} ++ ++/* ++ * Invoked from a CPUs hotplug control thread after the CPU has been marked ++ * inactive. All tasks which are not per CPU kernel threads are either ++ * pushed off this CPU now via balance_push() or placed on a different CPU ++ * during wakeup. Wait until the CPU is quiescent. ++ */ ++static void balance_hotplug_wait(void) ++{ ++ struct rq *rq = this_rq(); ++ ++ rcuwait_wait_event(&rq->hotplug_wait, ++ rq->nr_running == 1 && !rq_has_pinned_tasks(rq), ++ TASK_UNINTERRUPTIBLE); ++} ++ ++#else ++ ++static void balance_push(struct rq *rq) ++{ ++} ++ ++static void balance_push_set(int cpu, bool on) ++{ ++} ++ ++static inline void balance_hotplug_wait(void) ++{ ++} + #endif /* CONFIG_HOTPLUG_CPU */ + + int __init sched_tick_offload_init(void) +@@ -5893,6 +6026,12 @@ int sched_cpu_activate(unsigned int cpu) + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + ++ /* ++ * Make sure that when the hotplug state machine does a roll-back ++ * we clear balance_push. Ideally that would happen earlier... ++ */ ++ balance_push_set(cpu, false); ++ + #ifdef CONFIG_SCHED_SMT + /* + * When going up, increment the number of cores with SMT present. +@@ -5926,6 +6065,15 @@ int sched_cpu_deactivate(unsigned int cpu) + int ret; + + set_cpu_active(cpu, false); ++ ++ /* ++ * From this point forward, this CPU will refuse to run any task that ++ * is not: migrate_disable() or KTHREAD_IS_PER_CPU, and will actively ++ * push those tasks away until this gets cleared, see ++ * sched_cpu_dying(). 
++ */ ++ balance_push_set(cpu, true); ++ + /* + * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU + * users of this state to go away such that all new such users will +@@ -5951,9 +6099,14 @@ int sched_cpu_deactivate(unsigned int cpu) + + ret = cpuset_cpu_inactive(cpu); + if (ret) { ++ balance_push_set(cpu, false); + set_cpu_active(cpu, true); + return ret; + } ++ ++ /* Wait for all non per CPU kernel threads to vanish. */ ++ balance_hotplug_wait(); ++ + return 0; + } + +@@ -6002,6 +6155,12 @@ int sched_cpu_dying(unsigned int cpu) + migrate_tasks(rq); + raw_spin_unlock_irqrestore(&rq->lock, flags); + ++ /* ++ * Now that the CPU is offline, make sure we're welcome ++ * to new tasks once we come back up. ++ */ ++ balance_push_set(cpu, false); ++ + hrtick_clear(rq); + return 0; + } +@@ -6167,6 +6326,9 @@ void __init sched_init(void) + INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); + #endif + rq->balance_callback = NULL; ++#ifdef CONFIG_HOTPLUG_CPU ++ rcuwait_init(&rq->hotplug_wait); ++#endif + #endif /* CONFIG_SMP */ + rq->nr_switches = 0; + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 879aa6313dc7..30e80c4b0825 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -126,9 +126,13 @@ struct rq { + + #ifdef CONFIG_SCHED_SMT + int active_balance; +- struct cpu_stop_work active_balance_work; ++ struct cpu_stop_work active_balance_work; ++#endif ++ struct callback_head *balance_callback; ++ unsigned char balance_push; ++#ifdef CONFIG_HOTPLUG_CPU ++ struct rcuwait hotplug_wait; + #endif +- struct callback_head *balance_callback; + #endif /* CONFIG_SMP */ + #ifdef CONFIG_IRQ_TIME_ACCOUNTING + u64 prev_irq_time; +@@ -388,6 +392,13 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) + raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); + } + ++static inline void ++rq_lock(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock(&rq->lock); ++} ++ + static inline void + rq_unlock_irq(struct rq *rq, struct rq_flags *rf) + __releases(rq->lock) +@@ -395,6 +406,13 @@ rq_unlock_irq(struct rq *rq, struct rq_flags *rf) + raw_spin_unlock_irq(&rq->lock); + } + ++static inline void ++rq_unlock(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ raw_spin_unlock(&rq->lock); ++} ++ + static inline struct rq * + this_rq_lock_irq(struct rq_flags *rf) + __acquires(rq->lock) +-- +2.37.0 + + +From 3d09a455d788d8106b1de4dc44ecff646198eb57 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 5 Feb 2021 10:35:00 +0800 +Subject: [PATCH 097/297] sched/alt: Fix compilation issue. + +--- + kernel/sched/alt_core.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 7c4552821c3f..3882b4c977fd 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3469,8 +3469,9 @@ static void balance_push(struct rq *rq) + * histerical raisins. + */ + if (rq->idle == push_task || +- ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || +- is_migration_disabled(push_task)) { ++ ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task))) { ++ /*((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || ++ is_migration_disabled(push_task)) {*/ + + /* + * If this is the idle task on the outgoing CPU try to wake +@@ -3483,8 +3484,9 @@ static void balance_push(struct rq *rq) + * pinned and scheduled out tasks on the runqueue. 
They + * need to leave the migrate disabled section first. + */ +- if (!rq->nr_running && !rq_has_pinned_tasks(rq) && +- rcuwait_active(&rq->hotplug_wait)) { ++ if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) { ++ /*if (!rq->nr_running && !rq_has_pinned_tasks(rq) && ++ rcuwait_active(&rq->hotplug_wait)) {*/ + raw_spin_unlock(&rq->lock); + rcuwait_wake_up(&rq->hotplug_wait); + raw_spin_lock(&rq->lock); +@@ -3535,7 +3537,8 @@ static void balance_hotplug_wait(void) + struct rq *rq = this_rq(); + + rcuwait_wait_event(&rq->hotplug_wait, +- rq->nr_running == 1 && !rq_has_pinned_tasks(rq), ++ rq->nr_running == 1, ++/* rq->nr_running == 1 && !rq_has_pinned_tasks(rq),*/ + TASK_UNINTERRUPTIBLE); + } + +-- +2.37.0 + + +From 028e152a1ce8a01ff47edc77303be53d46b9268a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 30 Jan 2021 21:50:29 +0800 +Subject: [PATCH 098/297] sched/alt: [Sync] 1cf12e08bc4d sched/hotplug: + Consolidate task migration on CPU unplug + +--- + kernel/sched/alt_core.c | 98 ++++++++--------------------------------- + 1 file changed, 19 insertions(+), 79 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3882b4c977fd..65d87ca69aff 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5889,81 +5889,6 @@ void idle_task_exit(void) + /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ + } + +-/* +- * Migrate all tasks from the rq, sleeping tasks will be migrated by +- * try_to_wake_up()->select_task_rq(). +- * +- * Called with rq->lock held even though we'er in stop_machine() and +- * there's no concurrency possible, we hold the required locks anyway +- * because of lock validation efforts. +- */ +-static void migrate_tasks(struct rq *dead_rq) +-{ +- struct rq *rq = dead_rq; +- struct task_struct *p, *stop = rq->stop; +- int count = 0; +- +- /* +- * Fudge the rq selection such that the below task selection loop +- * doesn't get stuck on the currently eligible stop task. +- * +- * We're currently inside stop_machine() and the rq is either stuck +- * in the stop_machine_cpu_stop() loop, or we're executing this code, +- * either way we should never end up calling schedule() until we're +- * done here. +- */ +- rq->stop = NULL; +- +- p = sched_rq_first_task(rq); +- while (p != rq->idle) { +- int dest_cpu; +- +- /* skip the running task */ +- if (task_running(p) || 1 == p->nr_cpus_allowed) { +- p = sched_rq_next_task(p, rq); +- continue; +- } +- +- /* +- * Rules for changing task_struct::cpus_allowed are holding +- * both pi_lock and rq->lock, such that holding either +- * stabilizes the mask. +- * +- * Drop rq->lock is not quite as disastrous as it usually is +- * because !cpu_active at this point, which means load-balance +- * will not interfere. Also, stop-machine. +- */ +- raw_spin_unlock(&rq->lock); +- raw_spin_lock(&p->pi_lock); +- raw_spin_lock(&rq->lock); +- +- /* +- * Since we're inside stop-machine, _nothing_ should have +- * changed the task, WARN if weird stuff happened, because in +- * that case the above rq->lock drop is a fail too. +- */ +- if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { +- raw_spin_unlock(&p->pi_lock); +- p = sched_rq_next_task(p, rq); +- continue; +- } +- +- count++; +- /* Find suitable destination for @next, with force if needed. 
*/ +- dest_cpu = select_fallback_rq(dead_rq->cpu, p); +- rq = __migrate_task(rq, p, dest_cpu); +- raw_spin_unlock(&rq->lock); +- raw_spin_unlock(&p->pi_lock); +- +- rq = dead_rq; +- raw_spin_lock(&rq->lock); +- /* Check queued task all over from the header again */ +- p = sched_rq_first_task(rq); +- } +- +- rq->stop = stop; +-} +- + static void set_rq_offline(struct rq *rq) + { + if (rq->online) +@@ -6107,9 +6032,6 @@ int sched_cpu_deactivate(unsigned int cpu) + return ret; + } + +- /* Wait for all non per CPU kernel threads to vanish. */ +- balance_hotplug_wait(); +- + return 0; + } + +@@ -6142,9 +6064,27 @@ int sched_cpu_starting(unsigned int cpu) + */ + int sched_cpu_wait_empty(unsigned int cpu) + { ++ balance_hotplug_wait(); + return 0; + } + ++/* ++ * Since this CPU is going 'away' for a while, fold any nr_active delta we ++ * might have. Called from the CPU stopper task after ensuring that the ++ * stopper is the last running task on the CPU, so nr_active count is ++ * stable. We need to take the teardown thread which is calling this into ++ * account, so we hand in adjust = 1 to the load calculation. ++ * ++ * Also see the comment "Global load-average calculations". ++ */ ++static void calc_load_migrate(struct rq *rq) ++{ ++ long delta = calc_load_fold_active(rq, 1); ++ ++ if (delta) ++ atomic_long_add(delta, &calc_load_tasks); ++} ++ + int sched_cpu_dying(unsigned int cpu) + { + struct rq *rq = cpu_rq(cpu); +@@ -6155,7 +6095,6 @@ int sched_cpu_dying(unsigned int cpu) + + raw_spin_lock_irqsave(&rq->lock, flags); + set_rq_offline(rq); +- migrate_tasks(rq); + raw_spin_unlock_irqrestore(&rq->lock, flags); + + /* +@@ -6164,6 +6103,7 @@ int sched_cpu_dying(unsigned int cpu) + */ + balance_push_set(cpu, false); + ++ calc_load_migrate(rq); + hrtick_clear(rq); + return 0; + } +-- +2.37.0 + + +From be69aaf70b0d8fea9395fbfd1a3a0c18b6b03fdd Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 30 Jan 2021 22:58:28 +0800 +Subject: [PATCH 099/297] sched/alt: [Sync] 120455c514f7 sched: Fix hotplug vs + CPU bandwidth control + +--- + kernel/sched/alt_core.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 65d87ca69aff..8b9c3c414120 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5990,6 +5990,8 @@ int sched_cpu_activate(unsigned int cpu) + + int sched_cpu_deactivate(unsigned int cpu) + { ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; + int ret; + + set_cpu_active(cpu, false); +@@ -6011,6 +6013,11 @@ int sched_cpu_deactivate(unsigned int cpu) + */ + synchronize_rcu(); + ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ update_rq_clock(rq); ++ set_rq_offline(rq); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++ + #ifdef CONFIG_SCHED_SMT + /* + * When going down, decrement the number of cores with SMT present. 
+@@ -6094,7 +6101,6 @@ int sched_cpu_dying(unsigned int cpu) + sched_tick_stop(cpu); + + raw_spin_lock_irqsave(&rq->lock, flags); +- set_rq_offline(rq); + raw_spin_unlock_irqrestore(&rq->lock, flags); + + /* +-- +2.37.0 + + +From 0a71700b835f3dd2d6760263e87759e4d94e3861 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 31 Jan 2021 11:02:35 +0800 +Subject: [PATCH 100/297] sched/alt: [Sync] 9cfc3e18adb0 sched: Massage + set_cpus_allowed() + +--- + kernel/sched/alt_core.c | 37 ++++++++++++++++++++++++------------- + 1 file changed, 24 insertions(+), 13 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 8b9c3c414120..f161c317419f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1260,17 +1260,26 @@ static int migration_cpu_stop(void *data) + return 0; + } + ++#define SCA_CHECK 0x01 ++ + static inline void +-set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) ++set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) + { + cpumask_copy(&p->cpus_mask, new_mask); + p->nr_cpus_allowed = cpumask_weight(new_mask); + } + ++static void ++__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags) ++{ ++ set_cpus_allowed_common(p, new_mask, flags); ++} ++ + void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) + { +- set_cpus_allowed_common(p, new_mask); ++ __do_set_cpus_allowed(p, new_mask, 0); + } ++ + #endif + + /** +@@ -1561,16 +1570,17 @@ void sched_set_stop_task(int cpu, struct task_struct *stop) + * call is not atomic; no spinlocks may be held. + */ + static int __set_cpus_allowed_ptr(struct task_struct *p, +- const struct cpumask *new_mask, bool check) ++ const struct cpumask *new_mask, ++ u32 flags) + { + const struct cpumask *cpu_valid_mask = cpu_active_mask; + int dest_cpu; +- unsigned long flags; ++ unsigned long irq_flags; + struct rq *rq; + raw_spinlock_t *lock; + int ret = 0; + +- raw_spin_lock_irqsave(&p->pi_lock, flags); ++ raw_spin_lock_irqsave(&p->pi_lock, irq_flags); + rq = __task_access_lock(p, &lock); + + if (p->flags & PF_KTHREAD) { +@@ -1584,7 +1594,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + * Must re-check here, to close a race against __kthread_bind(), + * sched_setaffinity() is not guaranteed to observe the flag. + */ +- if (check && (p->flags & PF_NO_SETAFFINITY)) { ++ if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) { + ret = -EINVAL; + goto out; + } +@@ -1598,7 +1608,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + goto out; + } + +- do_set_cpus_allowed(p, new_mask); ++ __do_set_cpus_allowed(p, new_mask, flags); + + if (p->flags & PF_KTHREAD) { + /* +@@ -1619,7 +1629,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + + /* Need help from migration thread: drop lock and wait. 
*/ + __task_access_unlock(p, lock); +- raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); + return 0; + } +@@ -1635,14 +1645,14 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + + out: + __task_access_unlock(p, lock); +- raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); + + return ret; + } + + int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) + { +- return __set_cpus_allowed_ptr(p, new_mask, false); ++ return __set_cpus_allowed_ptr(p, new_mask, 0); + } + EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + +@@ -1655,7 +1665,8 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) + + static inline int + __set_cpus_allowed_ptr(struct task_struct *p, +- const struct cpumask *new_mask, bool check) ++ const struct cpumask *new_mask, ++ u32 flags) + { + return set_cpus_allowed_ptr(p, new_mask); + } +@@ -5270,7 +5281,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) + cpumask_and(new_mask, in_mask, cpus_allowed); + + again: +- retval = __set_cpus_allowed_ptr(p, new_mask, true); ++ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); + + if (!retval) { + cpuset_cpus_allowed(p, cpus_allowed); +@@ -5815,7 +5826,7 @@ void init_idle(struct task_struct *idle, int cpu) + * + * And since this is boot we can forgo the serialisation. + */ +- set_cpus_allowed_common(idle, cpumask_of(cpu)); ++ set_cpus_allowed_common(idle, cpumask_of(cpu), 0); + #endif + + /* Silence PROVE_RCU */ +-- +2.37.0 + + +From aa50f1a5556f41d41d9efd8543830a95d067d433 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 2 Feb 2021 15:10:02 +0800 +Subject: [PATCH 101/297] sched/alt: [Sync] af449901b84c sched: Add + migrate_disable() + +--- + kernel/sched/alt_core.c | 421 ++++++++++++++++++++++++++-------------- + 1 file changed, 271 insertions(+), 150 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index f161c317419f..c61cb93915c9 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1090,15 +1090,18 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) + #endif + } + +-void migrate_disable(void) ++static inline bool is_migration_disabled(struct task_struct *p) + { ++#ifdef CONFIG_SMP ++ return p->migration_disabled; ++#else ++ return false; ++#endif + } +-EXPORT_SYMBOL_GPL(migrate_disable); + +-void migrate_enable(void) +-{ +-} +-EXPORT_SYMBOL_GPL(migrate_enable); ++#define SCA_CHECK 0x01 ++#define SCA_MIGRATE_DISABLE 0x02 ++#define SCA_MIGRATE_ENABLE 0x04 + + #ifdef CONFIG_SMP + +@@ -1126,6 +1129,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) + * Clearly, migrating tasks to offline CPUs is a fairly daft thing. 
+ */ + WARN_ON_ONCE(!cpu_online(new_cpu)); ++ ++ WARN_ON_ONCE(is_migration_disabled(p)); + #endif + if (task_cpu(p) == new_cpu) + return; +@@ -1141,19 +1146,99 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) + return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); + } + ++static void ++__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); ++ ++static int __set_cpus_allowed_ptr(struct task_struct *p, ++ const struct cpumask *new_mask, ++ u32 flags); ++ ++static void migrate_disable_switch(struct rq *rq, struct task_struct *p) ++{ ++ if (likely(!p->migration_disabled)) ++ return; ++ ++ if (p->cpus_ptr != &p->cpus_mask) ++ return; ++ ++ /* ++ * Violates locking rules! see comment in __do_set_cpus_allowed(). ++ */ ++ __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE); ++} ++ ++void migrate_disable(void) ++{ ++ struct task_struct *p = current; ++ ++ if (p->migration_disabled) { ++ p->migration_disabled++; ++ return; ++ } ++ ++ preempt_disable(); ++ /*this_rq()->nr_pinned++;*/ ++ p->migration_disabled = 1; ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(migrate_disable); ++ ++void migrate_enable(void) ++{ ++ struct task_struct *p = current; ++ ++ if (p->migration_disabled > 1) { ++ p->migration_disabled--; ++ return; ++ } ++ ++ /* ++ * Ensure stop_task runs either before or after this, and that ++ * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). ++ */ ++ preempt_disable(); ++ if (p->cpus_ptr != &p->cpus_mask) ++ __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); ++ /* ++ * Mustn't clear migration_disabled() until cpus_ptr points back at the ++ * regular cpus_mask, otherwise things that race (eg. ++ * select_fallback_rq) get confused. ++ */ ++ barrier(); ++ p->migration_disabled = 0; ++ /*this_rq()->nr_pinned--;*/ ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(migrate_enable); ++ + /* + * Per-CPU kthreads are allowed to run on !active && online CPUs, see + * __set_cpus_allowed_ptr() and select_fallback_rq(). + */ + static inline bool is_cpu_allowed(struct task_struct *p, int cpu) + { ++ /* When not in the task's cpumask, no point in looking further. */ + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) + return false; + +- if (is_per_cpu_kthread(p)) ++ /* migrate_disabled() must be allowed to finish. */ ++ if (is_migration_disabled(p)) + return cpu_online(cpu); + +- return cpu_active(cpu); ++ /* Non kernel threads are not allowed during either online or offline. */ ++ if (!(p->flags & PF_KTHREAD)) ++ return cpu_active(cpu); ++ ++ /* KTHREAD_IS_PER_CPU is always allowed. */ ++ if (kthread_is_per_cpu(p)) ++ return cpu_online(cpu); ++ ++ /* Regular kernel threads don't get to stay during offline. */ ++ if (cpu_rq(cpu)->balance_push) ++ return false; ++ ++ /* But are allowed during online. 
*/ ++ return cpu_online(cpu); + } + + /* +@@ -1260,11 +1345,14 @@ static int migration_cpu_stop(void *data) + return 0; + } + +-#define SCA_CHECK 0x01 +- + static inline void + set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) + { ++ if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) { ++ p->cpus_ptr = new_mask; ++ return; ++ } ++ + cpumask_copy(&p->cpus_mask, new_mask); + p->nr_cpus_allowed = cpumask_weight(new_mask); + } +@@ -1272,6 +1360,23 @@ set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u + static void + __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags) + { ++ /* ++ * This here violates the locking rules for affinity, since we're only ++ * supposed to change these variables while holding both rq->lock and ++ * p->pi_lock. ++ * ++ * HOWEVER, it magically works, because ttwu() is the only code that ++ * accesses these variables under p->pi_lock and only does so after ++ * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() ++ * before finish_task(). ++ * ++ * XXX do further audits, this smells like something putrid. ++ */ ++ if (flags & SCA_MIGRATE_DISABLE) ++ SCHED_WARN_ON(!p->on_cpu); ++ else ++ lockdep_assert_held(&p->pi_lock); ++ + set_cpus_allowed_common(p, new_mask, flags); + } + +@@ -1486,6 +1591,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p) + } + fallthrough; + case possible: ++ /* ++ * XXX When called from select_task_rq() we only ++ * hold p->pi_lock and again violate locking order. ++ * ++ * More yuck to audit. ++ */ + do_set_cpus_allowed(p, cpu_possible_mask); + state = fail; + break; +@@ -1583,9 +1694,16 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + raw_spin_lock_irqsave(&p->pi_lock, irq_flags); + rq = __task_access_lock(p, &lock); + +- if (p->flags & PF_KTHREAD) { ++ if (p->flags & PF_KTHREAD || is_migration_disabled(p)) { + /* +- * Kernel threads are allowed on online && !active CPUs ++ * Kernel threads are allowed on online && !active CPUs, ++ * however, during cpu-hot-unplug, even these might get pushed ++ * away if not KTHREAD_IS_PER_CPU. ++ * ++ * Specifically, migration_disabled() tasks must not fail the ++ * cpumask_any_and_distribute() pick below, esp. so on ++ * SCA_MIGRATE_ENABLE, otherwise we'll not call ++ * set_cpus_allowed_common() and actually reset p->cpus_ptr. + */ + cpu_valid_mask = cpu_online_mask; + } +@@ -1671,7 +1789,9 @@ __set_cpus_allowed_ptr(struct task_struct *p, + return set_cpus_allowed_ptr(p, new_mask); + } + +-#endif /* CONFIG_SMP */ ++static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } ++ ++#endif /* !CONFIG_SMP */ + + static void + ttwu_stat(struct task_struct *p, int cpu, int wake_flags) +@@ -3430,142 +3550,6 @@ static void sched_tick_stop(int cpu) + twork = per_cpu_ptr(tick_work_cpu, cpu); + cancel_delayed_work_sync(&twork->work); + } +- +-static int __balance_push_cpu_stop(void *arg) +-{ +- struct task_struct *p = arg; +- struct rq *rq = this_rq(); +- struct rq_flags rf; +- int cpu; +- +- raw_spin_lock_irq(&p->pi_lock); +- rq_lock(rq, &rf); +- +- update_rq_clock(rq); +- +- if (task_rq(p) == rq && task_on_rq_queued(p)) { +- cpu = select_fallback_rq(rq->cpu, p); +- rq = __migrate_task(rq, p, cpu); +- } +- +- rq_unlock(rq, &rf); +- raw_spin_unlock_irq(&p->pi_lock); +- +- put_task_struct(p); +- +- return 0; +-} +- +-static DEFINE_PER_CPU(struct cpu_stop_work, push_work); +- +-/* +- * Ensure we only run per-cpu kthreads once the CPU goes !active. 
+- */ +-static void balance_push(struct rq *rq) +-{ +- struct task_struct *push_task = rq->curr; +- +- lockdep_assert_held(&rq->lock); +- SCHED_WARN_ON(rq->cpu != smp_processor_id()); +- /* +- * Ensure the thing is persistent until balance_push_set(.on = false); +- */ +- rq->balance_callback = &balance_push_callback; +- +- /* +- * Both the cpu-hotplug and stop task are in this case and are +- * required to complete the hotplug process. +- * +- * XXX: the idle task does not match kthread_is_per_cpu() due to +- * histerical raisins. +- */ +- if (rq->idle == push_task || +- ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task))) { +- /*((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || +- is_migration_disabled(push_task)) {*/ +- +- /* +- * If this is the idle task on the outgoing CPU try to wake +- * up the hotplug control thread which might wait for the +- * last task to vanish. The rcuwait_active() check is +- * accurate here because the waiter is pinned on this CPU +- * and can't obviously be running in parallel. +- * +- * On RT kernels this also has to check whether there are +- * pinned and scheduled out tasks on the runqueue. They +- * need to leave the migrate disabled section first. +- */ +- if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) { +- /*if (!rq->nr_running && !rq_has_pinned_tasks(rq) && +- rcuwait_active(&rq->hotplug_wait)) {*/ +- raw_spin_unlock(&rq->lock); +- rcuwait_wake_up(&rq->hotplug_wait); +- raw_spin_lock(&rq->lock); +- } +- return; +- } +- +- get_task_struct(push_task); +- /* +- * Temporarily drop rq->lock such that we can wake-up the stop task. +- * Both preemption and IRQs are still disabled. +- */ +- raw_spin_unlock(&rq->lock); +- stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, +- this_cpu_ptr(&push_work)); +- /* +- * At this point need_resched() is true and we'll take the loop in +- * schedule(). The next pick is obviously going to be the stop task +- * which kthread_is_per_cpu() and will push this task away. +- */ +- raw_spin_lock(&rq->lock); +-} +- +-static void balance_push_set(int cpu, bool on) +-{ +- struct rq *rq = cpu_rq(cpu); +- struct rq_flags rf; +- +- rq_lock_irqsave(rq, &rf); +- rq->balance_push = on; +- if (on) { +- WARN_ON_ONCE(rq->balance_callback); +- rq->balance_callback = &balance_push_callback; +- } else if (rq->balance_callback == &balance_push_callback) { +- rq->balance_callback = NULL; +- } +- rq_unlock_irqrestore(rq, &rf); +-} +- +-/* +- * Invoked from a CPUs hotplug control thread after the CPU has been marked +- * inactive. All tasks which are not per CPU kernel threads are either +- * pushed off this CPU now via balance_push() or placed on a different CPU +- * during wakeup. Wait until the CPU is quiescent. 
+- */ +-static void balance_hotplug_wait(void) +-{ +- struct rq *rq = this_rq(); +- +- rcuwait_wait_event(&rq->hotplug_wait, +- rq->nr_running == 1, +-/* rq->nr_running == 1 && !rq_has_pinned_tasks(rq),*/ +- TASK_UNINTERRUPTIBLE); +-} +- +-#else +- +-static void balance_push(struct rq *rq) +-{ +-} +- +-static void balance_push_set(int cpu, bool on) +-{ +-} +- +-static inline void balance_hotplug_wait(void) +-{ +-} + #endif /* CONFIG_HOTPLUG_CPU */ + + int __init sched_tick_offload_init(void) +@@ -4046,6 +4030,7 @@ static void __sched notrace __schedule(bool preempt) + */ + ++*switch_count; + ++ migrate_disable_switch(rq, prev); + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + + trace_sched_switch(preempt, prev, next); +@@ -5900,12 +5885,148 @@ void idle_task_exit(void) + /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ + } + ++static int __balance_push_cpu_stop(void *arg) ++{ ++ struct task_struct *p = arg; ++ struct rq *rq = this_rq(); ++ struct rq_flags rf; ++ int cpu; ++ ++ raw_spin_lock_irq(&p->pi_lock); ++ rq_lock(rq, &rf); ++ ++ update_rq_clock(rq); ++ ++ if (task_rq(p) == rq && task_on_rq_queued(p)) { ++ cpu = select_fallback_rq(rq->cpu, p); ++ rq = __migrate_task(rq, p, cpu); ++ } ++ ++ rq_unlock(rq, &rf); ++ raw_spin_unlock_irq(&p->pi_lock); ++ ++ put_task_struct(p); ++ ++ return 0; ++} ++ ++static DEFINE_PER_CPU(struct cpu_stop_work, push_work); ++ ++/* ++ * Ensure we only run per-cpu kthreads once the CPU goes !active. ++ */ ++static void balance_push(struct rq *rq) ++{ ++ struct task_struct *push_task = rq->curr; ++ ++ lockdep_assert_held(&rq->lock); ++ SCHED_WARN_ON(rq->cpu != smp_processor_id()); ++ /* ++ * Ensure the thing is persistent until balance_push_set(.on = false); ++ */ ++ rq->balance_callback = &balance_push_callback; ++ ++ /* ++ * Both the cpu-hotplug and stop task are in this case and are ++ * required to complete the hotplug process. ++ * ++ * XXX: the idle task does not match kthread_is_per_cpu() due to ++ * histerical raisins. ++ */ ++ if (rq->idle == push_task || ++ ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task))) { ++ /*((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || ++ is_migration_disabled(push_task)) {*/ ++ ++ /* ++ * If this is the idle task on the outgoing CPU try to wake ++ * up the hotplug control thread which might wait for the ++ * last task to vanish. The rcuwait_active() check is ++ * accurate here because the waiter is pinned on this CPU ++ * and can't obviously be running in parallel. ++ * ++ * On RT kernels this also has to check whether there are ++ * pinned and scheduled out tasks on the runqueue. They ++ * need to leave the migrate disabled section first. ++ */ ++ if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) { ++ /*if (!rq->nr_running && !rq_has_pinned_tasks(rq) && ++ rcuwait_active(&rq->hotplug_wait)) {*/ ++ raw_spin_unlock(&rq->lock); ++ rcuwait_wake_up(&rq->hotplug_wait); ++ raw_spin_lock(&rq->lock); ++ } ++ return; ++ } ++ ++ get_task_struct(push_task); ++ /* ++ * Temporarily drop rq->lock such that we can wake-up the stop task. ++ * Both preemption and IRQs are still disabled. ++ */ ++ raw_spin_unlock(&rq->lock); ++ stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, ++ this_cpu_ptr(&push_work)); ++ /* ++ * At this point need_resched() is true and we'll take the loop in ++ * schedule(). The next pick is obviously going to be the stop task ++ * which kthread_is_per_cpu() and will push this task away. 
++ */ ++ raw_spin_lock(&rq->lock); ++} ++ ++static void balance_push_set(int cpu, bool on) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++ rq_lock_irqsave(rq, &rf); ++ rq->balance_push = on; ++ if (on) { ++ WARN_ON_ONCE(rq->balance_callback); ++ rq->balance_callback = &balance_push_callback; ++ } else if (rq->balance_callback == &balance_push_callback) { ++ rq->balance_callback = NULL; ++ } ++ rq_unlock_irqrestore(rq, &rf); ++} ++ ++/* ++ * Invoked from a CPUs hotplug control thread after the CPU has been marked ++ * inactive. All tasks which are not per CPU kernel threads are either ++ * pushed off this CPU now via balance_push() or placed on a different CPU ++ * during wakeup. Wait until the CPU is quiescent. ++ */ ++static void balance_hotplug_wait(void) ++{ ++ struct rq *rq = this_rq(); ++ ++ rcuwait_wait_event(&rq->hotplug_wait, ++ rq->nr_running == 1, ++/* rq->nr_running == 1 && !rq_has_pinned_tasks(rq),*/ ++ TASK_UNINTERRUPTIBLE); ++} ++ ++#else ++ ++static void balance_push(struct rq *rq) ++{ ++} ++ ++static void balance_push_set(int cpu, bool on) ++{ ++} ++ ++static inline void balance_hotplug_wait(void) ++{ ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ + static void set_rq_offline(struct rq *rq) + { + if (rq->online) + rq->online = false; + } +-#endif /* CONFIG_HOTPLUG_CPU */ + + static void set_rq_online(struct rq *rq) + { +-- +2.37.0 + + +From 7cd866a92841ab94bf78068dedba172ac1db71ad Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 9 Feb 2021 17:27:46 +0800 +Subject: [PATCH 102/297] sched/alt: [Sync] 3015ef4b98f5 sched/core: Make + migrate disable and CPU hotplug cooperative + +--- + kernel/sched/alt_core.c | 28 ++++++++++++++++++---------- + kernel/sched/alt_sched.h | 1 + + 2 files changed, 19 insertions(+), 10 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c61cb93915c9..556c27911635 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1177,7 +1177,7 @@ void migrate_disable(void) + } + + preempt_disable(); +- /*this_rq()->nr_pinned++;*/ ++ this_rq()->nr_pinned++; + p->migration_disabled = 1; + preempt_enable(); + } +@@ -1206,11 +1206,16 @@ void migrate_enable(void) + */ + barrier(); + p->migration_disabled = 0; +- /*this_rq()->nr_pinned--;*/ ++ this_rq()->nr_pinned--; + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_enable); + ++static inline bool rq_has_pinned_tasks(struct rq *rq) ++{ ++ return rq->nr_pinned; ++} ++ + /* + * Per-CPU kthreads are allowed to run on !active && online CPUs, see + * __set_cpus_allowed_ptr() and select_fallback_rq(). +@@ -1791,6 +1796,11 @@ __set_cpus_allowed_ptr(struct task_struct *p, + + static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } + ++static inline bool rq_has_pinned_tasks(struct rq *rq) ++{ ++ return false; ++} ++ + #endif /* !CONFIG_SMP */ + + static void +@@ -5934,9 +5944,8 @@ static void balance_push(struct rq *rq) + * histerical raisins. + */ + if (rq->idle == push_task || +- ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task))) { +- /*((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || +- is_migration_disabled(push_task)) {*/ ++ ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || ++ is_migration_disabled(push_task)) { + + /* + * If this is the idle task on the outgoing CPU try to wake +@@ -5949,9 +5958,8 @@ static void balance_push(struct rq *rq) + * pinned and scheduled out tasks on the runqueue. They + * need to leave the migrate disabled section first. 
+ */ +- if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) { +- /*if (!rq->nr_running && !rq_has_pinned_tasks(rq) && +- rcuwait_active(&rq->hotplug_wait)) {*/ ++ if (!rq->nr_running && !rq_has_pinned_tasks(rq) && ++ rcuwait_active(&rq->hotplug_wait)) { + raw_spin_unlock(&rq->lock); + rcuwait_wake_up(&rq->hotplug_wait); + raw_spin_lock(&rq->lock); +@@ -6002,8 +6010,7 @@ static void balance_hotplug_wait(void) + struct rq *rq = this_rq(); + + rcuwait_wait_event(&rq->hotplug_wait, +- rq->nr_running == 1, +-/* rq->nr_running == 1 && !rq_has_pinned_tasks(rq),*/ ++ rq->nr_running == 1 && !rq_has_pinned_tasks(rq), + TASK_UNINTERRUPTIBLE); + } + +@@ -6233,6 +6240,7 @@ int sched_cpu_dying(unsigned int cpu) + sched_tick_stop(cpu); + + raw_spin_lock_irqsave(&rq->lock, flags); ++ BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq)); + raw_spin_unlock_irqrestore(&rq->lock, flags); + + /* +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 30e80c4b0825..cc2739f843af 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -133,6 +133,7 @@ struct rq { + #ifdef CONFIG_HOTPLUG_CPU + struct rcuwait hotplug_wait; + #endif ++ unsigned int nr_pinned; + #endif /* CONFIG_SMP */ + #ifdef CONFIG_IRQ_TIME_ACCOUNTING + u64 prev_irq_time; +-- +2.37.0 + + +From c0099d5e58395efcaa4823e4bb0b40dfb0c65964 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 10 Feb 2021 18:41:04 +0800 +Subject: [PATCH 103/297] sched/alt: [Sync] ded467dc83ac sched, lockdep: + Annotate ->pi_lock recursion + +--- + kernel/sched/alt_core.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 556c27911635..9fa24c75ee0d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1649,6 +1649,7 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) + + void sched_set_stop_task(int cpu, struct task_struct *stop) + { ++ static struct lock_class_key stop_pi_lock; + struct sched_param stop_param = { .sched_priority = STOP_PRIO }; + struct sched_param start_param = { .sched_priority = 0 }; + struct task_struct *old_stop = cpu_rq(cpu)->stop; +@@ -1663,6 +1664,20 @@ void sched_set_stop_task(int cpu, struct task_struct *stop) + * rely on PI working anyway. + */ + sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); ++ ++ /* ++ * The PI code calls rt_mutex_setprio() with ->pi_lock held to ++ * adjust the effective priority of a task. As a result, ++ * rt_mutex_setprio() can trigger (RT) balancing operations, ++ * which can then trigger wakeups of the stop thread to push ++ * around the current task. ++ * ++ * The stop task itself will never be part of the PI-chain, it ++ * never blocks, therefore that ->pi_lock recursion is safe. ++ * Tell lockdep about this by placing the stop->pi_lock in its ++ * own class. 
++ */ ++ lockdep_set_class(&stop->pi_lock, &stop_pi_lock); + } + + cpu_rq(cpu)->stop = stop; +-- +2.37.0 + + +From faa649b6a21efaababa472a45d8b12f61ae6296f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 10 Feb 2021 19:22:58 +0800 +Subject: [PATCH 104/297] sched/alt: Fix lockdep_assert_held in + splice_balance_callbacks() + +--- + kernel/sched/alt_core.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 9fa24c75ee0d..5c7f8b734b12 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2803,9 +2803,10 @@ static inline struct callback_head *splice_balance_callbacks(struct rq *rq) + { + struct callback_head *head = rq->balance_callback; + +- lockdep_assert_held(&rq->lock); +- if (head) ++ if (head) { ++ lockdep_assert_held(&rq->lock); + rq->balance_callback = NULL; ++ } + + return head; + } +-- +2.37.0 + + +From d54543bb65e824b5c53f054fb317ef70fd53e6b3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 13 Feb 2021 16:41:23 +0800 +Subject: [PATCH 105/297] sched/alt: Remove migrate_disable_switch(). + +--- + kernel/sched/alt_core.c | 24 +++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 5c7f8b734b12..a69c9d449d3d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1153,20 +1153,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + const struct cpumask *new_mask, + u32 flags); + +-static void migrate_disable_switch(struct rq *rq, struct task_struct *p) +-{ +- if (likely(!p->migration_disabled)) +- return; +- +- if (p->cpus_ptr != &p->cpus_mask) +- return; +- +- /* +- * Violates locking rules! see comment in __do_set_cpus_allowed(). +- */ +- __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE); +-} +- + void migrate_disable(void) + { + struct task_struct *p = current; +@@ -1179,6 +1165,13 @@ void migrate_disable(void) + preempt_disable(); + this_rq()->nr_pinned++; + p->migration_disabled = 1; ++ ++ /* ++ * Violates locking rules! see comment in __do_set_cpus_allowed(). ++ */ ++ if (p->cpus_ptr == &p->cpus_mask) ++ __do_set_cpus_allowed(p, cpumask_of(smp_processor_id()), SCA_MIGRATE_DISABLE); ++ + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_disable); +@@ -1809,8 +1802,6 @@ __set_cpus_allowed_ptr(struct task_struct *p, + return set_cpus_allowed_ptr(p, new_mask); + } + +-static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } +- + static inline bool rq_has_pinned_tasks(struct rq *rq) + { + return false; +@@ -4056,7 +4047,6 @@ static void __sched notrace __schedule(bool preempt) + */ + ++*switch_count; + +- migrate_disable_switch(rq, prev); + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + + trace_sched_switch(preempt, prev, next); +-- +2.37.0 + + +From 37059fa68af2b9666598eb0aecef3d9eb16d9b3e Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 15 Feb 2021 10:48:22 +0800 +Subject: [PATCH 106/297] sched/alt: Rework migration disable vs + set_cpus_allowed_ptr() + +--- + kernel/sched/alt_core.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index a69c9d449d3d..1f781a4d4103 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1190,8 +1190,12 @@ void migrate_enable(void) + * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). 
+ */ + preempt_disable(); ++ /* ++ * Assumption: current should be running on allowed cpu ++ */ ++ WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &p->cpus_mask)); + if (p->cpus_ptr != &p->cpus_mask) +- __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); ++ __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); + /* + * Mustn't clear migration_disabled() until cpus_ptr points back at the + * regular cpus_mask, otherwise things that race (eg. +@@ -1370,7 +1374,7 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 + * + * XXX do further audits, this smells like something putrid. + */ +- if (flags & SCA_MIGRATE_DISABLE) ++ if (flags & (SCA_MIGRATE_DISABLE | SCA_MIGRATE_ENABLE)) + SCHED_WARN_ON(!p->on_cpu); + else + lockdep_assert_held(&p->pi_lock); +@@ -1755,6 +1759,14 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + if (cpumask_test_cpu(task_cpu(p), new_mask)) + goto out; + ++ if (p->migration_disabled) { ++ if (p->cpus_ptr != &p->cpus_mask) ++ __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); ++ p->migration_disabled = 0; ++ /* When p is migrate_disabled, rq->lock should be held */ ++ rq->nr_pinned--; ++ } ++ + if (task_running(p) || p->state == TASK_WAKING) { + struct migration_arg arg = { p, dest_cpu }; + +-- +2.37.0 + + +From 0ffb554fdb86e863746b1a3907168c80f3c5f9a2 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 15 Feb 2021 15:53:12 +0800 +Subject: [PATCH 107/297] sched/alt: [Sync] b19a888c1e9b sched/core: Fix typos + in comments + +--- + kernel/sched/alt_core.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 1f781a4d4103..b2d4dc2cc6e1 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -203,7 +203,7 @@ static inline struct task_struct *rq_runnable_task(struct rq *rq) + * + * Normal scheduling state is serialized by rq->lock. __schedule() takes the + * local CPU's rq->lock, it optionally removes the task from the runqueue and +- * always looks at the local rq data structures to find the most elegible task ++ * always looks at the local rq data structures to find the most eligible task + * to run next. + * + * Task enqueue is also under rq->lock, possibly taken from another CPU. +@@ -653,7 +653,7 @@ static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) + + /* + * Atomically grab the task, if ->wake_q is !nil already it means +- * its already queued (either by us or someone else) and will get the ++ * it's already queued (either by us or someone else) and will get the + * wakeup due to that. + * + * In order to ensure that a pending wakeup will observe our pending +@@ -2320,7 +2320,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + + /* + * If the owning (remote) CPU is still in the middle of schedule() with +- * this task as prev, wait until its done referencing the task. ++ * this task as prev, wait until it's done referencing the task. + * + * Pairs with the smp_store_release() in finish_task(). + * +@@ -3180,7 +3180,7 @@ unsigned long nr_iowait_cpu(int cpu) + } + + /* +- * IO-wait accounting, and how its mostly bollocks (on SMP). ++ * IO-wait accounting, and how it's mostly bollocks (on SMP). + * + * The idea behind IO-wait account is to account the idle time that we could + * have spend running if it were not for IO. 
That is, if we were to improve the +@@ -4404,7 +4404,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) + * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to + * ensure a task is de-boosted (pi_task is set to NULL) before the + * task is allowed to run again (and can exit). This ensures the pointer +- * points to a blocked task -- which guaratees the task is present. ++ * points to a blocked task -- which guarantees the task is present. + */ + p->pi_top_task = pi_task; + +@@ -4472,7 +4472,7 @@ void set_user_nice(struct task_struct *p, long nice) + /* + * The RT priorities are set via sched_setscheduler(), but we still + * allow the 'normal' nice value to be set - but as expected +- * it wont have any effect on scheduling until the task is ++ * it won't have any effect on scheduling until the task is + * not SCHED_NORMAL/SCHED_BATCH: + */ + if (task_has_rt_policy(p)) +@@ -5507,7 +5507,7 @@ EXPORT_SYMBOL(__cond_resched_lock); + * + * The scheduler is at all times free to pick the calling task as the most + * eligible task to run, if removing the yield() call from your code breaks +- * it, its already broken. ++ * it, it's already broken. + * + * Typical broken usage is: + * +-- +2.37.0 + + +From bf49bd4919bd1d6d05c4876ae039ef9a0b6bb9cc Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 15 Feb 2021 23:42:56 +0800 +Subject: [PATCH 108/297] Project-C v5.11-r0 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b2d4dc2cc6e1..cd1b0b85af6d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.10-r2" ++#define ALT_SCHED_VERSION "v5.11-r0" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From d179e9a644963bed5dd386cddf930b6f565f73c6 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 18 Feb 2021 15:36:42 +0800 +Subject: [PATCH 109/297] sched/alt: [Sync] 59a74b1544e1 sched: Fix kernel-doc + markup + +--- + kernel/sched/alt_core.c | 17 ++++++++--------- + 1 file changed, 8 insertions(+), 9 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index cd1b0b85af6d..2daa6605ccdc 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5417,15 +5417,6 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, + return ret; + } + +-/** +- * sys_sched_yield - yield the current processor to other threads. +- * +- * This function yields the current CPU to other tasks. It does this by +- * scheduling away the current task. If it still has the earliest deadline +- * it will be scheduled again as the next task. +- * +- * Return: 0. +- */ + static void do_sched_yield(void) + { + struct rq *rq; +@@ -5453,6 +5444,14 @@ static void do_sched_yield(void) + schedule(); + } + ++/** ++ * sys_sched_yield - yield the current processor to other threads. ++ * ++ * This function yields the current CPU to other tasks. If there are no ++ * other threads running on this CPU then this function will return. ++ * ++ * Return: 0. 
++ */ + SYSCALL_DEFINE0(sched_yield) + { + do_sched_yield(); +-- +2.37.0 + + +From dd43a5c9f5117663f447f7e34ff495f46805adba Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 18 Feb 2021 15:58:32 +0800 +Subject: [PATCH 110/297] sched/alt: [Sync] 36c6e17bf169 sched/core: Print out + straggler tasks in sched_cpu_dying() + +--- + kernel/sched/alt_core.c | 26 ++++++++++++++++++++++++-- + kernel/sched/alt_sched.h | 2 +- + 2 files changed, 25 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 2daa6605ccdc..b469a7a9156e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3761,7 +3761,7 @@ inline void alt_sched_debug(void) {} + + #ifdef CONFIG_SMP + +-#define SCHED_RQ_NR_MIGRATION (32UL) ++#define SCHED_RQ_NR_MIGRATION (32U) + /* + * Migrate pending tasks in @rq to @dest_cpu + * Will try to migrate mininal of half of @rq nr_running tasks and +@@ -6248,6 +6248,25 @@ static void calc_load_migrate(struct rq *rq) + atomic_long_add(delta, &calc_load_tasks); + } + ++static void dump_rq_tasks(struct rq *rq, const char *loglvl) ++{ ++ struct task_struct *g, *p; ++ int cpu = cpu_of(rq); ++ ++ lockdep_assert_held(&rq->lock); ++ ++ printk("%sCPU%d enqueued tasks (%u total):\n", loglvl, cpu, rq->nr_running); ++ for_each_process_thread(g, p) { ++ if (task_cpu(p) != cpu) ++ continue; ++ ++ if (!task_on_rq_queued(p)) ++ continue; ++ ++ printk("%s\tpid: %d, name: %s\n", loglvl, p->pid, p->comm); ++ } ++} ++ + int sched_cpu_dying(unsigned int cpu) + { + struct rq *rq = cpu_rq(cpu); +@@ -6257,7 +6276,10 @@ int sched_cpu_dying(unsigned int cpu) + sched_tick_stop(cpu); + + raw_spin_lock_irqsave(&rq->lock, flags); +- BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq)); ++ if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) { ++ WARN(true, "Dying CPU not properly vacated!"); ++ dump_rq_tasks(rq, KERN_WARNING); ++ } + raw_spin_unlock_irqrestore(&rq->lock, flags); + + /* +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index cc2739f843af..192586fee177 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -153,7 +153,7 @@ struct rq { + u64 last_ts_switch; + u64 clock_task; + +- unsigned long nr_running; ++ unsigned int nr_running; + unsigned long nr_uninterruptible; + + #ifdef CONFIG_SCHED_HRTICK +-- +2.37.0 + + +From e328096eaac153b4ecc56833b04e050dbed9eb0c Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 18 Feb 2021 22:19:45 +0800 +Subject: [PATCH 111/297] sched/alt: [Sync] 5ba2ffba13a1 sched: Fix CPU hotplug + / tighten is_per_cpu_kthread() + +--- + kernel/sched/alt_core.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b469a7a9156e..97ae80161672 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1979,6 +1979,13 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags + + static inline bool ttwu_queue_cond(int cpu, int wake_flags) + { ++ /* ++ * Do not complicate things with the async wake_list while the CPU is ++ * in hotplug state. ++ */ ++ if (!cpu_active(cpu)) ++ return false; ++ + /* + * If the CPU does not share cache, then queue the task on the + * remote rqs wakelist to avoid accessing remote data. +@@ -6165,6 +6172,9 @@ int sched_cpu_deactivate(unsigned int cpu) + * users of this state to go away such that all new such users will + * observe it. + * ++ * Specifically, we rely on ttwu to no longer target this CPU, see ++ * ttwu_queue_cond() and is_cpu_allowed(). 
++ * + * Do sync before park smpboot threads to take care the rcu boost case. + */ + synchronize_rcu(); +-- +2.37.0 + + +From 47e48c16fb86689337883e095638d8051f173655 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 28 Feb 2021 22:16:39 +0800 +Subject: [PATCH 112/297] sched/alt: Implement __cant_migrate() for BMQ/PDS. + +--- + kernel/sched/alt_core.c | 33 +++++++++++++++++++++++++++++++++ + 1 file changed, 33 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 97ae80161672..02d405a8a789 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1146,6 +1146,8 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) + return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); + } + ++#define MDF_FORCE_ENABLED 0x80 ++ + static void + __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); + +@@ -1165,6 +1167,7 @@ void migrate_disable(void) + preempt_disable(); + this_rq()->nr_pinned++; + p->migration_disabled = 1; ++ p->migration_flags &= ~MDF_FORCE_ENABLED; + + /* + * Violates locking rules! see comment in __do_set_cpus_allowed(). +@@ -1763,6 +1766,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + if (p->cpus_ptr != &p->cpus_mask) + __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); + p->migration_disabled = 0; ++ p->migration_flags |= MDF_FORCE_ENABLED; + /* When p is migrate_disabled, rq->lock should be held */ + rq->nr_pinned--; + } +@@ -6608,6 +6612,35 @@ EXPORT_SYMBOL_GPL(__cant_sleep); + #ifdef CONFIG_SMP + void __cant_migrate(const char *file, int line) + { ++ static unsigned long prev_jiffy; ++ ++ if (irqs_disabled()) ++ return; ++ ++ if (is_migration_disabled(current)) ++ return; ++ ++ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) ++ return; ++ ++ if (preempt_count() > 0) ++ return; ++ ++ if (current->migration_flags & MDF_FORCE_ENABLED) ++ return; ++ ++ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) ++ return; ++ prev_jiffy = jiffies; ++ ++ pr_err("BUG: assuming non migratable context at %s:%d\n", file, line); ++ pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), is_migration_disabled(current), ++ current->pid, current->comm); ++ ++ debug_show_held_locks(current); ++ dump_stack(); ++ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); + } + EXPORT_SYMBOL_GPL(__cant_migrate); + #endif +-- +2.37.0 + + +From 2209d91d2a1d835bdf5df0a6fd0055cf3d61c268 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 1 Mar 2021 13:52:16 +0800 +Subject: [PATCH 113/297] Project-C v5.11-r1 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 02d405a8a789..2ca0e6470022 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.11-r0" ++#define ALT_SCHED_VERSION "v5.11-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 82a0930bbf23372c447336576957f2c3f7609ae8 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 7 Mar 2021 10:03:57 +0800 +Subject: [PATCH 114/297] sched/alt: Implement fake cpu.shares for CGROUP + +--- + init/Kconfig | 4 +++- + kernel/sched/alt_core.c | 50 ++++++++++++++++++++++++++++++++++++++++ + kernel/sched/alt_sched.h | 45 ++++++++++++++++++++++++++++++++++++ + 3 files changed, 98 insertions(+), 1 
deletion(-) + +diff --git a/init/Kconfig b/init/Kconfig +index c4acd49edf94..fcdfda2b98f1 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -1028,12 +1028,13 @@ menuconfig CGROUP_SCHED + bandwidth allocation to such task groups. It uses cgroups to group + tasks. + +-if CGROUP_SCHED && !SCHED_ALT ++if CGROUP_SCHED + config FAIR_GROUP_SCHED + bool "Group scheduling for SCHED_OTHER" + depends on CGROUP_SCHED + default CGROUP_SCHED + ++if !SCHED_ALT + config CFS_BANDWIDTH + bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" + depends on FAIR_GROUP_SCHED +@@ -1056,6 +1057,7 @@ config RT_GROUP_SCHED + realtime bandwidth for them. + See Documentation/scheduler/sched-rt-group.rst for more information. + ++endif #!SCHED_ALT + endif #CGROUP_SCHED + + config UCLAMP_TASK_GROUP +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 2ca0e6470022..b470dc451585 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6411,6 +6411,9 @@ struct task_group { + struct task_group *parent; + struct list_head siblings; + struct list_head children; ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ unsigned long shares; ++#endif + }; + + /* +@@ -6829,7 +6832,54 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset) + { + } + ++#ifdef CONFIG_FAIR_GROUP_SCHED ++static DEFINE_MUTEX(shares_mutex); ++ ++int sched_group_set_shares(struct task_group *tg, unsigned long shares) ++{ ++ /* ++ * We can't change the weight of the root cgroup. ++ */ ++ if (&root_task_group == tg) ++ return -EINVAL; ++ ++ shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES)); ++ ++ mutex_lock(&shares_mutex); ++ if (tg->shares == shares) ++ goto done; ++ ++ tg->shares = shares; ++done: ++ mutex_unlock(&shares_mutex); ++ return 0; ++} ++ ++static int cpu_shares_write_u64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, u64 shareval) ++{ ++ if (shareval > scale_load_down(ULONG_MAX)) ++ shareval = MAX_SHARES; ++ return sched_group_set_shares(css_tg(css), scale_load(shareval)); ++} ++ ++static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ struct task_group *tg = css_tg(css); ++ ++ return (u64) scale_load_down(tg->shares); ++} ++#endif ++ + static struct cftype cpu_legacy_files[] = { ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ { ++ .name = "shares", ++ .read_u64 = cpu_shares_read_u64, ++ .write_u64 = cpu_shares_write_u64, ++ }, ++#endif + { } /* Terminate */ + }; + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 192586fee177..51f11bf416f4 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -61,6 +61,51 @@ + # define SCHED_WARN_ON(x) ({ (void)(x), 0; }) + #endif + ++/* ++ * Increase resolution of nice-level calculations for 64-bit architectures. ++ * The extra resolution improves shares distribution and load balancing of ++ * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup ++ * hierarchies, especially on larger systems. This is not a user-visible change ++ * and does not change the user-interface for setting shares/weights. ++ * ++ * We increase resolution only if we have enough bits to allow this increased ++ * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit ++ * are pretty high and the returns do not justify the increased costs. ++ * ++ * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to ++ * increase coverage and consistency always enable it on 64-bit platforms. 
++ */ ++#ifdef CONFIG_64BIT ++# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) ++# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT) ++# define scale_load_down(w) \ ++({ \ ++ unsigned long __w = (w); \ ++ if (__w) \ ++ __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \ ++ __w; \ ++}) ++#else ++# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT) ++# define scale_load(w) (w) ++# define scale_load_down(w) (w) ++#endif ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD ++ ++/* ++ * A weight of 0 or 1 can cause arithmetics problems. ++ * A weight of a cfs_rq is the sum of weights of which entities ++ * are queued on this cfs_rq, so a weight of a entity should not be ++ * too large, so as the shares value of a task group. ++ * (The default weight is 1024 - so there's no practical ++ * limitation from this.) ++ */ ++#define MIN_SHARES (1UL << 1) ++#define MAX_SHARES (1UL << 18) ++#endif ++ + /* task_struct::on_rq states: */ + #define TASK_ON_RQ_QUEUED 1 + #define TASK_ON_RQ_MIGRATING 2 +-- +2.37.0 + + +From 5e4ca862d9b0b8023e76ebb23538388e553fc492 Mon Sep 17 00:00:00 2001 +From: Piotr Gorski +Date: Sat, 6 Mar 2021 20:38:54 +0100 +Subject: [PATCH 115/297] sched/alt: Backport "sched/core: Allow + try_invoke_on_locked_down_task() with irqs disabled" + +Signed-off-by: Piotr Gorski +--- + kernel/sched/alt_core.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b470dc451585..dbbd8a46e75d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2371,7 +2371,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + + /** + * try_invoke_on_locked_down_task - Invoke a function on task in fixed state +- * @p: Process for which the function is to be invoked. ++ * @p: Process for which the function is to be invoked, can be @current. + * @func: Function to invoke. + * @arg: Argument to function. 
+ * +@@ -2389,12 +2389,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + */ + bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) + { +- bool ret = false; + struct rq_flags rf; ++ bool ret = false; + struct rq *rq; + +- lockdep_assert_irqs_enabled(); +- raw_spin_lock_irq(&p->pi_lock); ++ raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + if (p->on_rq) { + rq = __task_rq_lock(p, &rf); + if (task_rq(p) == rq) +@@ -2411,7 +2410,7 @@ bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct t + ret = func(p, arg); + } + } +- raw_spin_unlock_irq(&p->pi_lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); + return ret; + } + +-- +2.37.0 + + +From 2e805c59748b8190428907cc188a49a863c1ee56 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 7 Mar 2021 10:27:35 +0800 +Subject: [PATCH 116/297] Project-C v5.11-r2 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index dbbd8a46e75d..cb8a4aabbb21 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.11-r1" ++#define ALT_SCHED_VERSION "v5.11-r2" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 977b3402474428a89feb66df6c9c85aa46bdb055 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 14 Mar 2021 17:28:24 +0800 +Subject: [PATCH 117/297] sched/alt: Fix limited cpufreq for schedutil. + +Here is the first fix for #12, which fix the limited cpufreq for +schedutil. But it still tend to stay at max cpufreq. +--- + kernel/sched/alt_sched.h | 3 ++- + kernel/sched/cpufreq_schedutil.c | 13 +++++++++---- + 2 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 51f11bf416f4..7bcd96cc6bed 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -598,7 +598,8 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) + { + struct update_util_data *data; + +- data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); ++ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, ++ cpu_of(rq))); + if (data) + data->func(data, rq_clock(rq), flags); + } +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index 424b1e360af5..59536ee7c3d5 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -165,11 +165,16 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) + sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max, + FREQUENCY_UTIL, NULL); + } ++ + #else /* CONFIG_SCHED_ALT */ +-static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) ++ ++static void sugov_get_util(struct sugov_cpu *sg_cpu) + { +- sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); +- return sg_cpu->max; ++ unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); ++ ++ sg_cpu->max = max; ++ sg_cpu->bw_dl = 0; ++ sg_cpu->util = cpu_rq(sg_cpu->cpu)->nr_running ? 
max:0UL; + } + #endif + +@@ -316,8 +321,8 @@ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) + { + #ifndef CONFIG_SCHED_ALT + if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) +-#endif + sg_cpu->sg_policy->limits_changed = true; ++#endif + } + + static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, +-- +2.37.0 + + +From 1cfd6610f9c105c226e484148e4f8bce236f3638 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 14 Mar 2021 22:23:58 +0800 +Subject: [PATCH 118/297] sched/alt: Add is_migration_disabled() checking in + sg_balance_trigger(). + +--- + kernel/sched/alt_core.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index cb8a4aabbb21..6d56458e71a3 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1183,6 +1183,9 @@ void migrate_enable(void) + { + struct task_struct *p = current; + ++ if (0 == p->migration_disabled) ++ return; ++ + if (p->migration_disabled > 1) { + p->migration_disabled--; + return; +@@ -3389,7 +3392,8 @@ static inline int active_load_balance_cpu_stop(void *data) + rq->active_balance = 0; + /* _something_ may have changed the task, double check again */ + if (task_on_rq_queued(p) && task_rq(p) == rq && +- cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { ++ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask) && ++ !is_migration_disabled(p)) { + int cpu = cpu_of(rq); + int dcpu = __best_mask_cpu(cpu, &tmp, + per_cpu(sched_cpu_llc_mask, cpu)); +@@ -3417,7 +3421,7 @@ static inline int sg_balance_trigger(const int cpu) + curr = rq->curr; + res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\ + cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\ +- (!rq->active_balance); ++ !is_migration_disabled(curr) && (!rq->active_balance); + + if (res) + rq->active_balance = 1; +-- +2.37.0 + + +From 746b3ce0081d3447b924954709fddcfa3f0f0b57 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 25 Mar 2021 09:54:19 +0800 +Subject: [PATCH 119/297] Project-C v5.11-r3 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 6d56458e71a3..be7c185a741d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.11-r2" ++#define ALT_SCHED_VERSION "v5.11-r3" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 805822b2b2210e3e94ff6e24fa699dc2b4597f50 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 26 Mar 2021 14:20:06 +0800 +Subject: [PATCH 120/297] sched/alt: Implement cpu load history and util for + schedutil. + +--- + kernel/sched/alt_core.c | 16 ++++----- + kernel/sched/cpufreq_schedutil.c | 56 ++++++++++++++++++++++++++++++-- + 2 files changed, 61 insertions(+), 11 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index be7c185a741d..29b7d30fd45f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -558,14 +558,6 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) + #endif + + sched_update_tick_dependency(rq); +- +- /* +- * If in_iowait is set, the code below may not trigger any cpufreq +- * utilization updates, so do it here explicitly with the IOWAIT flag +- * passed. 
+- */ +- if (p->in_iowait) +- cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); + } + + static inline void requeue_task(struct task_struct *p, struct rq *rq) +@@ -1057,7 +1049,13 @@ static void activate_task(struct task_struct *p, struct rq *rq) + { + enqueue_task(p, rq, ENQUEUE_WAKEUP); + p->on_rq = TASK_ON_RQ_QUEUED; +- cpufreq_update_util(rq, 0); ++ ++ /* ++ * If in_iowait is set, the code below may not trigger any cpufreq ++ * utilization updates, so do it here explicitly with the IOWAIT flag ++ * passed. ++ */ ++ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT * p->in_iowait); + } + + /* +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index 59536ee7c3d5..c44d2e33e114 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -50,6 +50,13 @@ struct sugov_cpu { + unsigned long bw_dl; + unsigned long max; + ++#ifdef CONFIG_SCHED_ALT ++ /* For genenal cpu load util */ ++ s32 load_history; ++ u64 load_block; ++ u64 load_stamp; ++#endif ++ + /* The field below is for single-CPU policies only: */ + #ifdef CONFIG_NO_HZ_COMMON + unsigned long saved_idle_calls; +@@ -168,15 +175,52 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) + + #else /* CONFIG_SCHED_ALT */ + ++#define SG_CPU_LOAD_HISTORY_BITS (sizeof(s32) * 8ULL) ++#define SG_CPU_UTIL_SHIFT (8) ++#define SG_CPU_LOAD_HISTORY_SHIFT (SG_CPU_LOAD_HISTORY_BITS - 1 - SG_CPU_UTIL_SHIFT) ++#define SG_CPU_LOAD_HISTORY_TO_UTIL(l) (((l) >> SG_CPU_LOAD_HISTORY_SHIFT) & 0xff) ++ ++#define LOAD_BLOCK(t) ((t) >> 17) ++#define LOAD_HALF_BLOCK(t) ((t) >> 16) ++#define BLOCK_MASK(t) ((t) & ((0x01 << 18) - 1)) ++#define LOAD_BLOCK_BIT(b) (1UL << (SG_CPU_LOAD_HISTORY_BITS - 1 - (b))) ++#define CURRENT_LOAD_BIT LOAD_BLOCK_BIT(0) ++ + static void sugov_get_util(struct sugov_cpu *sg_cpu) + { + unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); + + sg_cpu->max = max; + sg_cpu->bw_dl = 0; +- sg_cpu->util = cpu_rq(sg_cpu->cpu)->nr_running ? max:0UL; ++ sg_cpu->util = SG_CPU_LOAD_HISTORY_TO_UTIL(sg_cpu->load_history) * ++ (max >> SG_CPU_UTIL_SHIFT); + } +-#endif ++ ++static inline void sugov_cpu_load_update(struct sugov_cpu *sg_cpu, u64 time) ++{ ++ u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(sg_cpu->load_stamp), ++ SG_CPU_LOAD_HISTORY_BITS - 1); ++ u64 prev = !!(sg_cpu->load_history & CURRENT_LOAD_BIT); ++ u64 curr = !!cpu_rq(sg_cpu->cpu)->nr_running; ++ ++ if (delta) { ++ sg_cpu->load_history = sg_cpu->load_history >> delta; ++ ++ if (delta <= SG_CPU_UTIL_SHIFT) { ++ sg_cpu->load_block += (~BLOCK_MASK(sg_cpu->load_stamp)) * prev; ++ if (!!LOAD_HALF_BLOCK(sg_cpu->load_block) ^ curr) ++ sg_cpu->load_history ^= LOAD_BLOCK_BIT(delta); ++ } ++ ++ sg_cpu->load_block = BLOCK_MASK(time) * prev; ++ } else { ++ sg_cpu->load_block += (time - sg_cpu->load_stamp) * prev; ++ } ++ if (prev ^ curr) ++ sg_cpu->load_history ^= CURRENT_LOAD_BIT; ++ sg_cpu->load_stamp = time; ++} ++#endif /* CONFIG_SCHED_ALT */ + + /** + * sugov_iowait_reset() - Reset the IO boost status of a CPU. 
+@@ -328,6 +372,10 @@ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) + static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, + u64 time, unsigned int flags) + { ++#ifdef CONFIG_SCHED_ALT ++ sugov_cpu_load_update(sg_cpu, time); ++#endif /* CONFIG_SCHED_ALT */ ++ + sugov_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + +@@ -454,6 +502,10 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) + + raw_spin_lock(&sg_policy->update_lock); + ++#ifdef CONFIG_SCHED_ALT ++ sugov_cpu_load_update(sg_cpu, time); ++#endif /* CONFIG_SCHED_ALT */ ++ + sugov_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + +-- +2.37.0 + + +From ca2227b48f1b32b8a21c607efbb96e93511ae698 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 29 Mar 2021 15:35:51 +0800 +Subject: [PATCH 121/297] sched/alt: [Sync] 741ba80f6f9a sched: Relax the + set_cpus_allowed_ptr() semantics + +--- + kernel/sched/alt_core.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 29b7d30fd45f..847a1baff835 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1749,16 +1749,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + + __do_set_cpus_allowed(p, new_mask, flags); + +- if (p->flags & PF_KTHREAD) { +- /* +- * For kernel threads that do indeed end up on online && +- * !active we want to ensure they are strict per-CPU threads. +- */ +- WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && +- !cpumask_intersects(new_mask, cpu_active_mask) && +- p->nr_cpus_allowed != 1); +- } +- + /* Can the task run on the task's current CPU? If so, we're done */ + if (cpumask_test_cpu(task_cpu(p), new_mask)) + goto out; +-- +2.37.0 + + +From ce639edbb7ef2a9015c33b0affed597d9b118f11 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 30 Mar 2021 10:21:02 +0800 +Subject: [PATCH 122/297] sched/alt: Remove USER_PRIO, TASK_USER_PRIO and + MAX_USER_PRIO and MAX_USER_RT_PRIO + +9d061ba6bc17 sched: Remove USER_PRIO, TASK_USER_PRIO and MAX_USER_PRIO +ae18ad281e82 sched: Remove MAX_USER_RT_PRIO +--- + init/init_task.c | 4 ++-- + kernel/sched/alt_core.c | 6 +++--- + kernel/sched/pds_imp.h | 8 +++++--- + 3 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/init/init_task.c b/init/init_task.c +index a98a65334c15..b59f5a2fefc8 100644 +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -81,9 +81,9 @@ struct task_struct init_task + .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, + #endif + #ifdef CONFIG_SCHED_PDS +- .prio = MAX_USER_RT_PRIO, ++ .prio = MAX_RT_PRIO, + .static_prio = DEFAULT_PRIO, +- .normal_prio = MAX_USER_RT_PRIO, ++ .normal_prio = MAX_RT_PRIO, + #else + .prio = MAX_PRIO - 20, + .static_prio = MAX_PRIO - 20, +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 847a1baff835..426af5230831 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4697,11 +4697,11 @@ static int __sched_setscheduler(struct task_struct *p, + + /* + * Valid priorities for SCHED_FIFO and SCHED_RR are +- * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and ++ * 1..MAX_RT_PRIO-1, valid priority for SCHED_NORMAL and + * SCHED_BATCH and SCHED_IDLE is 0. 
+ */ + if (attr->sched_priority < 0 || +- (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || ++ (p->mm && attr->sched_priority > MAX_RT_PRIO - 1) || + (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) + return -EINVAL; + if ((SCHED_RR == policy || SCHED_FIFO == policy) != +@@ -5614,7 +5614,7 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy) + switch (policy) { + case SCHED_FIFO: + case SCHED_RR: +- ret = MAX_USER_RT_PRIO-1; ++ ret = MAX_RT_PRIO - 1; + break; + case SCHED_NORMAL: + case SCHED_BATCH: +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index b1ad3d0b0430..0f068a98db87 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -38,7 +38,7 @@ static inline int normal_prio(struct task_struct *p) + if (task_has_rt_policy(p)) + return MAX_RT_PRIO - 1 - p->rt_priority; + +- return MAX_USER_RT_PRIO; ++ return MAX_RT_PRIO; + } + + static inline int +@@ -76,7 +76,8 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + p->time_slice = sched_timeslice_ns; + + if (p->prio >= MAX_RT_PRIO) +- p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; ++ p->deadline = rq->clock + ++ user_prio2deadline[p->static_prio - MAX_RT_PRIO]; + update_task_priodl(p); + + if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) +@@ -237,7 +238,8 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) + { + p->sl_level = pds_skiplist_random_level(p); + if (p->prio >= MAX_RT_PRIO) +- p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; ++ p->deadline = rq->clock + ++ user_prio2deadline[p->static_prio - MAX_RT_PRIO]; + update_task_priodl(p); + } + +-- +2.37.0 + + +From 560fe89ae1f42f337401be9dd4649ebeb365a690 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 30 Mar 2021 10:49:16 +0800 +Subject: [PATCH 123/297] sched/alt: [Sync] b965f1ddb47d preempt/dynamic: + Provide cond_resched() and might_resched() static calls + +--- + kernel/sched/alt_core.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 426af5230831..d8420f2625c8 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5460,17 +5460,27 @@ SYSCALL_DEFINE0(sched_yield) + return 0; + } + +-#ifndef CONFIG_PREEMPTION +-int __sched _cond_resched(void) ++#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) ++int __sched __cond_resched(void) + { + if (should_resched(0)) { + preempt_schedule_common(); + return 1; + } ++#ifndef CONFIG_PREEMPT_RCU + rcu_all_qs(); ++#endif + return 0; + } +-EXPORT_SYMBOL(_cond_resched); ++EXPORT_SYMBOL(__cond_resched); ++#endif ++ ++#ifdef CONFIG_PREEMPT_DYNAMIC ++DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched); ++EXPORT_STATIC_CALL_TRAMP(cond_resched); ++ ++DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched); ++EXPORT_STATIC_CALL_TRAMP(might_resched); + #endif + + /* +-- +2.37.0 + + +From 346794d4112dd92eda4985ca64c94d67219f9bb1 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 30 Mar 2021 10:54:18 +0800 +Subject: [PATCH 124/297] sched/alt: [Sync] 2c9a98d3bc80 preempt/dynamic: + Provide preempt_schedule[_notrace]() static calls + +--- + kernel/sched/alt_core.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index d8420f2625c8..739d2860747b 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4257,6 +4257,12 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) + 
NOKPROBE_SYMBOL(preempt_schedule); + EXPORT_SYMBOL(preempt_schedule); + ++#ifdef CONFIG_PREEMPT_DYNAMIC ++DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func); ++EXPORT_STATIC_CALL(preempt_schedule); ++#endif ++ ++ + /** + * preempt_schedule_notrace - preempt_schedule called by tracing + * +@@ -4309,6 +4315,12 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) + } + EXPORT_SYMBOL_GPL(preempt_schedule_notrace); + ++#ifdef CONFIG_PREEMPT_DYNAMIC ++DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); ++EXPORT_STATIC_CALL(preempt_schedule_notrace); ++#endif ++ ++ + #endif /* CONFIG_PREEMPTION */ + + /* +-- +2.37.0 + + +From e1344821acfcc9164852671078c5e11b968fe132 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 30 Mar 2021 13:38:47 +0800 +Subject: [PATCH 125/297] sched/alt: [Sync] 826bfeb37bb4 preempt/dynamic: + Support dynamic preempt with preempt= boot option + +--- + kernel/sched/alt_core.c | 185 ++++++++++++++++++++++++++++++++++++++- + kernel/sched/alt_sched.h | 1 + + 2 files changed, 185 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 739d2860747b..86ef6e3c1bbd 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4320,9 +4320,192 @@ DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); + EXPORT_STATIC_CALL(preempt_schedule_notrace); + #endif + +- + #endif /* CONFIG_PREEMPTION */ + ++#ifdef CONFIG_PREEMPT_DYNAMIC ++ ++#include ++ ++/* ++ * SC:cond_resched ++ * SC:might_resched ++ * SC:preempt_schedule ++ * SC:preempt_schedule_notrace ++ * SC:irqentry_exit_cond_resched ++ * ++ * ++ * NONE: ++ * cond_resched <- __cond_resched ++ * might_resched <- RET0 ++ * preempt_schedule <- NOP ++ * preempt_schedule_notrace <- NOP ++ * irqentry_exit_cond_resched <- NOP ++ * ++ * VOLUNTARY: ++ * cond_resched <- __cond_resched ++ * might_resched <- __cond_resched ++ * preempt_schedule <- NOP ++ * preempt_schedule_notrace <- NOP ++ * irqentry_exit_cond_resched <- NOP ++ * ++ * FULL: ++ * cond_resched <- RET0 ++ * might_resched <- RET0 ++ * preempt_schedule <- preempt_schedule ++ * preempt_schedule_notrace <- preempt_schedule_notrace ++ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched ++ */ ++ ++enum { ++ preempt_dynamic_none = 0, ++ preempt_dynamic_voluntary, ++ preempt_dynamic_full, ++}; ++ ++static int preempt_dynamic_mode = preempt_dynamic_full; ++ ++static int sched_dynamic_mode(const char *str) ++{ ++ if (!strcmp(str, "none")) ++ return 0; ++ ++ if (!strcmp(str, "voluntary")) ++ return 1; ++ ++ if (!strcmp(str, "full")) ++ return 2; ++ ++ return -1; ++} ++ ++static void sched_dynamic_update(int mode) ++{ ++ /* ++ * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in ++ * the ZERO state, which is invalid. 
++ */ ++ static_call_update(cond_resched, __cond_resched); ++ static_call_update(might_resched, __cond_resched); ++ static_call_update(preempt_schedule, __preempt_schedule_func); ++ static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); ++ static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); ++ ++ switch (mode) { ++ case preempt_dynamic_none: ++ static_call_update(cond_resched, __cond_resched); ++ static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); ++ static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); ++ static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); ++ static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); ++ pr_info("Dynamic Preempt: none\n"); ++ break; ++ ++ case preempt_dynamic_voluntary: ++ static_call_update(cond_resched, __cond_resched); ++ static_call_update(might_resched, __cond_resched); ++ static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); ++ static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); ++ static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); ++ pr_info("Dynamic Preempt: voluntary\n"); ++ break; ++ ++ case preempt_dynamic_full: ++ static_call_update(cond_resched, (typeof(&__cond_resched)) __static_call_return0); ++ static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); ++ static_call_update(preempt_schedule, __preempt_schedule_func); ++ static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); ++ static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); ++ pr_info("Dynamic Preempt: full\n"); ++ break; ++ } ++ ++ preempt_dynamic_mode = mode; ++} ++ ++static int __init setup_preempt_mode(char *str) ++{ ++ int mode = sched_dynamic_mode(str); ++ if (mode < 0) { ++ pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); ++ return 1; ++ } ++ ++ sched_dynamic_update(mode); ++ return 0; ++} ++__setup("preempt=", setup_preempt_mode); ++ ++#ifdef CONFIG_SCHED_DEBUG ++ ++static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ char buf[16]; ++ int mode; ++ ++ if (cnt > 15) ++ cnt = 15; ++ ++ if (copy_from_user(&buf, ubuf, cnt)) ++ return -EFAULT; ++ ++ buf[cnt] = 0; ++ mode = sched_dynamic_mode(strstrip(buf)); ++ if (mode < 0) ++ return mode; ++ ++ sched_dynamic_update(mode); ++ ++ *ppos += cnt; ++ ++ return cnt; ++} ++ ++static int sched_dynamic_show(struct seq_file *m, void *v) ++{ ++ static const char * preempt_modes[] = { ++ "none", "voluntary", "full" ++ }; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { ++ if (preempt_dynamic_mode == i) ++ seq_puts(m, "("); ++ seq_puts(m, preempt_modes[i]); ++ if (preempt_dynamic_mode == i) ++ seq_puts(m, ")"); ++ ++ seq_puts(m, " "); ++ } ++ ++ seq_puts(m, "\n"); ++ return 0; ++} ++ ++static int sched_dynamic_open(struct inode *inode, struct file *filp) ++{ ++ return single_open(filp, sched_dynamic_show, NULL); ++} ++ ++static const struct file_operations sched_dynamic_fops = { ++ .open = sched_dynamic_open, ++ .write = sched_dynamic_write, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static __init int sched_init_debug_dynamic(void) ++{ ++ debugfs_create_file("sched_preempt", 0644, NULL, NULL, &sched_dynamic_fops); ++ return 0; ++} ++late_initcall(sched_init_debug_dynamic); ++ ++#endif /* 
CONFIG_SCHED_DEBUG */ ++#endif /* CONFIG_PREEMPT_DYNAMIC */ ++ ++ + /* + * This is the entry point to schedule() from kernel preemption + * off of irq context. +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 7bcd96cc6bed..32b523e2af06 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +-- +2.37.0 + + +From 4883a757728996d234c848c6aedd3d24c891fa45 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 30 Mar 2021 13:42:07 +0800 +Subject: [PATCH 126/297] sched/alt: [Sync] f3d4b4b1dc1c sched: Add + cond_resched_rwlock + +--- + kernel/sched/alt_core.c | 40 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 86ef6e3c1bbd..e30d174d2ad8 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5706,6 +5706,46 @@ int __cond_resched_lock(spinlock_t *lock) + } + EXPORT_SYMBOL(__cond_resched_lock); + ++int __cond_resched_rwlock_read(rwlock_t *lock) ++{ ++ int resched = should_resched(PREEMPT_LOCK_OFFSET); ++ int ret = 0; ++ ++ lockdep_assert_held_read(lock); ++ ++ if (rwlock_needbreak(lock) || resched) { ++ read_unlock(lock); ++ if (resched) ++ preempt_schedule_common(); ++ else ++ cpu_relax(); ++ ret = 1; ++ read_lock(lock); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(__cond_resched_rwlock_read); ++ ++int __cond_resched_rwlock_write(rwlock_t *lock) ++{ ++ int resched = should_resched(PREEMPT_LOCK_OFFSET); ++ int ret = 0; ++ ++ lockdep_assert_held_write(lock); ++ ++ if (rwlock_needbreak(lock) || resched) { ++ write_unlock(lock); ++ if (resched) ++ preempt_schedule_common(); ++ else ++ cpu_relax(); ++ ret = 1; ++ write_lock(lock); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(__cond_resched_rwlock_write); ++ + /** + * yield - yield the current processor to other threads. 
+ * +-- +2.37.0 + + +From 767e8f222653a8922f0831746c66e109614d0951 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 31 Mar 2021 10:43:38 +0800 +Subject: [PATCH 127/297] sched/alt: [Sync] ef72661e28c6 sched: Harden + PREEMPT_DYNAMIC + +--- + kernel/sched/alt_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index e30d174d2ad8..ece579819949 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4259,7 +4259,7 @@ EXPORT_SYMBOL(preempt_schedule); + + #ifdef CONFIG_PREEMPT_DYNAMIC + DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func); +-EXPORT_STATIC_CALL(preempt_schedule); ++EXPORT_STATIC_CALL_TRAMP(preempt_schedule); + #endif + + +@@ -4317,7 +4317,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace); + + #ifdef CONFIG_PREEMPT_DYNAMIC + DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); +-EXPORT_STATIC_CALL(preempt_schedule_notrace); ++EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); + #endif + + #endif /* CONFIG_PREEMPTION */ +-- +2.37.0 + + +From 08ee68e325c221b5b5706754aab77c6215dc51dc Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 1 Apr 2021 15:40:39 +0800 +Subject: [PATCH 128/297] sched/alt: Update task_prio() function header + +--- + kernel/sched/bmq_imp.h | 7 +++++-- + kernel/sched/pds_imp.h | 7 +++++-- + 2 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index 13eda4b26b6a..7c71f1141d00 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -170,8 +170,11 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) + * @p: the task in question. + * + * Return: The priority value as seen by users in /proc. +- * RT tasks are offset by -100. Normal tasks are centered around 1, value goes +- * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). ++ * ++ * sched policy return value kernel prio user prio/nice/boost ++ * ++ * normal, batch, idle [0 ... 53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] ++ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] + */ + int task_prio(const struct task_struct *p) + { +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index 0f068a98db87..335ce3a8e3ec 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -248,8 +248,11 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) + * @p: the task in question. + * + * Return: The priority value as seen by users in /proc. +- * RT tasks are offset by -100. Normal tasks are centered around 1, value goes +- * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). ++ * ++ * sched policy return value kernel prio user prio/nice ++ * ++ * normal, batch, idle [0 ... 39] 100 0/[-20 ... 19] ++ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 
99] + */ + int task_prio(const struct task_struct *p) + { +-- +2.37.0 + + +From f446239e56f01cc4f6ef4a1d7bd7b53479cb3448 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 1 Apr 2021 22:42:09 +0800 +Subject: [PATCH 129/297] sched/alt: [Sync] 156ec6f42b8d sched/features: Fix + hrtick reprogramming + +--- + kernel/sched/alt_core.c | 7 +++---- + kernel/sched/alt_sched.h | 3 ++- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index ece579819949..f69ed4d89395 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -937,8 +937,9 @@ static inline int hrtick_enabled(struct rq *rq) + static void __hrtick_restart(struct rq *rq) + { + struct hrtimer *timer = &rq->hrtick_timer; ++ ktime_t time = rq->hrtick_time; + +- hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); ++ hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD); + } + + /* +@@ -961,7 +962,6 @@ static void __hrtick_start(void *arg) + void hrtick_start(struct rq *rq, u64 delay) + { + struct hrtimer *timer = &rq->hrtick_timer; +- ktime_t time; + s64 delta; + + /* +@@ -969,9 +969,8 @@ void hrtick_start(struct rq *rq, u64 delay) + * doesn't make sense and can cause timer DoS. + */ + delta = max_t(s64, delay, 10000LL); +- time = ktime_add_ns(timer->base->get_time(), delta); + +- hrtimer_set_expires(timer, time); ++ rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta); + + if (rq == this_rq()) + __hrtick_restart(rq); +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 32b523e2af06..2a6a0530fbb7 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -206,7 +206,8 @@ struct rq { + #ifdef CONFIG_SMP + call_single_data_t hrtick_csd; + #endif +- struct hrtimer hrtick_timer; ++ struct hrtimer hrtick_timer; ++ ktime_t hrtick_time; + #endif + + #ifdef CONFIG_SCHEDSTATS +-- +2.37.0 + + +From 6c7833013dc88efb0192f1efc4acad3cee745c8e Mon Sep 17 00:00:00 2001 +From: Piotr Gorski +Date: Mon, 26 Apr 2021 13:33:02 +0200 +Subject: [PATCH 130/297] Project-C v5.12-r0 + +Signed-off-by: Piotr Gorski +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index f69ed4d89395..02610d086d00 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.11-r3" ++#define ALT_SCHED_VERSION "v5.12-r0" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 9d465ac0e07e2f577469debce71ac5a4348279e5 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 1 Apr 2021 09:57:05 +0800 +Subject: [PATCH 131/297] sched/alt: Cleanup in cpufreq_schedutil.c + +--- + kernel/sched/cpufreq_schedutil.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index c44d2e33e114..ab803029b7fd 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -905,10 +905,11 @@ struct cpufreq_governor *cpufreq_default_governor(void) + cpufreq_governor_init(schedutil_gov); + + #ifdef CONFIG_ENERGY_MODEL +-#ifndef CONFIG_SCHED_ALT + static void rebuild_sd_workfn(struct work_struct *work) + { ++#ifndef CONFIG_SCHED_ALT + rebuild_sched_domains_energy(); ++#endif /* CONFIG_SCHED_ALT */ + } + static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); + +@@ -929,10 
+930,4 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, + } + + } +-#else /* CONFIG_SCHED_ALT */ +-void sched_cpufreq_governor_change(struct cpufreq_policy *policy, +- struct cpufreq_governor *old_gov) +-{ +-} +-#endif + #endif +-- +2.37.0 + + +From 0ba2755f9c2aacb23c4b5b832f8f33878d86b2bf Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 26 Mar 2021 13:46:22 +0800 +Subject: [PATCH 132/297] sched/alt: Remove unnecessary CONFIG_SMP macros + usage. + +--- + kernel/sched/alt_core.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 02610d086d00..ae37764ced1e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3816,15 +3816,13 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) + + if ((nr_migrated = migrate_pending_tasks(src_rq, rq, cpu))) { + src_rq->nr_running -= nr_migrated; +-#ifdef CONFIG_SMP + if (src_rq->nr_running < 2) + cpumask_clear_cpu(i, &sched_rq_pending_mask); +-#endif ++ + rq->nr_running += nr_migrated; +-#ifdef CONFIG_SMP + if (rq->nr_running > 1) + cpumask_set_cpu(cpu, &sched_rq_pending_mask); +-#endif ++ + update_sched_rq_watermark(rq); + cpufreq_update_util(rq, 0); + +-- +2.37.0 + + +From 603af480eceb0442af627cb0ea8364f4f61f19dd Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 4 Apr 2021 08:54:29 +0800 +Subject: [PATCH 133/297] sched/alt: Code clean up + +--- + kernel/sched/alt_core.c | 11 +---------- + 1 file changed, 1 insertion(+), 10 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index ae37764ced1e..3e79fdc14152 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1138,20 +1138,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) + __set_task_cpu(p, new_cpu); + } + +-static inline bool is_per_cpu_kthread(struct task_struct *p) +-{ +- return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); +-} +- + #define MDF_FORCE_ENABLED 0x80 + + static void + __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); + +-static int __set_cpus_allowed_ptr(struct task_struct *p, +- const struct cpumask *new_mask, +- u32 flags); +- + void migrate_disable(void) + { + struct task_struct *p = current; +@@ -1753,7 +1744,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + goto out; + + if (p->migration_disabled) { +- if (p->cpus_ptr != &p->cpus_mask) ++ if (likely(p->cpus_ptr != &p->cpus_mask)) + __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); + p->migration_disabled = 0; + p->migration_flags |= MDF_FORCE_ENABLED; +-- +2.37.0 + + +From 66512fba36f59b1144a9256ec3954bd9cc16d353 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 7 Apr 2021 11:43:30 +0800 +Subject: [PATCH 134/297] sched/alt: Don't migrate_disable() during migration. 
+ +--- + kernel/sched/alt_core.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3e79fdc14152..11ffc1cb4528 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1146,6 +1146,7 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 + void migrate_disable(void) + { + struct task_struct *p = current; ++ int cpu; + + if (p->migration_disabled) { + p->migration_disabled++; +@@ -1153,16 +1154,18 @@ void migrate_disable(void) + } + + preempt_disable(); +- this_rq()->nr_pinned++; +- p->migration_disabled = 1; +- p->migration_flags &= ~MDF_FORCE_ENABLED; +- +- /* +- * Violates locking rules! see comment in __do_set_cpus_allowed(). +- */ +- if (p->cpus_ptr == &p->cpus_mask) +- __do_set_cpus_allowed(p, cpumask_of(smp_processor_id()), SCA_MIGRATE_DISABLE); ++ cpu = smp_processor_id(); ++ if (cpumask_test_cpu(cpu, &p->cpus_mask)) { ++ cpu_rq(cpu)->nr_pinned++; ++ p->migration_disabled = 1; ++ p->migration_flags &= ~MDF_FORCE_ENABLED; + ++ /* ++ * Violates locking rules! see comment in __do_set_cpus_allowed(). ++ */ ++ if (p->cpus_ptr == &p->cpus_mask) ++ __do_set_cpus_allowed(p, cpumask_of(cpu), SCA_MIGRATE_DISABLE); ++ } + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_disable); +-- +2.37.0 + + +From 98b132c71bd16c65729ef343b36a23f39b3d5c75 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 7 Apr 2021 14:08:18 +0800 +Subject: [PATCH 135/297] sched/alt: migrate disable code clean up + +--- + kernel/sched/alt_core.c | 63 ++++++++++++++++++----------------------- + 1 file changed, 28 insertions(+), 35 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 11ffc1cb4528..4ed1ff9f1aab 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1097,8 +1097,6 @@ static inline bool is_migration_disabled(struct task_struct *p) + } + + #define SCA_CHECK 0x01 +-#define SCA_MIGRATE_DISABLE 0x02 +-#define SCA_MIGRATE_ENABLE 0x04 + + #ifdef CONFIG_SMP + +@@ -1141,7 +1139,23 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) + #define MDF_FORCE_ENABLED 0x80 + + static void +-__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); ++__do_set_cpus_ptr(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ /* ++ * This here violates the locking rules for affinity, since we're only ++ * supposed to change these variables while holding both rq->lock and ++ * p->pi_lock. ++ * ++ * HOWEVER, it magically works, because ttwu() is the only code that ++ * accesses these variables under p->pi_lock and only does so after ++ * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() ++ * before finish_task(). ++ * ++ * XXX do further audits, this smells like something putrid. ++ */ ++ SCHED_WARN_ON(!p->on_cpu); ++ p->cpus_ptr = new_mask; ++} + + void migrate_disable(void) + { +@@ -1161,10 +1175,10 @@ void migrate_disable(void) + p->migration_flags &= ~MDF_FORCE_ENABLED; + + /* +- * Violates locking rules! see comment in __do_set_cpus_allowed(). ++ * Violates locking rules! see comment in __do_set_cpus_ptr(). 
+ */ + if (p->cpus_ptr == &p->cpus_mask) +- __do_set_cpus_allowed(p, cpumask_of(cpu), SCA_MIGRATE_DISABLE); ++ __do_set_cpus_ptr(p, cpumask_of(cpu)); + } + preempt_enable(); + } +@@ -1192,7 +1206,7 @@ void migrate_enable(void) + */ + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &p->cpus_mask)); + if (p->cpus_ptr != &p->cpus_mask) +- __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); ++ __do_set_cpus_ptr(p, &p->cpus_mask); + /* + * Mustn't clear migration_disabled() until cpus_ptr points back at the + * regular cpus_mask, otherwise things that race (eg. +@@ -1345,43 +1359,22 @@ static int migration_cpu_stop(void *data) + } + + static inline void +-set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) ++set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) + { +- if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) { +- p->cpus_ptr = new_mask; +- return; +- } +- + cpumask_copy(&p->cpus_mask, new_mask); + p->nr_cpus_allowed = cpumask_weight(new_mask); + } + + static void +-__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags) ++__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) + { +- /* +- * This here violates the locking rules for affinity, since we're only +- * supposed to change these variables while holding both rq->lock and +- * p->pi_lock. +- * +- * HOWEVER, it magically works, because ttwu() is the only code that +- * accesses these variables under p->pi_lock and only does so after +- * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() +- * before finish_task(). +- * +- * XXX do further audits, this smells like something putrid. +- */ +- if (flags & (SCA_MIGRATE_DISABLE | SCA_MIGRATE_ENABLE)) +- SCHED_WARN_ON(!p->on_cpu); +- else +- lockdep_assert_held(&p->pi_lock); +- +- set_cpus_allowed_common(p, new_mask, flags); ++ lockdep_assert_held(&p->pi_lock); ++ set_cpus_allowed_common(p, new_mask); + } + + void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) + { +- __do_set_cpus_allowed(p, new_mask, 0); ++ __do_set_cpus_allowed(p, new_mask); + } + + #endif +@@ -1740,7 +1733,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + goto out; + } + +- __do_set_cpus_allowed(p, new_mask, flags); ++ __do_set_cpus_allowed(p, new_mask); + + /* Can the task run on the task's current CPU? If so, we're done */ + if (cpumask_test_cpu(task_cpu(p), new_mask)) +@@ -1748,7 +1741,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + + if (p->migration_disabled) { + if (likely(p->cpus_ptr != &p->cpus_mask)) +- __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); ++ __do_set_cpus_ptr(p, &p->cpus_mask); + p->migration_disabled = 0; + p->migration_flags |= MDF_FORCE_ENABLED; + /* When p is migrate_disabled, rq->lock should be held */ +@@ -6076,7 +6069,7 @@ void init_idle(struct task_struct *idle, int cpu) + * + * And since this is boot we can forgo the serialisation. + */ +- set_cpus_allowed_common(idle, cpumask_of(cpu), 0); ++ set_cpus_allowed_common(idle, cpumask_of(cpu)); + #endif + + /* Silence PROVE_RCU */ +-- +2.37.0 + + +From 6837036676afbba59f7876ae8761ba146c369822 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 19 Apr 2021 10:11:56 +0800 +Subject: [PATCH 136/297] sched/alt: Fix task migratie to dying cpu. 
+ +Fix #23 + +WARNING: CPU: 2 PID: 26 at kernel/sched/alt_core.c:6294 +sched_cpu_dying.cold+0xc/0xd2 +--- + kernel/sched/alt_core.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4ed1ff9f1aab..6350afe33985 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1619,7 +1619,7 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) + { + cpumask_t chk_mask, tmp; + +- if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) ++ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_active_mask))) + return select_fallback_rq(task_cpu(p), p); + + if ( +@@ -3420,6 +3420,10 @@ static inline void sg_balance_check(struct rq *rq) + if (cpumask_empty(&sched_sg_idle_mask)) + return; + ++ /* exit when cpu is offline */ ++ if (unlikely(!rq->online)) ++ return; ++ + cpu = cpu_of(rq); + /* + * Only cpu in slibing idle group will do the checking and then +-- +2.37.0 + + +From 1716236d1fc190aa7b283ae2323dc38c19f350e1 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 27 Apr 2021 10:37:22 +0800 +Subject: [PATCH 137/297] Project-C v5.12-r1 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 6350afe33985..c85e3ccf9302 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,7 +52,7 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + +-#define ALT_SCHED_VERSION "v5.12-r0" ++#define ALT_SCHED_VERSION "v5.12-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 913f33c1caa0c1d396ccc470f275ce987cae3f09 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 7 Jun 2021 14:32:09 +0800 +Subject: [PATCH 138/297] sched/alt: [Sync] 7fae6c8171d2 psi: Use ONCPU state + tracking machinery to detect reclaim + +--- + kernel/sched/alt_core.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c85e3ccf9302..8eea87597c2e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3342,7 +3342,6 @@ void scheduler_tick(void) + + scheduler_task_tick(rq); + calc_global_load_tick(rq); +- psi_task_tick(rq); + + rq->last_tick = rq->clock; + raw_spin_unlock(&rq->lock); +-- +2.37.0 + + +From d71e5f775c30c6c0c015beb8b2e82af89afa31f3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 8 Jun 2021 06:33:50 +0800 +Subject: [PATCH 139/297] sched/alt: [Sync] c6f886546cb8 sched/fair: Trigger + the update of blocked load on newly idle cpu + +--- + kernel/sched/alt_sched.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 2a6a0530fbb7..b3436b11ba7c 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -683,4 +683,5 @@ static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) + void swake_up_all_locked(struct swait_queue_head *q); + void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); + ++static inline void nohz_run_idle_balance(int cpu) { } + #endif /* ALT_SCHED_H */ +-- +2.37.0 + + +From 12e6ae4f0cf744e3ccb7c7f96126eb3bfb563189 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 8 Jun 2021 08:52:49 +0800 +Subject: [PATCH 140/297] sched/alt: [Sync] 9432bbd969c6 static_call: Relax + static_call_update() function argument type + +--- + kernel/sched/alt_core.c | 18 +++++++++--------- + 1 file changed, 9 
insertions(+), 9 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 8eea87597c2e..fc35459d76b0 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4380,25 +4380,25 @@ static void sched_dynamic_update(int mode) + switch (mode) { + case preempt_dynamic_none: + static_call_update(cond_resched, __cond_resched); +- static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); +- static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); +- static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); +- static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); ++ static_call_update(might_resched, (void *)&__static_call_return0); ++ static_call_update(preempt_schedule, NULL); ++ static_call_update(preempt_schedule_notrace, NULL); ++ static_call_update(irqentry_exit_cond_resched, NULL); + pr_info("Dynamic Preempt: none\n"); + break; + + case preempt_dynamic_voluntary: + static_call_update(cond_resched, __cond_resched); + static_call_update(might_resched, __cond_resched); +- static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); +- static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); +- static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); ++ static_call_update(preempt_schedule, NULL); ++ static_call_update(preempt_schedule_notrace, NULL); ++ static_call_update(irqentry_exit_cond_resched, NULL); + pr_info("Dynamic Preempt: voluntary\n"); + break; + + case preempt_dynamic_full: +- static_call_update(cond_resched, (typeof(&__cond_resched)) __static_call_return0); +- static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); ++ static_call_update(cond_resched, (void *)&__static_call_return0); ++ static_call_update(might_resched, (void *)&__static_call_return0); + static_call_update(preempt_schedule, __preempt_schedule_func); + static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); + static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); +-- +2.37.0 + + +From e968346110a1ee38f221db41c22703e9893bedac Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 8 Jun 2021 08:55:38 +0800 +Subject: [PATCH 141/297] sched/alt: [Sync] 13c2235b2b28 sched: Remove + unnecessary variable from schedule_tail() + +--- + kernel/sched/alt_core.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index fc35459d76b0..f052be4a3ff5 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3029,8 +3029,6 @@ static struct rq *finish_task_switch(struct task_struct *prev) + asmlinkage __visible void schedule_tail(struct task_struct *prev) + __releases(rq->lock) + { +- struct rq *rq; +- + /* + * New tasks start with FORK_PREEMPT_COUNT, see there and + * finish_task_switch() for details. +@@ -3040,7 +3038,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) + * PREEMPT_COUNT kernels). 
+ */ + +- rq = finish_task_switch(prev); ++ finish_task_switch(prev); + preempt_enable(); + + if (current->set_child_tid) +-- +2.37.0 + + +From f99530410f74ad9c9040f52306ef4f4e470ddde4 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 8 Jun 2021 08:58:13 +0800 +Subject: [PATCH 142/297] sched/alt: [Sync] 7e1b2eb74928 sched/core: Stop using + magic values in sched_dynamic_mode() + +--- + kernel/sched/alt_core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index f052be4a3ff5..b4a42e218b12 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4352,13 +4352,13 @@ static int preempt_dynamic_mode = preempt_dynamic_full; + static int sched_dynamic_mode(const char *str) + { + if (!strcmp(str, "none")) +- return 0; ++ return preempt_dynamic_none; + + if (!strcmp(str, "voluntary")) +- return 1; ++ return preempt_dynamic_voluntary; + + if (!strcmp(str, "full")) +- return 2; ++ return preempt_dynamic_full; + + return -1; + } +-- +2.37.0 + + +From 3a32bee45fe806e27213a530bf399c7de997da5b Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 8 Jun 2021 08:59:00 +0800 +Subject: [PATCH 143/297] sched/alt: [Sync] c4681f3f1cfc sched/core: Use + -EINVAL in sched_dynamic_mode() + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b4a42e218b12..6b7136682e39 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4360,7 +4360,7 @@ static int sched_dynamic_mode(const char *str) + if (!strcmp(str, "full")) + return preempt_dynamic_full; + +- return -1; ++ return -EINVAL; + } + + static void sched_dynamic_update(int mode) +-- +2.37.0 + + +From 017d061930108f08e64ac9f1fd9af2ae0fca1812 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 8 Jun 2021 09:53:27 +0800 +Subject: [PATCH 144/297] sched/alt: [Sync] b5c4477366fb sched: Use cpu_dying() + to fix balance_push vs hotplug-rollback + +--- + kernel/sched/alt_core.c | 26 ++++++++++++++------------ + kernel/sched/alt_sched.h | 1 - + 2 files changed, 14 insertions(+), 13 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 6b7136682e39..4947e3446124 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1247,7 +1247,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) + return cpu_online(cpu); + + /* Regular kernel threads don't get to stay during offline. */ +- if (cpu_rq(cpu)->balance_push) ++ if (cpu_dying(cpu)) + return false; + + /* But are allowed during online. */ +@@ -6172,7 +6172,8 @@ static int __balance_push_cpu_stop(void *arg) + static DEFINE_PER_CPU(struct cpu_stop_work, push_work); + + /* +- * Ensure we only run per-cpu kthreads once the CPU goes !active. ++ * This is enabled below SCHED_AP_ACTIVE; when !cpu_active(), but only ++ * effective when the hotplug motion is down. + */ + static void balance_push(struct rq *rq) + { +@@ -6180,11 +6181,18 @@ static void balance_push(struct rq *rq) + + lockdep_assert_held(&rq->lock); + SCHED_WARN_ON(rq->cpu != smp_processor_id()); ++ + /* + * Ensure the thing is persistent until balance_push_set(.on = false); + */ + rq->balance_callback = &balance_push_callback; + ++ /* ++ * Only active while going offline. ++ */ ++ if (!cpu_dying(rq->cpu)) ++ return; ++ + /* + * Both the cpu-hotplug and stop task are in this case and are + * required to complete the hotplug process. 
+@@ -6238,7 +6246,6 @@ static void balance_push_set(int cpu, bool on) + struct rq_flags rf; + + rq_lock_irqsave(rq, &rf); +- rq->balance_push = on; + if (on) { + WARN_ON_ONCE(rq->balance_callback); + rq->balance_callback = &balance_push_callback; +@@ -6343,8 +6350,8 @@ int sched_cpu_activate(unsigned int cpu) + unsigned long flags; + + /* +- * Make sure that when the hotplug state machine does a roll-back +- * we clear balance_push. Ideally that would happen earlier... ++ * Clear the balance_push callback and prepare to schedule ++ * regular tasks. + */ + balance_push_set(cpu, false); + +@@ -6517,12 +6524,6 @@ int sched_cpu_dying(unsigned int cpu) + } + raw_spin_unlock_irqrestore(&rq->lock, flags); + +- /* +- * Now that the CPU is offline, make sure we're welcome +- * to new tasks once we come back up. +- */ +- balance_push_set(cpu, false); +- + calc_load_migrate(rq); + hrtick_clear(rq); + return 0; +@@ -6691,7 +6692,7 @@ void __init sched_init(void) + #ifdef CONFIG_NO_HZ_COMMON + INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); + #endif +- rq->balance_callback = NULL; ++ rq->balance_callback = &balance_push_callback; + #ifdef CONFIG_HOTPLUG_CPU + rcuwait_init(&rq->hotplug_wait); + #endif +@@ -6723,6 +6724,7 @@ void __init sched_init(void) + + #ifdef CONFIG_SMP + idle_thread_set_boot_cpu(); ++ balance_push_set(smp_processor_id(), false); + + sched_init_topology_cpumask_early(); + #endif /* SMP */ +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index b3436b11ba7c..6902a2579d73 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -175,7 +175,6 @@ struct rq { + struct cpu_stop_work active_balance_work; + #endif + struct callback_head *balance_callback; +- unsigned char balance_push; + #ifdef CONFIG_HOTPLUG_CPU + struct rcuwait hotplug_wait; + #endif +-- +2.37.0 + + +From 07addd5369fb59a247afb899ca93fa47f9ab18c1 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 8 Jun 2021 16:40:51 +0000 +Subject: [PATCH 145/297] sched/alt: Use common code in debug.c + +This commit sync-up following commits +8a99b6833c88 sched: Move SCHED_DEBUG sysctl to debugfs +1011dcce99f8 sched,preempt: Move preempt_dynamic to debug.c +--- + kernel/sched/Makefile | 3 +- + kernel/sched/alt_core.c | 75 ++-------------------------------------- + kernel/sched/alt_sched.h | 11 ++++-- + kernel/sched/debug.c | 10 ++++++ + 4 files changed, 24 insertions(+), 75 deletions(-) + +diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile +index 01b041388419..31d587c16ec1 100644 +--- a/kernel/sched/Makefile ++++ b/kernel/sched/Makefile +@@ -29,7 +29,8 @@ endif + # build parallelizes well and finishes roughly at once: + # + ifdef CONFIG_SCHED_ALT +-obj-y += alt_core.o alt_debug.o ++obj-y += alt_core.o ++obj-$(CONFIG_SCHED_DEBUG) += alt_debug.o + else + obj-y += core.o + obj-y += fair.o +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4947e3446124..cf4ad8c4203d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4347,9 +4347,9 @@ enum { + preempt_dynamic_full, + }; + +-static int preempt_dynamic_mode = preempt_dynamic_full; ++int preempt_dynamic_mode = preempt_dynamic_full; + +-static int sched_dynamic_mode(const char *str) ++int sched_dynamic_mode(const char *str) + { + if (!strcmp(str, "none")) + return preempt_dynamic_none; +@@ -4363,7 +4363,7 @@ static int sched_dynamic_mode(const char *str) + return -EINVAL; + } + +-static void sched_dynamic_update(int mode) ++void sched_dynamic_update(int mode) + { + /* + * Avoid {NONE,VOLUNTARY} -> FULL 
transitions from ever ending up in +@@ -4420,77 +4420,8 @@ static int __init setup_preempt_mode(char *str) + } + __setup("preempt=", setup_preempt_mode); + +-#ifdef CONFIG_SCHED_DEBUG +- +-static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, +- size_t cnt, loff_t *ppos) +-{ +- char buf[16]; +- int mode; +- +- if (cnt > 15) +- cnt = 15; +- +- if (copy_from_user(&buf, ubuf, cnt)) +- return -EFAULT; +- +- buf[cnt] = 0; +- mode = sched_dynamic_mode(strstrip(buf)); +- if (mode < 0) +- return mode; +- +- sched_dynamic_update(mode); +- +- *ppos += cnt; +- +- return cnt; +-} +- +-static int sched_dynamic_show(struct seq_file *m, void *v) +-{ +- static const char * preempt_modes[] = { +- "none", "voluntary", "full" +- }; +- int i; +- +- for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { +- if (preempt_dynamic_mode == i) +- seq_puts(m, "("); +- seq_puts(m, preempt_modes[i]); +- if (preempt_dynamic_mode == i) +- seq_puts(m, ")"); +- +- seq_puts(m, " "); +- } +- +- seq_puts(m, "\n"); +- return 0; +-} +- +-static int sched_dynamic_open(struct inode *inode, struct file *filp) +-{ +- return single_open(filp, sched_dynamic_show, NULL); +-} +- +-static const struct file_operations sched_dynamic_fops = { +- .open = sched_dynamic_open, +- .write = sched_dynamic_write, +- .read = seq_read, +- .llseek = seq_lseek, +- .release = single_release, +-}; +- +-static __init int sched_init_debug_dynamic(void) +-{ +- debugfs_create_file("sched_preempt", 0644, NULL, NULL, &sched_dynamic_fops); +- return 0; +-} +-late_initcall(sched_init_debug_dynamic); +- +-#endif /* CONFIG_SCHED_DEBUG */ + #endif /* CONFIG_PREEMPT_DYNAMIC */ + +- + /* + * This is the entry point to schedule() from kernel preemption + * off of irq context. +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 6902a2579d73..cf9ed4480d86 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -60,6 +60,7 @@ + # define SCHED_WARN_ON(x) WARN_ONCE(x, #x) + #else + # define SCHED_WARN_ON(x) ({ (void)(x), 0; }) ++static inline void resched_latency_warn(int cpu, u64 latency) {} + #endif + + /* +@@ -679,8 +680,14 @@ static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) + } + #endif + +-void swake_up_all_locked(struct swait_queue_head *q); +-void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); ++extern void swake_up_all_locked(struct swait_queue_head *q); ++extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); ++ ++#ifdef CONFIG_PREEMPT_DYNAMIC ++extern int preempt_dynamic_mode; ++extern int sched_dynamic_mode(const char *str); ++extern void sched_dynamic_update(int mode); ++#endif + + static inline void nohz_run_idle_balance(int cpu) { } + #endif /* ALT_SCHED_H */ +diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c +index bb3d63bdf4ae..c87c87da2ba5 100644 +--- a/kernel/sched/debug.c ++++ b/kernel/sched/debug.c +@@ -7,6 +7,7 @@ + * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar + */ + ++#ifndef CONFIG_SCHED_ALT + /* + * This allows printing both to /proc/sched_debug and + * to the console +@@ -215,6 +216,7 @@ static const struct file_operations sched_scaling_fops = { + }; + + #endif /* SMP */ ++#endif /* !CONFIG_SCHED_ALT */ + + #ifdef CONFIG_PREEMPT_DYNAMIC + +@@ -278,6 +280,7 @@ static const struct file_operations sched_dynamic_fops = { + + #endif /* CONFIG_PREEMPT_DYNAMIC */ + ++#ifndef CONFIG_SCHED_ALT + __read_mostly bool sched_debug_verbose; + + static const struct seq_operations sched_debug_sops; +@@ -293,6 
+296,7 @@ static const struct file_operations sched_debug_fops = { + .llseek = seq_lseek, + .release = seq_release, + }; ++#endif /* !CONFIG_SCHED_ALT */ + + static struct dentry *debugfs_sched; + +@@ -302,12 +306,15 @@ static __init int sched_init_debug(void) + + debugfs_sched = debugfs_create_dir("sched", NULL); + ++#ifndef CONFIG_SCHED_ALT + debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); + debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose); ++#endif /* !CONFIG_SCHED_ALT */ + #ifdef CONFIG_PREEMPT_DYNAMIC + debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); + #endif + ++#ifndef CONFIG_SCHED_ALT + debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency); + debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity); + debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity); +@@ -336,11 +343,13 @@ static __init int sched_init_debug(void) + #endif + + debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); ++#endif /* !CONFIG_SCHED_ALT */ + + return 0; + } + late_initcall(sched_init_debug); + ++#ifndef CONFIG_SCHED_ALT + #ifdef CONFIG_SMP + + static cpumask_var_t sd_sysctl_cpus; +@@ -1077,3 +1086,4 @@ void resched_latency_warn(int cpu, u64 latency) + "without schedule\n", + cpu, latency, cpu_rq(cpu)->ticks_without_resched); + } ++#endif /* !CONFIG_SCHED_ALT */ +-- +2.37.0 + + +From 400ec7dc8745e9cff692c2f01d8e1a00fc98eb30 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 9 Jun 2021 09:48:48 +0000 +Subject: [PATCH 146/297] sched/alt: [Sync] c006fac556e4 sched: Warn on long + periods of pending need_resched + +--- + kernel/sched/alt_core.c | 74 +++++++++++++++++++++++++++++++++++++++- + kernel/sched/alt_sched.h | 6 ++++ + kernel/sched/debug.c | 2 +- + 3 files changed, 80 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index cf4ad8c4203d..345a2b606b65 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -52,6 +52,21 @@ + */ + EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); + ++#ifdef CONFIG_SCHED_DEBUG ++#define sched_feat(x) (1) ++/* ++ * Print a warning if need_resched is set for the given duration (if ++ * LATENCY_WARN is enabled). ++ * ++ * If sysctl_resched_latency_warn_once is set, only one warning will be shown ++ * per boot. 
++ */ ++__read_mostly int sysctl_resched_latency_warn_ms = 100; ++__read_mostly int sysctl_resched_latency_warn_once = 1; ++#else ++#define sched_feat(x) (0) ++#endif /* CONFIG_SCHED_DEBUG */ ++ + #define ALT_SCHED_VERSION "v5.12-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ +@@ -3323,6 +3338,55 @@ static inline void scheduler_task_tick(struct rq *rq) + set_preempt_need_resched(); + } + ++#ifdef CONFIG_SCHED_DEBUG ++static u64 cpu_resched_latency(struct rq *rq) ++{ ++ int latency_warn_ms = READ_ONCE(sysctl_resched_latency_warn_ms); ++ u64 resched_latency, now = rq_clock(rq); ++ static bool warned_once; ++ ++ if (sysctl_resched_latency_warn_once && warned_once) ++ return 0; ++ ++ if (!need_resched() || !latency_warn_ms) ++ return 0; ++ ++ if (system_state == SYSTEM_BOOTING) ++ return 0; ++ ++ if (!rq->last_seen_need_resched_ns) { ++ rq->last_seen_need_resched_ns = now; ++ rq->ticks_without_resched = 0; ++ return 0; ++ } ++ ++ rq->ticks_without_resched++; ++ resched_latency = now - rq->last_seen_need_resched_ns; ++ if (resched_latency <= latency_warn_ms * NSEC_PER_MSEC) ++ return 0; ++ ++ warned_once = true; ++ ++ return resched_latency; ++} ++ ++static int __init setup_resched_latency_warn_ms(char *str) ++{ ++ long val; ++ ++ if ((kstrtol(str, 0, &val))) { ++ pr_warn("Unable to set resched_latency_warn_ms\n"); ++ return 1; ++ } ++ ++ sysctl_resched_latency_warn_ms = val; ++ return 1; ++} ++__setup("resched_latency_warn_ms=", setup_resched_latency_warn_ms); ++#else ++static inline u64 cpu_resched_latency(struct rq *rq) { return 0; } ++#endif /* CONFIG_SCHED_DEBUG */ ++ + /* + * This function gets called by the timer code, with HZ frequency. + * We call it with interrupts disabled. +@@ -3331,6 +3395,7 @@ void scheduler_tick(void) + { + int cpu __maybe_unused = smp_processor_id(); + struct rq *rq = cpu_rq(cpu); ++ u64 resched_latency; + + arch_scale_freq_tick(); + sched_clock_tick(); +@@ -3339,11 +3404,16 @@ void scheduler_tick(void) + update_rq_clock(rq); + + scheduler_task_tick(rq); ++ if (sched_feat(LATENCY_WARN)) ++ resched_latency = cpu_resched_latency(rq); + calc_global_load_tick(rq); + + rq->last_tick = rq->clock; + raw_spin_unlock(&rq->lock); + ++ if (sched_feat(LATENCY_WARN) && resched_latency) ++ resched_latency_warn(cpu, resched_latency); ++ + perf_event_task_tick(); + } + +@@ -4018,7 +4088,9 @@ static void __sched notrace __schedule(bool preempt) + next = choose_next_task(rq, cpu, prev); + clear_tsk_need_resched(prev); + clear_preempt_need_resched(); +- ++#ifdef CONFIG_SCHED_DEBUG ++ rq->last_seen_need_resched_ns = 0; ++#endif + + if (likely(prev != next)) { + next->last_ran = rq->clock_task; +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index cf9ed4480d86..ac11555ba4f1 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -58,6 +58,7 @@ + + #ifdef CONFIG_SCHED_DEBUG + # define SCHED_WARN_ON(x) WARN_ONCE(x, #x) ++extern void resched_latency_warn(int cpu, u64 latency); + #else + # define SCHED_WARN_ON(x) ({ (void)(x), 0; }) + static inline void resched_latency_warn(int cpu, u64 latency) {} +@@ -155,6 +156,11 @@ struct rq { + + atomic_t nr_iowait; + ++#ifdef CONFIG_SCHED_DEBUG ++ u64 last_seen_need_resched_ns; ++ int ticks_without_resched; ++#endif ++ + #ifdef CONFIG_MEMBARRIER + int membarrier_state; + #endif +diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c +index c87c87da2ba5..4e1680785704 100644 +--- a/kernel/sched/debug.c ++++ b/kernel/sched/debug.c +@@ -1076,6 +1076,7 @@ void proc_sched_set_task(struct 
task_struct *p) + memset(&p->stats, 0, sizeof(p->stats)); + #endif + } ++#endif /* !CONFIG_SCHED_ALT */ + + void resched_latency_warn(int cpu, u64 latency) + { +@@ -1086,4 +1087,3 @@ void resched_latency_warn(int cpu, u64 latency) + "without schedule\n", + cpu, latency, cpu_rq(cpu)->ticks_without_resched); + } +-#endif /* !CONFIG_SCHED_ALT */ +-- +2.37.0 + + +From 6e43f2c913988e36e242cd28ef52d4a35e05eab8 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 11 Jun 2021 18:22:00 +0000 +Subject: [PATCH 147/297] Project-C v5.13-r0 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 345a2b606b65..2a485c184832 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.12-r1" ++#define ALT_SCHED_VERSION "v5.13-r0" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 47d4d9fce817d9c7ccf27639854e8ecee8936c23 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 15 Jun 2021 09:48:47 +0000 +Subject: [PATCH 148/297] sched/alt: irq save/restore in migration_cpu_stop() + +--- + kernel/sched/alt_core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 2a485c184832..3680162d8d19 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1344,12 +1344,13 @@ static int migration_cpu_stop(void *data) + struct migration_arg *arg = data; + struct task_struct *p = arg->task; + struct rq *rq = this_rq(); ++ unsigned long flags; + + /* + * The original target CPU might have gone down and we might + * be on another CPU but it doesn't matter. + */ +- local_irq_disable(); ++ local_irq_save(flags); + /* + * We need to explicitly wake pending tasks before running + * __migrate_task() such that we will not miss enforcing cpus_ptr +@@ -1367,9 +1368,8 @@ static int migration_cpu_stop(void *data) + if (task_rq(p) == rq && task_on_rq_queued(p)) + rq = __migrate_task(rq, p, arg->dest_cpu); + raw_spin_unlock(&rq->lock); +- raw_spin_unlock(&p->pi_lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); + +- local_irq_enable(); + return 0; + } + +-- +2.37.0 + + +From 923b01e019bbce8d8b400c7b2dfc51f5ecd7945f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 17 May 2021 10:25:28 +0000 +Subject: [PATCH 149/297] sched/pds: PDS improvement. + +PDS uses bitmap queue as queue data structure. Rename maybe needed after +all improvement are done. 
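For reference, the queue this commit introduces is a per-priority array of intrusive lists plus a bitmap of non-empty levels, so "pick the highest-priority runnable task" becomes a find-first-set-bit followed by a list-head lookup. Below is a minimal user-space sketch of that idea only; the toy_task/bq names are illustrative stand-ins for the kernel's task_struct, list_head and find_first_bit() machinery shown in the hunks that follow, not part of the patch itself.

/*
 * Sketch of a bitmap-indexed run queue: one sentinel list per priority
 * level, plus a 64-bit bitmap whose set bits mark non-empty levels.
 * Lower bit index == higher priority, matching the kernel convention.
 */
#include <stdio.h>
#include <stdint.h>

#define NR_LEVELS 64                    /* one machine word for the sketch */

struct toy_task {
	const char *name;
	int prio;                       /* 0 = highest priority */
	struct toy_task *next, *prev;   /* intrusive circular list */
};

struct bq {
	uint64_t bitmap;                     /* bit i set => heads[i] non-empty */
	struct toy_task heads[NR_LEVELS];    /* sentinel nodes */
};

static void bq_init(struct bq *q)
{
	q->bitmap = 0;
	for (int i = 0; i < NR_LEVELS; i++)
		q->heads[i].next = q->heads[i].prev = &q->heads[i];
}

static void bq_enqueue(struct bq *q, struct toy_task *p)
{
	struct toy_task *head = &q->heads[p->prio];

	/* add at tail: FIFO order within one priority level */
	p->prev = head->prev;
	p->next = head;
	head->prev->next = p;
	head->prev = p;
	q->bitmap |= 1ULL << p->prio;
}

static void bq_dequeue(struct bq *q, struct toy_task *p)
{
	p->prev->next = p->next;
	p->next->prev = p->prev;
	if (q->heads[p->prio].next == &q->heads[p->prio])  /* level now empty */
		q->bitmap &= ~(1ULL << p->prio);
}

static struct toy_task *bq_first(struct bq *q)
{
	if (!q->bitmap)
		return NULL;
	/* lowest set bit == highest-priority non-empty level */
	return q->heads[__builtin_ctzll(q->bitmap)].next;
}

int main(void)
{
	struct bq q;
	struct toy_task a = { "a", 5 }, b = { "b", 2 }, c = { "c", 5 };

	bq_init(&q);
	bq_enqueue(&q, &a);
	bq_enqueue(&q, &b);
	bq_enqueue(&q, &c);
	printf("first: %s\n", bq_first(&q)->name);   /* "b" (prio 2) */
	bq_dequeue(&q, &b);
	printf("first: %s\n", bq_first(&q)->name);   /* "a" (FIFO at prio 5) */
	return 0;
}

Unlike the skip list it replaces, lookup cost here does not depend on how many tasks are queued, at the price of a fixed set of priority levels; the real change additionally rotates the normal-priority slots against rq->time_edge so deadlines age without any re-sorting.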
+--- + include/linux/sched.h | 7 +- + include/linux/skip_list.h | 175 ------------------------ + init/init_task.c | 3 +- + kernel/sched/alt_core.c | 11 +- + kernel/sched/alt_sched.h | 3 +- + kernel/sched/pds.h | 5 + + kernel/sched/pds_imp.h | 281 ++++++++++++++++++++++---------------- + 7 files changed, 183 insertions(+), 302 deletions(-) + delete mode 100644 include/linux/skip_list.h + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 2bbe9f377c6e..948c6d84757a 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -34,7 +34,6 @@ + #include + #include + #include +-#include + #include + + /* task_struct member predeclarations (sorted alphabetically): */ +@@ -791,10 +790,8 @@ struct task_struct { + #ifdef CONFIG_SCHED_PDS + u64 deadline; + u64 priodl; +- /* skip list level */ +- int sl_level; +- /* skip list node */ +- struct skiplist_node sl_node; ++ int sq_idx; ++ struct list_head sq_node; + #endif /* CONFIG_SCHED_PDS */ + /* sched_clock time spent running */ + u64 sched_time; +diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h +deleted file mode 100644 +index 637c83ecbd6b..000000000000 +--- a/include/linux/skip_list.h ++++ /dev/null +@@ -1,175 +0,0 @@ +-/* +- * Copyright (C) 2016 Alfred Chen. +- * +- * Code based on Con Kolivas's skip list implementation for BFS, and +- * which is based on example originally by William Pugh. +- * +- * Skip Lists are a probabilistic alternative to balanced trees, as +- * described in the June 1990 issue of CACM and were invented by +- * William Pugh in 1987. +- * +- * A couple of comments about this implementation: +- * +- * This file only provides a infrastructure of skip list. +- * +- * skiplist_node is embedded into container data structure, to get rid +- * the dependency of kmalloc/kfree operation in scheduler code. +- * +- * A customized search function should be defined using DEFINE_SKIPLIST_INSERT +- * macro and be used for skip list insert operation. +- * +- * Random Level is also not defined in this file, instead, it should be +- * customized implemented and set to node->level then pass to the customized +- * skiplist_insert function. +- * +- * Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) +- * +- * NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, +- * considering that there will be 256 entries to enable the top level when using +- * random level p=0.5, and that number is more than enough for a run queue usage +- * in a scheduler usage. And it also help to reduce the memory usage of the +- * embedded skip list node in task_struct to about 50%. +- * +- * The insertion routine has been implemented so as to use the +- * dirty hack described in the CACM paper: if a random level is +- * generated that is more than the current maximum level, the +- * current maximum level plus one is used instead. +- * +- * BFS Notes: In this implementation of skiplists, there are bidirectional +- * next/prev pointers and the insert function returns a pointer to the actual +- * node the value is stored. The key here is chosen by the scheduler so as to +- * sort tasks according to the priority list requirements and is no longer used +- * by the scheduler after insertion. The scheduler lookup, however, occurs in +- * O(1) time because it is always the first item in the level 0 linked list. 
+- * Since the task struct stores a copy of the node pointer upon skiplist_insert, +- * it can also remove it much faster than the original implementation with the +- * aid of prev<->next pointer manipulation and no searching. +- */ +-#ifndef _LINUX_SKIP_LIST_H +-#define _LINUX_SKIP_LIST_H +- +-#include +- +-#define NUM_SKIPLIST_LEVEL (4) +- +-struct skiplist_node { +- int level; /* Levels in this node */ +- struct skiplist_node *next[NUM_SKIPLIST_LEVEL]; +- struct skiplist_node *prev[NUM_SKIPLIST_LEVEL]; +-}; +- +-#define SKIPLIST_NODE_INIT(name) { 0,\ +- {&name, &name, &name, &name},\ +- {&name, &name, &name, &name},\ +- } +- +-/** +- * INIT_SKIPLIST_NODE -- init a skiplist_node, expecially for header +- * @node: the skip list node to be inited. +- */ +-static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) +-{ +- int i; +- +- node->level = 0; +- for (i = 0; i < NUM_SKIPLIST_LEVEL; i++) { +- WRITE_ONCE(node->next[i], node); +- node->prev[i] = node; +- } +-} +- +-/** +- * skiplist_entry - get the struct for this entry +- * @ptr: the &struct skiplist_node pointer. +- * @type: the type of the struct this is embedded in. +- * @member: the name of the skiplist_node within the struct. +- */ +-#define skiplist_entry(ptr, type, member) \ +- container_of(ptr, type, member) +- +-/** +- * DEFINE_SKIPLIST_INSERT_FUNC -- macro to define a customized skip list insert +- * function, which takes two parameters, first one is the header node of the +- * skip list, second one is the skip list node to be inserted +- * @func_name: the customized skip list insert function name +- * @search_func: the search function to be used, which takes two parameters, +- * 1st one is the itrator of skiplist_node in the list, the 2nd is the skip list +- * node to be inserted, the function should return true if search should be +- * continued, otherwise return false. +- * Returns 1 if @node is inserted as the first item of skip list at level zero, +- * otherwise 0 +- */ +-#define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ +-static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ +-{\ +- struct skiplist_node *p, *q;\ +- unsigned int k = head->level;\ +- unsigned int l = node->level;\ +-\ +- p = head;\ +- if (l > k) {\ +- l = node->level = ++head->level;\ +-\ +- node->next[l] = head;\ +- node->prev[l] = head;\ +- head->next[l] = node;\ +- head->prev[l] = node;\ +-\ +- do {\ +- while (q = p->next[k], q != head && search_func(q, node))\ +- p = q;\ +-\ +- node->prev[k] = p;\ +- node->next[k] = q;\ +- q->prev[k] = node;\ +- p->next[k] = node;\ +- } while (k--);\ +-\ +- return (p == head);\ +- }\ +-\ +- while (k > l) {\ +- while (q = p->next[k], q != head && search_func(q, node))\ +- p = q;\ +- k--;\ +- }\ +-\ +- do {\ +- while (q = p->next[k], q != head && search_func(q, node))\ +- p = q;\ +-\ +- node->prev[k] = p;\ +- node->next[k] = q;\ +- q->prev[k] = node;\ +- p->next[k] = node;\ +- } while (k--);\ +-\ +- return (p == head);\ +-} +- +-/** +- * skiplist_del_init -- delete skip list node from a skip list and reset it's +- * init state +- * @head: the header node of the skip list to be deleted from. +- * @node: the skip list node to be deleted, the caller need to ensure @node is +- * in skip list which @head represent. 
+- * Returns 1 if @node is the first item of skip level at level zero, otherwise 0 +- */ +-static inline int +-skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) +-{ +- unsigned int i, level = node->level; +- +- for (i = 0; i <= level; i++) { +- node->prev[i]->next[i] = node->next[i]; +- node->next[i]->prev[i] = node->prev[i]; +- } +- if (level == head->level && level) { +- while (head->next[level] == head && level) +- level--; +- head->level = level; +- } +- +- return (node->prev[0] == head); +-} +-#endif /* _LINUX_SKIP_LIST_H */ +diff --git a/init/init_task.c b/init/init_task.c +index b59f5a2fefc8..0374297c45b9 100644 +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -107,8 +107,7 @@ struct task_struct init_task + #endif + #ifdef CONFIG_SCHED_PDS + .deadline = 0, +- .sl_level = 0, +- .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), ++ .sq_node = LIST_HEAD_INIT(init_task.sq_node), + #endif + .time_slice = HZ, + #else +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3680162d8d19..01abbf28670f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -509,6 +509,7 @@ static inline void update_rq_clock(struct rq *rq) + if (unlikely(delta <= 0)) + return; + rq->clock += delta; ++ update_rq_time_edge(rq); + update_rq_clock_task(rq, delta); + } + +@@ -3815,7 +3816,15 @@ void alt_sched_debug(void) + sched_sg_idle_mask.bits[0]); + } + #else +-inline void alt_sched_debug(void) {} ++int alt_debug[20]; ++ ++inline void alt_sched_debug(void) ++{ ++ int i; ++ ++ for (i = 0; i < 3; i++) ++ printk(KERN_INFO "sched: %d\n", alt_debug[i]); ++} + #endif + + #ifdef CONFIG_SMP +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index ac11555ba4f1..cfb4669dfbbf 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -147,7 +147,8 @@ struct rq { + struct bmq queue; + #endif + #ifdef CONFIG_SCHED_PDS +- struct skiplist_node sl_header; ++ struct sched_queue queue; ++ u64 time_edge; + #endif + unsigned long watermark; + +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 623908cf4380..3afc6fd7a27f 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -6,4 +6,9 @@ + #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) + #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) + ++struct sched_queue { ++ DECLARE_BITMAP(bitmap, SCHED_BITS); ++ struct list_head heads[SCHED_BITS]; ++}; ++ + #endif +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index 335ce3a8e3ec..35886852c71a 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -11,26 +11,7 @@ static const u64 user_prio2deadline[NICE_WIDTH] = { + /* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 + }; + +-static const unsigned char dl_level_map[] = { +-/* 0 4 8 12 */ +- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, +-/* 16 20 24 28 */ +- 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, +-/* 32 36 40 44 */ +- 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, +-/* 48 52 56 60 */ +- 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, +-/* 64 68 72 76 */ +- 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 7, 6, 5, 4, 3, 2, +-/* 80 84 88 92 */ +- 1, 0 +-}; +- +-/* DEFAULT_SCHED_PRIO: +- * dl_level_map[(user_prio2deadline[39] - user_prio2deadline[0]) >> 21] = +- * dl_level_map[68] = +- * 10 +- */ ++#define SCHED_PRIO_SLOT (4ULL << 20) + #define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) + + static inline int normal_prio(struct task_struct *p) +@@ -41,21 +22,46 @@ static inline int 
normal_prio(struct task_struct *p) + return MAX_RT_PRIO; + } + ++extern int alt_debug[20]; ++ + static inline int +-task_sched_prio(const struct task_struct *p, const struct rq *rq) ++task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { +- size_t delta; ++ int delta; ++ ++ delta = rq->time_edge + 20 - (p->deadline >> 23); ++ if (delta < 0) { ++ delta = 0; ++ alt_debug[0]++; ++ } ++ delta = 19 - min(delta, 19); ++ ++ return delta; ++} + ++static inline int ++task_sched_prio(const struct task_struct *p, const struct rq *rq) ++{ + if (p == rq->idle) + return IDLE_TASK_SCHED_PRIO; + + if (p->prio < MAX_RT_PRIO) + return p->prio; + +- delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 21; +- delta = min((size_t)delta, ARRAY_SIZE(dl_level_map) - 1); ++ return MAX_RT_PRIO + task_sched_prio_normal(p, rq); ++} ++ ++static inline int ++task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) ++{ ++ if (p == rq->idle) ++ return IDLE_TASK_SCHED_PRIO; + +- return MAX_RT_PRIO + dl_level_map[delta]; ++ if (p->prio < MAX_RT_PRIO) ++ return p->prio; ++ ++ return MAX_RT_PRIO + ++ (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; + } + + int task_running_nice(struct task_struct *p) +@@ -68,6 +74,53 @@ static inline void update_task_priodl(struct task_struct *p) + p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); + } + ++ ++DECLARE_BITMAP(normal_mask, SCHED_BITS); ++ ++static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) ++{ ++ DECLARE_BITMAP(normal, SCHED_BITS); ++ ++ bitmap_and(normal, mask, normal_mask, SCHED_BITS); ++ bitmap_shift_right(normal, normal, shift, SCHED_BITS); ++ bitmap_and(normal, normal, normal_mask, SCHED_BITS); ++ ++ bitmap_andnot(mask, mask, normal_mask, SCHED_BITS); ++ bitmap_or(mask, mask, normal, SCHED_BITS); ++} ++ ++static inline void update_rq_time_edge(struct rq *rq) ++{ ++ struct list_head head; ++ u64 old = rq->time_edge; ++ u64 now = rq->clock >> 23; ++ u64 prio, delta = min(20ULL, now - old); ++ ++ if (now == old) ++ return; ++ ++ INIT_LIST_HEAD(&head); ++ ++ prio = MAX_RT_PRIO; ++ for_each_set_bit_from(prio, rq->queue.bitmap, MAX_RT_PRIO + delta) { ++ u64 idx; ++ ++ idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + rq->time_edge) % 20; ++ list_splice_tail_init(rq->queue.heads + idx, &head); ++ } ++ sched_shift_normal_bitmap(rq->queue.bitmap, delta); ++ rq->time_edge = now; ++ if (!list_empty(&head)) { ++ struct task_struct *p; ++ ++ list_for_each_entry(p, &head, sq_node) ++ p->sq_idx = MAX_RT_PRIO + now % 20; ++ ++ list_splice(&head, rq->queue.heads + MAX_RT_PRIO + now % 20); ++ set_bit(MAX_RT_PRIO, rq->queue.bitmap); ++ } ++} ++ + static inline void requeue_task(struct task_struct *p, struct rq *rq); + + static inline void time_slice_expired(struct task_struct *p, struct rq *rq) +@@ -77,40 +130,25 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + + if (p->prio >= MAX_RT_PRIO) + p->deadline = rq->clock + +- user_prio2deadline[p->static_prio - MAX_RT_PRIO]; ++ SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); + update_task_priodl(p); + + if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) + requeue_task(p, rq); + } + +-/* +- * pds_skiplist_task_search -- search function used in PDS run queue skip list +- * node insert operation. +- * @it: iterator pointer to the node in the skip list +- * @node: pointer to the skiplist_node to be inserted +- * +- * Returns true if key of @it is less or equal to key value of @node, otherwise +- * false. 
+- */ +-static inline bool +-pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) +-{ +- return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= +- skiplist_entry(node, struct task_struct, sl_node)->priodl); +-} +- +-/* +- * Define the skip list insert function for PDS +- */ +-DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); +- + /* + * Init the queue structure in rq + */ + static inline void sched_queue_init(struct rq *rq) + { +- INIT_SKIPLIST_NODE(&rq->sl_header); ++ struct sched_queue *q = &rq->queue; ++ int i; ++ ++ bitmap_set(normal_mask, MAX_RT_PRIO, 20); ++ bitmap_zero(q->bitmap, SCHED_BITS); ++ for(i = 0; i < SCHED_BITS; i++) ++ INIT_LIST_HEAD(&q->heads[i]); + } + + /* +@@ -119,19 +157,33 @@ static inline void sched_queue_init(struct rq *rq) + */ + static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) + { ++ struct sched_queue *q = &rq->queue; + /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ +- int default_prio = idle->prio; + +- idle->prio = MAX_PRIO; +- idle->deadline = 0ULL; +- update_task_priodl(idle); ++ idle->sq_idx = IDLE_TASK_SCHED_PRIO; ++ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); ++ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); ++ set_bit(idle->sq_idx, q->bitmap); ++} + +- INIT_SKIPLIST_NODE(&rq->sl_header); ++static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) ++{ ++ if (IDLE_TASK_SCHED_PRIO == idx || ++ idx < MAX_RT_PRIO) ++ return idx; + +- idle->sl_node.level = idle->sl_level; +- pds_skiplist_insert(&rq->sl_header, &idle->sl_node); ++ return MAX_RT_PRIO + ++ ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; ++} + +- idle->prio = default_prio; ++static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) ++{ ++ if (IDLE_TASK_SCHED_PRIO == idx || ++ idx < MAX_RT_PRIO) ++ return idx; ++ ++ return MAX_RT_PRIO + ++ ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; + } + + /* +@@ -139,107 +191,99 @@ static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle + */ + static inline struct task_struct *sched_rq_first_task(struct rq *rq) + { +- struct skiplist_node *node = rq->sl_header.next[0]; ++ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; + +- BUG_ON(node == &rq->sl_header); +- return skiplist_entry(node, struct task_struct, sl_node); ++ /* ++ if (list_empty(head)) { ++ pr_err("BUG: cpu%d(time_edge%llu) prio%lu idx%lu mismatched\n", ++ rq->cpu, rq->time_edge, idx, sched_prio2idx(idx, rq)); ++ BUG(); ++ }*/ ++ return list_first_entry(head, struct task_struct, sq_node); + } + + static inline struct task_struct * + sched_rq_next_task(struct task_struct *p, struct rq *rq) + { +- struct skiplist_node *next = p->sl_node.next[0]; ++ unsigned long idx = p->sq_idx; ++ struct list_head *head = &rq->queue.heads[idx]; ++ ++ if (list_is_last(&p->sq_node, head)) { ++ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, ++ sched_idx2prio(idx, rq) + 1); ++ head = &rq->queue.heads[sched_prio2idx(idx, rq)]; ++ ++ return list_first_entry(head, struct task_struct, sq_node); ++ } + +- BUG_ON(next == &rq->sl_header); +- return skiplist_entry(next, struct task_struct, sl_node); ++ return list_next_entry(p, sq_node); + } + + static inline unsigned long sched_queue_watermark(struct rq *rq) + { +- return task_sched_prio(sched_rq_first_task(rq), rq); ++ return find_first_bit(rq->queue.bitmap, SCHED_BITS); + } + + #define 
__SCHED_DEQUEUE_TASK(p, rq, flags, func) \ + psi_dequeue(p, flags & DEQUEUE_SLEEP); \ + sched_info_dequeued(rq, p); \ + \ +- if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { \ ++ list_del(&p->sq_node); \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ ++ clear_bit(sched_idx2prio(p->sq_idx, rq), \ ++ rq->queue.bitmap); \ + func; \ +- } ++ } \ ++ /*\ ++ pr_info("-->: cpu%d(time_edge%llu) prio%lu idx%u\n", \ ++ rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ ++ */ + + #define __SCHED_ENQUEUE_TASK(p, rq, flags) \ + sched_info_queued(rq, p); \ + psi_enqueue(p, flags); \ + \ +- p->sl_node.level = p->sl_level; \ +- pds_skiplist_insert(&rq->sl_header, &p->sl_node) ++ p->sq_idx = task_sched_prio_idx(p, rq); \ ++ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ ++ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ ++ /*\ ++ pr_info("<--: cpu%d(time_edge%llu) prio%lu idx%u\n", \ ++ rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ ++ */ + + /* + * Requeue a task @p to @rq + */ + #define __SCHED_REQUEUE_TASK(p, rq, func) \ + {\ +- bool b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); \ ++ int idx = task_sched_prio_idx(p, rq); \ + \ +- p->sl_node.level = p->sl_level; \ +- if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { \ ++ list_del(&p->sq_node); \ ++ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ ++ if (idx != p->sq_idx) { \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) \ ++ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ ++ p->sq_idx = idx; \ ++ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ + func; \ ++ /*\ ++ pr_info("<->: cpu%d(time_edge%llu) prio%lu idx%u\n", \ ++ rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ ++ */\ + } \ + } + + static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + { +- struct skiplist_node *node; +- +- node = p->sl_node.prev[0]; +- if (node != &rq->sl_header && +- skiplist_entry(node, struct task_struct, sl_node)->priodl > p->priodl) +- return true; +- +- node = p->sl_node.next[0]; +- if (node != &rq->sl_header && +- skiplist_entry(node, struct task_struct, sl_node)->priodl < p->priodl) +- return true; +- +- return false; +-} +- +-/* +- * pds_skiplist_random_level -- Returns a pseudo-random level number for skip +- * list node which is used in PDS run queue. +- * +- * __ffs() is used to satisfy p = 0.5 between each levels, and there should be +- * platform instruction(known as ctz/clz) for acceleration. +- * +- * The skiplist level for a task is populated when task is created and doesn't +- * change in task's life time. When task is being inserted into run queue, this +- * skiplist level is set to task's sl_node->level, the skiplist insert function +- * may change it based on current level of the skip lsit. +- */ +-static inline int pds_skiplist_random_level(const struct task_struct *p) +-{ +- /* +- * 1. Some architectures don't have better than microsecond resolution +- * so mask out ~microseconds as a factor of the random seed for skiplist +- * insertion. +- * 2. Use address of task structure pointer as another factor of the +- * random seed for task burst forking scenario. 
+- */ +- unsigned long randseed = (task_rq(p)->clock ^ (unsigned long)p) >> 10; +- +- randseed &= __GENMASK(NUM_SKIPLIST_LEVEL - 1, 0); +- if (randseed) +- return __ffs(randseed); +- +- return (NUM_SKIPLIST_LEVEL - 1); ++ return (task_sched_prio_idx(p, rq) != p->sq_idx); + } + + static void sched_task_fork(struct task_struct *p, struct rq *rq) + { +- p->sl_level = pds_skiplist_random_level(p); + if (p->prio >= MAX_RT_PRIO) + p->deadline = rq->clock + +- user_prio2deadline[p->static_prio - MAX_RT_PRIO]; ++ SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); + update_task_priodl(p); + } + +@@ -261,9 +305,10 @@ int task_prio(const struct task_struct *p) + if (p->prio < MAX_RT_PRIO) + return (p->prio - MAX_RT_PRIO); + +- preempt_disable(); +- ret = task_sched_prio(p, this_rq()) - MAX_RT_PRIO; +- preempt_enable(); ++ /*preempt_disable(); ++ ret = task_sched_prio(p, task_rq(p)) - MAX_RT_PRIO;*/ ++ ret = p->static_prio - MAX_RT_PRIO; ++ /*preempt_enable();*/ + + return ret; + } +-- +2.37.0 + + +From 41ea91a3c9688bca5332d74d25bcf199e9733a0b Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 17 May 2021 10:38:00 +0000 +Subject: [PATCH 150/297] sched/pds: Remove unused priodl in task structure + +--- + include/linux/sched.h | 1 - + include/linux/sched/deadline.h | 2 +- + kernel/sched/alt_core.c | 3 --- + kernel/sched/pds_imp.h | 8 -------- + 4 files changed, 1 insertion(+), 13 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 948c6d84757a..41bf98528037 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -789,7 +789,6 @@ struct task_struct { + #endif /* CONFIG_SCHED_BMQ */ + #ifdef CONFIG_SCHED_PDS + u64 deadline; +- u64 priodl; + int sq_idx; + struct list_head sq_node; + #endif /* CONFIG_SCHED_PDS */ +diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h +index 9516a98cf160..20c59b190b1a 100644 +--- a/include/linux/sched/deadline.h ++++ b/include/linux/sched/deadline.h +@@ -12,7 +12,7 @@ static inline int dl_task(struct task_struct *p) + #endif + + #ifdef CONFIG_SCHED_PDS +-#define __tsk_deadline(p) ((p)->priodl) ++#define __tsk_deadline(p) ((((u64) ((p)->prio))<<56) | (((p)->deadline)>>8)) + #endif + + #else +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 01abbf28670f..cbca3a54f912 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4628,7 +4628,6 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) + + trace_sched_pi_setprio(p, pi_task); + p->prio = prio; +- update_task_priodl(p); + + check_task_changed(rq, p); + out_unlock: +@@ -4673,7 +4672,6 @@ void set_user_nice(struct task_struct *p, long nice) + goto out_unlock; + + p->prio = effective_prio(p); +- update_task_priodl(p); + + check_task_changed(rq, p); + out_unlock: +@@ -4823,7 +4821,6 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, + p->prio = normal_prio(p); + if (keep_boost) + p->prio = rt_effective_prio(p, p->prio); +- update_task_priodl(p); + } + + /* +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index 35886852c71a..c9ab90f8d5c6 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -69,12 +69,6 @@ int task_running_nice(struct task_struct *p) + return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; + } + +-static inline void update_task_priodl(struct task_struct *p) +-{ +- p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); +-} +- +- + DECLARE_BITMAP(normal_mask, SCHED_BITS); + + static inline void 
sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) +@@ -131,7 +125,6 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + if (p->prio >= MAX_RT_PRIO) + p->deadline = rq->clock + + SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); +- update_task_priodl(p); + + if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) + requeue_task(p, rq); +@@ -284,7 +277,6 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) + if (p->prio >= MAX_RT_PRIO) + p->deadline = rq->clock + + SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); +- update_task_priodl(p); + } + + /** +-- +2.37.0 + + +From 94accf4e15e13fbcd0792595f818b046acf69da5 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 17 May 2021 12:49:04 +0000 +Subject: [PATCH 151/297] sched/pds: Code clean up. + +--- + kernel/sched/pds_imp.h | 42 +++++++++++++----------------------------- + 1 file changed, 13 insertions(+), 29 deletions(-) + +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +index c9ab90f8d5c6..8cc656a7cc48 100644 +--- a/kernel/sched/pds_imp.h ++++ b/kernel/sched/pds_imp.h +@@ -88,11 +88,12 @@ static inline void update_rq_time_edge(struct rq *rq) + struct list_head head; + u64 old = rq->time_edge; + u64 now = rq->clock >> 23; +- u64 prio, delta = min(20ULL, now - old); ++ u64 prio, delta; + + if (now == old) + return; + ++ delta = min(20ULL, now - old); + INIT_LIST_HEAD(&head); + + prio = MAX_RT_PRIO; +@@ -115,17 +116,20 @@ static inline void update_rq_time_edge(struct rq *rq) + } + } + ++static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) ++{ ++ if (p->prio >= MAX_RT_PRIO) ++ p->deadline = rq->clock + ++ SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); ++} ++ + static inline void requeue_task(struct task_struct *p, struct rq *rq); + + static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + { + /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ + p->time_slice = sched_timeslice_ns; +- +- if (p->prio >= MAX_RT_PRIO) +- p->deadline = rq->clock + +- SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); +- ++ sched_renew_deadline(p, rq); + if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) + requeue_task(p, rq); + } +@@ -187,12 +191,6 @@ static inline struct task_struct *sched_rq_first_task(struct rq *rq) + unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); + const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; + +- /* +- if (list_empty(head)) { +- pr_err("BUG: cpu%d(time_edge%llu) prio%lu idx%lu mismatched\n", +- rq->cpu, rq->time_edge, idx, sched_prio2idx(idx, rq)); +- BUG(); +- }*/ + return list_first_entry(head, struct task_struct, sq_node); + } + +@@ -227,11 +225,7 @@ static inline unsigned long sched_queue_watermark(struct rq *rq) + clear_bit(sched_idx2prio(p->sq_idx, rq), \ + rq->queue.bitmap); \ + func; \ +- } \ +- /*\ +- pr_info("-->: cpu%d(time_edge%llu) prio%lu idx%u\n", \ +- rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ +- */ ++ } + + #define __SCHED_ENQUEUE_TASK(p, rq, flags) \ + sched_info_queued(rq, p); \ +@@ -239,11 +233,7 @@ static inline unsigned long sched_queue_watermark(struct rq *rq) + \ + p->sq_idx = task_sched_prio_idx(p, rq); \ + list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ +- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ +- /*\ +- pr_info("<--: cpu%d(time_edge%llu) prio%lu idx%u\n", \ +- rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ +- */ ++ 
set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); + + /* + * Requeue a task @p to @rq +@@ -260,10 +250,6 @@ static inline unsigned long sched_queue_watermark(struct rq *rq) + p->sq_idx = idx; \ + set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ + func; \ +- /*\ +- pr_info("<->: cpu%d(time_edge%llu) prio%lu idx%u\n", \ +- rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ +- */\ + } \ + } + +@@ -274,9 +260,7 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + + static void sched_task_fork(struct task_struct *p, struct rq *rq) + { +- if (p->prio >= MAX_RT_PRIO) +- p->deadline = rq->clock + +- SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); ++ sched_renew_deadline(p, rq); + } + + /** +-- +2.37.0 + + +From 8635be47b8820c0fb240378109c80b5c92e29837 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 17 May 2021 13:48:11 +0000 +Subject: [PATCH 152/297] sched/alt: BMQ&PDS share same name in data structure + +sq_idx and sq_node are shared in task_struct. +queue is shared in rq. +--- + include/linux/sched.h | 6 ++--- + init/init_task.c | 5 ++-- + kernel/sched/alt_sched.h | 10 +++++--- + kernel/sched/bmq.h | 5 ---- + kernel/sched/bmq_imp.h | 54 ++++++++++++++++++++-------------------- + kernel/sched/pds.h | 5 ---- + 6 files changed, 37 insertions(+), 48 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 41bf98528037..2423bc6713eb 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -782,15 +782,13 @@ struct task_struct { + #ifdef CONFIG_SCHED_ALT + u64 last_ran; + s64 time_slice; ++ int sq_idx; ++ struct list_head sq_node; + #ifdef CONFIG_SCHED_BMQ + int boost_prio; +- int bmq_idx; +- struct list_head bmq_node; + #endif /* CONFIG_SCHED_BMQ */ + #ifdef CONFIG_SCHED_PDS + u64 deadline; +- int sq_idx; +- struct list_head sq_node; + #endif /* CONFIG_SCHED_PDS */ + /* sched_clock time spent running */ + u64 sched_time; +diff --git a/init/init_task.c b/init/init_task.c +index 0374297c45b9..579d99864d49 100644 +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -100,14 +100,13 @@ struct task_struct init_task + .fn = do_no_restart_syscall, + }, + #ifdef CONFIG_SCHED_ALT ++ .sq_node = LIST_HEAD_INIT(init_task.sq_node), + #ifdef CONFIG_SCHED_BMQ + .boost_prio = 0, +- .bmq_idx = 15, +- .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), ++ .sq_idx = 15, + #endif + #ifdef CONFIG_SCHED_PDS + .deadline = 0, +- .sq_node = LIST_HEAD_INIT(init_task.sq_node), + #endif + .time_slice = HZ, + #else +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index cfb4669dfbbf..21f359102fbc 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -131,6 +131,11 @@ static inline int task_on_rq_migrating(struct task_struct *p) + #define WF_MIGRATED 0x04 /* internal use, task got migrated */ + #define WF_ON_CPU 0x08 /* Wakee is on_rq */ + ++struct sched_queue { ++ DECLARE_BITMAP(bitmap, SCHED_BITS); ++ struct list_head heads[SCHED_BITS]; ++}; ++ + /* + * This is the main, per-CPU runqueue data structure. + * This data should only be modified by the local cpu. 
+@@ -143,11 +148,8 @@ struct rq { + struct task_struct *idle, *stop, *skip; + struct mm_struct *prev_mm; + +-#ifdef CONFIG_SCHED_BMQ +- struct bmq queue; +-#endif +-#ifdef CONFIG_SCHED_PDS + struct sched_queue queue; ++#ifdef CONFIG_SCHED_PDS + u64 time_edge; + #endif + unsigned long watermark; +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index 4ce30c30bd3e..2ae022def9f7 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -6,9 +6,4 @@ + #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH + 2 * MAX_PRIORITY_ADJ + 1) + #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) + +-struct bmq { +- DECLARE_BITMAP(bitmap, SCHED_BITS); +- struct list_head heads[SCHED_BITS]; +-}; +- + #endif +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +index 7c71f1141d00..f6bd3421b95c 100644 +--- a/kernel/sched/bmq_imp.h ++++ b/kernel/sched/bmq_imp.h +@@ -67,8 +67,6 @@ inline int task_running_nice(struct task_struct *p) + return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); + } + +-static inline void update_task_priodl(struct task_struct *p) {} +- + static inline unsigned long sched_queue_watermark(struct rq *rq) + { + return find_first_bit(rq->queue.bitmap, SCHED_BITS); +@@ -76,7 +74,7 @@ static inline unsigned long sched_queue_watermark(struct rq *rq) + + static inline void sched_queue_init(struct rq *rq) + { +- struct bmq *q = &rq->queue; ++ struct sched_queue *q = &rq->queue; + int i; + + bitmap_zero(q->bitmap, SCHED_BITS); +@@ -86,12 +84,12 @@ static inline void sched_queue_init(struct rq *rq) + + static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) + { +- struct bmq *q = &rq->queue; ++ struct sched_queue *q = &rq->queue; + +- idle->bmq_idx = IDLE_TASK_SCHED_PRIO; +- INIT_LIST_HEAD(&q->heads[idle->bmq_idx]); +- list_add(&idle->bmq_node, &q->heads[idle->bmq_idx]); +- set_bit(idle->bmq_idx, q->bitmap); ++ idle->sq_idx = IDLE_TASK_SCHED_PRIO; ++ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); ++ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); ++ set_bit(idle->sq_idx, q->bitmap); + } + + /* +@@ -102,32 +100,32 @@ static inline struct task_struct *sched_rq_first_task(struct rq *rq) + unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); + const struct list_head *head = &rq->queue.heads[idx]; + +- return list_first_entry(head, struct task_struct, bmq_node); ++ return list_first_entry(head, struct task_struct, sq_node); + } + + static inline struct task_struct * + sched_rq_next_task(struct task_struct *p, struct rq *rq) + { +- unsigned long idx = p->bmq_idx; ++ unsigned long idx = p->sq_idx; + struct list_head *head = &rq->queue.heads[idx]; + +- if (list_is_last(&p->bmq_node, head)) { ++ if (list_is_last(&p->sq_node, head)) { + idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); + head = &rq->queue.heads[idx]; + +- return list_first_entry(head, struct task_struct, bmq_node); ++ return list_first_entry(head, struct task_struct, sq_node); + } + +- return list_next_entry(p, bmq_node); ++ return list_next_entry(p, sq_node); + } + + #define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ + psi_dequeue(p, flags & DEQUEUE_SLEEP); \ + sched_info_dequeued(rq, p); \ + \ +- list_del(&p->bmq_node); \ +- if (list_empty(&rq->queue.heads[p->bmq_idx])) { \ +- clear_bit(p->bmq_idx, rq->queue.bitmap);\ ++ list_del(&p->sq_node); \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ ++ clear_bit(p->sq_idx, rq->queue.bitmap);\ + func; \ + } + +@@ -135,28 +133,28 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) + sched_info_queued(rq, p); \ + 
psi_enqueue(p, flags); \ + \ +- p->bmq_idx = task_sched_prio(p, rq); \ +- list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ +- set_bit(p->bmq_idx, rq->queue.bitmap) ++ p->sq_idx = task_sched_prio(p, rq); \ ++ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ ++ set_bit(p->sq_idx, rq->queue.bitmap) + + #define __SCHED_REQUEUE_TASK(p, rq, func) \ + { \ + int idx = task_sched_prio(p, rq); \ + \ +- list_del(&p->bmq_node); \ +- list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); \ +- if (idx != p->bmq_idx) { \ +- if (list_empty(&rq->queue.heads[p->bmq_idx])) \ +- clear_bit(p->bmq_idx, rq->queue.bitmap); \ +- p->bmq_idx = idx; \ +- set_bit(p->bmq_idx, rq->queue.bitmap); \ ++ list_del(&p->sq_node); \ ++ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ ++ if (idx != p->sq_idx) { \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) \ ++ clear_bit(p->sq_idx, rq->queue.bitmap); \ ++ p->sq_idx = idx; \ ++ set_bit(p->sq_idx, rq->queue.bitmap); \ + func; \ + } \ + } + + static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + { +- return (task_sched_prio(p, rq) != p->bmq_idx); ++ return (task_sched_prio(p, rq) != p->sq_idx); + } + + static void sched_task_fork(struct task_struct *p, struct rq *rq) +@@ -201,3 +199,5 @@ static void sched_task_deactivate(struct task_struct *p, struct rq *rq) + if (rq_switch_time(rq) < boost_threshold(p)) + boost_task(p); + } ++ ++static inline void update_rq_time_edge(struct rq *rq) {} +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 3afc6fd7a27f..623908cf4380 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -6,9 +6,4 @@ + #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) + #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) + +-struct sched_queue { +- DECLARE_BITMAP(bitmap, SCHED_BITS); +- struct list_head heads[SCHED_BITS]; +-}; +- + #endif +-- +2.37.0 + + +From 15abc98ac279821cd496765da1cd6f1536f6a40a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 17 May 2021 14:17:37 +0000 +Subject: [PATCH 153/297] sched/alt: [Sync] 163dd7fa459f kthread: Fix + PF_KTHREAD vs to_kthread() race + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index cbca3a54f912..b1c17ff1642c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6210,7 +6210,7 @@ static void balance_push(struct rq *rq) + * histerical raisins. + */ + if (rq->idle == push_task || +- ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || ++ kthread_is_per_cpu(push_task) || + is_migration_disabled(push_task)) { + + /* +-- +2.37.0 + + +From d2156be52fe37185c10f22d238a9d0dc70b36ee9 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 17 May 2021 14:49:46 +0000 +Subject: [PATCH 154/297] sched/alt: Rename BMQ&PDS implement files. 
+ +--- + kernel/sched/alt_core.c | 4 +- + kernel/sched/alt_sched.h | 9 +- + kernel/sched/bmq.h | 206 +++++++++++++++++++++++++- + kernel/sched/bmq_imp.h | 203 -------------------------- + kernel/sched/pds.h | 303 ++++++++++++++++++++++++++++++++++++++- + kernel/sched/pds_imp.h | 300 -------------------------------------- + 6 files changed, 506 insertions(+), 519 deletions(-) + delete mode 100644 kernel/sched/bmq_imp.h + delete mode 100644 kernel/sched/pds_imp.h + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b1c17ff1642c..9ade1b64aa9c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -143,10 +143,10 @@ static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; + static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; + + #ifdef CONFIG_SCHED_BMQ +-#include "bmq_imp.h" ++#include "bmq.h" + #endif + #ifdef CONFIG_SCHED_PDS +-#include "pds_imp.h" ++#include "pds.h" + #endif + + static inline void update_sched_rq_watermark(struct rq *rq) +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 21f359102fbc..58ff6212b446 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -50,12 +50,17 @@ + #include + + #ifdef CONFIG_SCHED_BMQ +-#include "bmq.h" ++/* bits: ++ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ ++#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) + #endif + #ifdef CONFIG_SCHED_PDS +-#include "pds.h" ++/* bits: RT(0-99), nice width / 2, cpu idle task */ ++#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) + #endif + ++#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) ++ + #ifdef CONFIG_SCHED_DEBUG + # define SCHED_WARN_ON(x) WARN_ONCE(x, #x) + extern void resched_latency_warn(int cpu, u64 latency); +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index 2ae022def9f7..f6bd3421b95c 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -1,9 +1,203 @@ +-#ifndef BMQ_H +-#define BMQ_H ++#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" + +-/* bits: +- * RT(0-99), Low prio adj range, nice width, high prio adj range, cpu idle task */ +-#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH + 2 * MAX_PRIORITY_ADJ + 1) +-#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) ++/* ++ * BMQ only routines ++ */ ++#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) ++#define boost_threshold(p) (sched_timeslice_ns >>\ ++ (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) + ++static inline void boost_task(struct task_struct *p) ++{ ++ int limit; ++ ++ switch (p->policy) { ++ case SCHED_NORMAL: ++ limit = -MAX_PRIORITY_ADJ; ++ break; ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ limit = 0; ++ break; ++ default: ++ return; ++ } ++ ++ if (p->boost_prio > limit) ++ p->boost_prio--; ++} ++ ++static inline void deboost_task(struct task_struct *p) ++{ ++ if (p->boost_prio < MAX_PRIORITY_ADJ) ++ p->boost_prio++; ++} ++ ++/* ++ * Common interfaces ++ */ ++static inline int normal_prio(struct task_struct *p) ++{ ++ if (task_has_rt_policy(p)) ++ return MAX_RT_PRIO - 1 - p->rt_priority; ++ ++ return p->static_prio + MAX_PRIORITY_ADJ; ++} ++ ++static inline int task_sched_prio(struct task_struct *p, struct rq *rq) ++{ ++ return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; ++} ++ ++static inline void requeue_task(struct task_struct *p, struct rq *rq); ++ ++static inline void time_slice_expired(struct task_struct *p, struct rq *rq) ++{ ++ p->time_slice = sched_timeslice_ns; ++ ++ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { ++ if (SCHED_RR != p->policy) ++ deboost_task(p); ++ requeue_task(p, rq); ++ } ++} ++ ++inline int task_running_nice(struct task_struct *p) ++{ ++ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); ++} ++ ++static inline unsigned long sched_queue_watermark(struct rq *rq) ++{ ++ return find_first_bit(rq->queue.bitmap, SCHED_BITS); ++} ++ ++static inline void sched_queue_init(struct rq *rq) ++{ ++ struct sched_queue *q = &rq->queue; ++ int i; ++ ++ bitmap_zero(q->bitmap, SCHED_BITS); ++ for(i = 0; i < SCHED_BITS; i++) ++ INIT_LIST_HEAD(&q->heads[i]); ++} ++ ++static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) ++{ ++ struct sched_queue *q = &rq->queue; ++ ++ idle->sq_idx = IDLE_TASK_SCHED_PRIO; ++ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); ++ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); ++ set_bit(idle->sq_idx, q->bitmap); ++} ++ ++/* ++ * This routine used in bmq scheduler only which assume the idle task in the bmq ++ */ ++static inline struct task_struct *sched_rq_first_task(struct rq *rq) ++{ ++ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ const struct list_head *head = &rq->queue.heads[idx]; ++ ++ return list_first_entry(head, struct task_struct, sq_node); ++} ++ ++static inline struct task_struct * ++sched_rq_next_task(struct task_struct *p, struct rq *rq) ++{ ++ unsigned long idx = p->sq_idx; ++ struct list_head *head = &rq->queue.heads[idx]; ++ ++ if (list_is_last(&p->sq_node, head)) { ++ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); ++ head = &rq->queue.heads[idx]; ++ ++ return list_first_entry(head, struct task_struct, sq_node); ++ } ++ ++ return list_next_entry(p, sq_node); ++} ++ ++#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ ++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ ++ sched_info_dequeued(rq, p); \ ++ \ ++ list_del(&p->sq_node); \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ ++ clear_bit(p->sq_idx, rq->queue.bitmap);\ ++ func; \ ++ } ++ ++#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ ++ sched_info_queued(rq, p); \ ++ psi_enqueue(p, flags); \ ++ \ ++ p->sq_idx = task_sched_prio(p, rq); \ ++ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ ++ set_bit(p->sq_idx, rq->queue.bitmap) ++ ++#define __SCHED_REQUEUE_TASK(p, rq, func) \ ++{ \ ++ int idx = task_sched_prio(p, rq); \ ++\ ++ list_del(&p->sq_node); \ ++ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ ++ if (idx != p->sq_idx) { \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) \ ++ clear_bit(p->sq_idx, rq->queue.bitmap); \ ++ p->sq_idx = idx; \ ++ set_bit(p->sq_idx, rq->queue.bitmap); \ ++ func; \ ++ } \ ++} ++ ++static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) ++{ ++ return (task_sched_prio(p, rq) != p->sq_idx); ++} ++ ++static void sched_task_fork(struct task_struct *p, struct rq *rq) ++{ ++ p->boost_prio = (p->boost_prio < 0) ? ++ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; ++} ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The priority value as seen by users in /proc. ++ * ++ * sched policy return value kernel prio user prio/nice/boost ++ * ++ * normal, batch, idle [0 ... 
53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] ++ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] ++ */ ++int task_prio(const struct task_struct *p) ++{ ++ if (p->prio < MAX_RT_PRIO) ++ return (p->prio - MAX_RT_PRIO); ++ return (p->prio - MAX_RT_PRIO + p->boost_prio); ++} ++ ++static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) ++{ ++ p->boost_prio = MAX_PRIORITY_ADJ; ++} ++ ++#ifdef CONFIG_SMP ++static void sched_task_ttwu(struct task_struct *p) ++{ ++ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) ++ boost_task(p); ++} + #endif ++ ++static void sched_task_deactivate(struct task_struct *p, struct rq *rq) ++{ ++ if (rq_switch_time(rq) < boost_threshold(p)) ++ boost_task(p); ++} ++ ++static inline void update_rq_time_edge(struct rq *rq) {} +diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h +deleted file mode 100644 +index f6bd3421b95c..000000000000 +--- a/kernel/sched/bmq_imp.h ++++ /dev/null +@@ -1,203 +0,0 @@ +-#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" +- +-/* +- * BMQ only routines +- */ +-#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) +-#define boost_threshold(p) (sched_timeslice_ns >>\ +- (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) +- +-static inline void boost_task(struct task_struct *p) +-{ +- int limit; +- +- switch (p->policy) { +- case SCHED_NORMAL: +- limit = -MAX_PRIORITY_ADJ; +- break; +- case SCHED_BATCH: +- case SCHED_IDLE: +- limit = 0; +- break; +- default: +- return; +- } +- +- if (p->boost_prio > limit) +- p->boost_prio--; +-} +- +-static inline void deboost_task(struct task_struct *p) +-{ +- if (p->boost_prio < MAX_PRIORITY_ADJ) +- p->boost_prio++; +-} +- +-/* +- * Common interfaces +- */ +-static inline int normal_prio(struct task_struct *p) +-{ +- if (task_has_rt_policy(p)) +- return MAX_RT_PRIO - 1 - p->rt_priority; +- +- return p->static_prio + MAX_PRIORITY_ADJ; +-} +- +-static inline int task_sched_prio(struct task_struct *p, struct rq *rq) +-{ +- return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; +-} +- +-static inline void requeue_task(struct task_struct *p, struct rq *rq); +- +-static inline void time_slice_expired(struct task_struct *p, struct rq *rq) +-{ +- p->time_slice = sched_timeslice_ns; +- +- if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { +- if (SCHED_RR != p->policy) +- deboost_task(p); +- requeue_task(p, rq); +- } +-} +- +-inline int task_running_nice(struct task_struct *p) +-{ +- return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); +-} +- +-static inline unsigned long sched_queue_watermark(struct rq *rq) +-{ +- return find_first_bit(rq->queue.bitmap, SCHED_BITS); +-} +- +-static inline void sched_queue_init(struct rq *rq) +-{ +- struct sched_queue *q = &rq->queue; +- int i; +- +- bitmap_zero(q->bitmap, SCHED_BITS); +- for(i = 0; i < SCHED_BITS; i++) +- INIT_LIST_HEAD(&q->heads[i]); +-} +- +-static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) +-{ +- struct sched_queue *q = &rq->queue; +- +- idle->sq_idx = IDLE_TASK_SCHED_PRIO; +- INIT_LIST_HEAD(&q->heads[idle->sq_idx]); +- list_add(&idle->sq_node, &q->heads[idle->sq_idx]); +- set_bit(idle->sq_idx, q->bitmap); +-} +- +-/* +- * This routine used in bmq scheduler only which assume the idle task in the bmq +- */ +-static inline struct task_struct *sched_rq_first_task(struct rq *rq) +-{ +- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); +- const struct list_head *head = &rq->queue.heads[idx]; +- +- return list_first_entry(head, struct task_struct, sq_node); +-} +- +-static inline struct task_struct * +-sched_rq_next_task(struct task_struct *p, struct rq *rq) +-{ +- unsigned long idx = p->sq_idx; +- struct list_head *head = &rq->queue.heads[idx]; +- +- if (list_is_last(&p->sq_node, head)) { +- idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); +- head = &rq->queue.heads[idx]; +- +- return list_first_entry(head, struct task_struct, sq_node); +- } +- +- return list_next_entry(p, sq_node); +-} +- +-#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ +- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ +- sched_info_dequeued(rq, p); \ +- \ +- list_del(&p->sq_node); \ +- if (list_empty(&rq->queue.heads[p->sq_idx])) { \ +- clear_bit(p->sq_idx, rq->queue.bitmap);\ +- func; \ +- } +- +-#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ +- sched_info_queued(rq, p); \ +- psi_enqueue(p, flags); \ +- \ +- p->sq_idx = task_sched_prio(p, rq); \ +- list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ +- set_bit(p->sq_idx, rq->queue.bitmap) +- +-#define __SCHED_REQUEUE_TASK(p, rq, func) \ +-{ \ +- int idx = task_sched_prio(p, rq); \ +-\ +- list_del(&p->sq_node); \ +- list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ +- if (idx != p->sq_idx) { \ +- if (list_empty(&rq->queue.heads[p->sq_idx])) \ +- clear_bit(p->sq_idx, rq->queue.bitmap); \ +- p->sq_idx = idx; \ +- set_bit(p->sq_idx, rq->queue.bitmap); \ +- func; \ +- } \ +-} +- +-static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) +-{ +- return (task_sched_prio(p, rq) != p->sq_idx); +-} +- +-static void sched_task_fork(struct task_struct *p, struct rq *rq) +-{ +- p->boost_prio = (p->boost_prio < 0) ? +- p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; +-} +- +-/** +- * task_prio - return the priority value of a given task. +- * @p: the task in question. +- * +- * Return: The priority value as seen by users in /proc. +- * +- * sched policy return value kernel prio user prio/nice/boost +- * +- * normal, batch, idle [0 ... 
53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] +- * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] +- */ +-int task_prio(const struct task_struct *p) +-{ +- if (p->prio < MAX_RT_PRIO) +- return (p->prio - MAX_RT_PRIO); +- return (p->prio - MAX_RT_PRIO + p->boost_prio); +-} +- +-static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) +-{ +- p->boost_prio = MAX_PRIORITY_ADJ; +-} +- +-#ifdef CONFIG_SMP +-static void sched_task_ttwu(struct task_struct *p) +-{ +- if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) +- boost_task(p); +-} +-#endif +- +-static void sched_task_deactivate(struct task_struct *p, struct rq *rq) +-{ +- if (rq_switch_time(rq) < boost_threshold(p)) +- boost_task(p); +-} +- +-static inline void update_rq_time_edge(struct rq *rq) {} +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 623908cf4380..8cc656a7cc48 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -1,9 +1,300 @@ +-#ifndef PDS_H +-#define PDS_H ++#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" + +-/* bits: +- * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ +-#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) +-#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) ++static const u64 user_prio2deadline[NICE_WIDTH] = { ++/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, ++/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, ++/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, ++/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, ++/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, ++/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, ++/* 10 */ 73188017, 80506818, 88557499, 97413248, 107154572, ++/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 ++}; + ++#define SCHED_PRIO_SLOT (4ULL << 20) ++#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) ++ ++static inline int normal_prio(struct task_struct *p) ++{ ++ if (task_has_rt_policy(p)) ++ return MAX_RT_PRIO - 1 - p->rt_priority; ++ ++ return MAX_RT_PRIO; ++} ++ ++extern int alt_debug[20]; ++ ++static inline int ++task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) ++{ ++ int delta; ++ ++ delta = rq->time_edge + 20 - (p->deadline >> 23); ++ if (delta < 0) { ++ delta = 0; ++ alt_debug[0]++; ++ } ++ delta = 19 - min(delta, 19); ++ ++ return delta; ++} ++ ++static inline int ++task_sched_prio(const struct task_struct *p, const struct rq *rq) ++{ ++ if (p == rq->idle) ++ return IDLE_TASK_SCHED_PRIO; ++ ++ if (p->prio < MAX_RT_PRIO) ++ return p->prio; ++ ++ return MAX_RT_PRIO + task_sched_prio_normal(p, rq); ++} ++ ++static inline int ++task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) ++{ ++ if (p == rq->idle) ++ return IDLE_TASK_SCHED_PRIO; ++ ++ if (p->prio < MAX_RT_PRIO) ++ return p->prio; ++ ++ return MAX_RT_PRIO + ++ (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; ++} ++ ++int task_running_nice(struct task_struct *p) ++{ ++ return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; ++} ++ ++DECLARE_BITMAP(normal_mask, SCHED_BITS); ++ ++static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) ++{ ++ DECLARE_BITMAP(normal, SCHED_BITS); ++ ++ bitmap_and(normal, mask, normal_mask, SCHED_BITS); ++ bitmap_shift_right(normal, normal, shift, SCHED_BITS); ++ bitmap_and(normal, normal, normal_mask, SCHED_BITS); ++ ++ bitmap_andnot(mask, mask, normal_mask, SCHED_BITS); ++ bitmap_or(mask, mask, normal, SCHED_BITS); ++} ++ ++static 
inline void update_rq_time_edge(struct rq *rq) ++{ ++ struct list_head head; ++ u64 old = rq->time_edge; ++ u64 now = rq->clock >> 23; ++ u64 prio, delta; ++ ++ if (now == old) ++ return; ++ ++ delta = min(20ULL, now - old); ++ INIT_LIST_HEAD(&head); ++ ++ prio = MAX_RT_PRIO; ++ for_each_set_bit_from(prio, rq->queue.bitmap, MAX_RT_PRIO + delta) { ++ u64 idx; ++ ++ idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + rq->time_edge) % 20; ++ list_splice_tail_init(rq->queue.heads + idx, &head); ++ } ++ sched_shift_normal_bitmap(rq->queue.bitmap, delta); ++ rq->time_edge = now; ++ if (!list_empty(&head)) { ++ struct task_struct *p; ++ ++ list_for_each_entry(p, &head, sq_node) ++ p->sq_idx = MAX_RT_PRIO + now % 20; ++ ++ list_splice(&head, rq->queue.heads + MAX_RT_PRIO + now % 20); ++ set_bit(MAX_RT_PRIO, rq->queue.bitmap); ++ } ++} ++ ++static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) ++{ ++ if (p->prio >= MAX_RT_PRIO) ++ p->deadline = rq->clock + ++ SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); ++} ++ ++static inline void requeue_task(struct task_struct *p, struct rq *rq); ++ ++static inline void time_slice_expired(struct task_struct *p, struct rq *rq) ++{ ++ /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ ++ p->time_slice = sched_timeslice_ns; ++ sched_renew_deadline(p, rq); ++ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) ++ requeue_task(p, rq); ++} ++ ++/* ++ * Init the queue structure in rq ++ */ ++static inline void sched_queue_init(struct rq *rq) ++{ ++ struct sched_queue *q = &rq->queue; ++ int i; ++ ++ bitmap_set(normal_mask, MAX_RT_PRIO, 20); ++ bitmap_zero(q->bitmap, SCHED_BITS); ++ for(i = 0; i < SCHED_BITS; i++) ++ INIT_LIST_HEAD(&q->heads[i]); ++} ++ ++/* ++ * Init idle task and put into queue structure of rq ++ * IMPORTANT: may be called multiple times for a single cpu ++ */ ++static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) ++{ ++ struct sched_queue *q = &rq->queue; ++ /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ ++ ++ idle->sq_idx = IDLE_TASK_SCHED_PRIO; ++ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); ++ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); ++ set_bit(idle->sq_idx, q->bitmap); ++} ++ ++static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) ++{ ++ if (IDLE_TASK_SCHED_PRIO == idx || ++ idx < MAX_RT_PRIO) ++ return idx; ++ ++ return MAX_RT_PRIO + ++ ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; ++} ++ ++static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) ++{ ++ if (IDLE_TASK_SCHED_PRIO == idx || ++ idx < MAX_RT_PRIO) ++ return idx; ++ ++ return MAX_RT_PRIO + ++ ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; ++} ++ ++/* ++ * This routine assume that the idle task always in queue ++ */ ++static inline struct task_struct *sched_rq_first_task(struct rq *rq) ++{ ++ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; ++ ++ return list_first_entry(head, struct task_struct, sq_node); ++} ++ ++static inline struct task_struct * ++sched_rq_next_task(struct task_struct *p, struct rq *rq) ++{ ++ unsigned long idx = p->sq_idx; ++ struct list_head *head = &rq->queue.heads[idx]; ++ ++ if (list_is_last(&p->sq_node, head)) { ++ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, ++ sched_idx2prio(idx, rq) + 1); ++ head = &rq->queue.heads[sched_prio2idx(idx, rq)]; ++ ++ return list_first_entry(head, struct task_struct, 
sq_node); ++ } ++ ++ return list_next_entry(p, sq_node); ++} ++ ++static inline unsigned long sched_queue_watermark(struct rq *rq) ++{ ++ return find_first_bit(rq->queue.bitmap, SCHED_BITS); ++} ++ ++#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ ++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ ++ sched_info_dequeued(rq, p); \ ++ \ ++ list_del(&p->sq_node); \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ ++ clear_bit(sched_idx2prio(p->sq_idx, rq), \ ++ rq->queue.bitmap); \ ++ func; \ ++ } ++ ++#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ ++ sched_info_queued(rq, p); \ ++ psi_enqueue(p, flags); \ ++ \ ++ p->sq_idx = task_sched_prio_idx(p, rq); \ ++ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ ++ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); ++ ++/* ++ * Requeue a task @p to @rq ++ */ ++#define __SCHED_REQUEUE_TASK(p, rq, func) \ ++{\ ++ int idx = task_sched_prio_idx(p, rq); \ ++\ ++ list_del(&p->sq_node); \ ++ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ ++ if (idx != p->sq_idx) { \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) \ ++ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ ++ p->sq_idx = idx; \ ++ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ ++ func; \ ++ } \ ++} ++ ++static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) ++{ ++ return (task_sched_prio_idx(p, rq) != p->sq_idx); ++} ++ ++static void sched_task_fork(struct task_struct *p, struct rq *rq) ++{ ++ sched_renew_deadline(p, rq); ++} ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The priority value as seen by users in /proc. ++ * ++ * sched policy return value kernel prio user prio/nice ++ * ++ * normal, batch, idle [0 ... 39] 100 0/[-20 ... 19] ++ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 
99] ++ */ ++int task_prio(const struct task_struct *p) ++{ ++ int ret; ++ ++ if (p->prio < MAX_RT_PRIO) ++ return (p->prio - MAX_RT_PRIO); ++ ++ /*preempt_disable(); ++ ret = task_sched_prio(p, task_rq(p)) - MAX_RT_PRIO;*/ ++ ret = p->static_prio - MAX_RT_PRIO; ++ /*preempt_enable();*/ ++ ++ return ret; ++} ++ ++static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) ++{ ++ time_slice_expired(p, rq); ++} ++ ++#ifdef CONFIG_SMP ++static void sched_task_ttwu(struct task_struct *p) {} + #endif ++static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} +diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h +deleted file mode 100644 +index 8cc656a7cc48..000000000000 +--- a/kernel/sched/pds_imp.h ++++ /dev/null +@@ -1,300 +0,0 @@ +-#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" +- +-static const u64 user_prio2deadline[NICE_WIDTH] = { +-/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, +-/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, +-/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, +-/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, +-/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, +-/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, +-/* 10 */ 73188017, 80506818, 88557499, 97413248, 107154572, +-/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 +-}; +- +-#define SCHED_PRIO_SLOT (4ULL << 20) +-#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) +- +-static inline int normal_prio(struct task_struct *p) +-{ +- if (task_has_rt_policy(p)) +- return MAX_RT_PRIO - 1 - p->rt_priority; +- +- return MAX_RT_PRIO; +-} +- +-extern int alt_debug[20]; +- +-static inline int +-task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) +-{ +- int delta; +- +- delta = rq->time_edge + 20 - (p->deadline >> 23); +- if (delta < 0) { +- delta = 0; +- alt_debug[0]++; +- } +- delta = 19 - min(delta, 19); +- +- return delta; +-} +- +-static inline int +-task_sched_prio(const struct task_struct *p, const struct rq *rq) +-{ +- if (p == rq->idle) +- return IDLE_TASK_SCHED_PRIO; +- +- if (p->prio < MAX_RT_PRIO) +- return p->prio; +- +- return MAX_RT_PRIO + task_sched_prio_normal(p, rq); +-} +- +-static inline int +-task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) +-{ +- if (p == rq->idle) +- return IDLE_TASK_SCHED_PRIO; +- +- if (p->prio < MAX_RT_PRIO) +- return p->prio; +- +- return MAX_RT_PRIO + +- (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; +-} +- +-int task_running_nice(struct task_struct *p) +-{ +- return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; +-} +- +-DECLARE_BITMAP(normal_mask, SCHED_BITS); +- +-static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) +-{ +- DECLARE_BITMAP(normal, SCHED_BITS); +- +- bitmap_and(normal, mask, normal_mask, SCHED_BITS); +- bitmap_shift_right(normal, normal, shift, SCHED_BITS); +- bitmap_and(normal, normal, normal_mask, SCHED_BITS); +- +- bitmap_andnot(mask, mask, normal_mask, SCHED_BITS); +- bitmap_or(mask, mask, normal, SCHED_BITS); +-} +- +-static inline void update_rq_time_edge(struct rq *rq) +-{ +- struct list_head head; +- u64 old = rq->time_edge; +- u64 now = rq->clock >> 23; +- u64 prio, delta; +- +- if (now == old) +- return; +- +- delta = min(20ULL, now - old); +- INIT_LIST_HEAD(&head); +- +- prio = MAX_RT_PRIO; +- for_each_set_bit_from(prio, rq->queue.bitmap, MAX_RT_PRIO + delta) { +- u64 idx; +- +- idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + 
rq->time_edge) % 20; +- list_splice_tail_init(rq->queue.heads + idx, &head); +- } +- sched_shift_normal_bitmap(rq->queue.bitmap, delta); +- rq->time_edge = now; +- if (!list_empty(&head)) { +- struct task_struct *p; +- +- list_for_each_entry(p, &head, sq_node) +- p->sq_idx = MAX_RT_PRIO + now % 20; +- +- list_splice(&head, rq->queue.heads + MAX_RT_PRIO + now % 20); +- set_bit(MAX_RT_PRIO, rq->queue.bitmap); +- } +-} +- +-static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) +-{ +- if (p->prio >= MAX_RT_PRIO) +- p->deadline = rq->clock + +- SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); +-} +- +-static inline void requeue_task(struct task_struct *p, struct rq *rq); +- +-static inline void time_slice_expired(struct task_struct *p, struct rq *rq) +-{ +- /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ +- p->time_slice = sched_timeslice_ns; +- sched_renew_deadline(p, rq); +- if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) +- requeue_task(p, rq); +-} +- +-/* +- * Init the queue structure in rq +- */ +-static inline void sched_queue_init(struct rq *rq) +-{ +- struct sched_queue *q = &rq->queue; +- int i; +- +- bitmap_set(normal_mask, MAX_RT_PRIO, 20); +- bitmap_zero(q->bitmap, SCHED_BITS); +- for(i = 0; i < SCHED_BITS; i++) +- INIT_LIST_HEAD(&q->heads[i]); +-} +- +-/* +- * Init idle task and put into queue structure of rq +- * IMPORTANT: may be called multiple times for a single cpu +- */ +-static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) +-{ +- struct sched_queue *q = &rq->queue; +- /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ +- +- idle->sq_idx = IDLE_TASK_SCHED_PRIO; +- INIT_LIST_HEAD(&q->heads[idle->sq_idx]); +- list_add(&idle->sq_node, &q->heads[idle->sq_idx]); +- set_bit(idle->sq_idx, q->bitmap); +-} +- +-static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) +-{ +- if (IDLE_TASK_SCHED_PRIO == idx || +- idx < MAX_RT_PRIO) +- return idx; +- +- return MAX_RT_PRIO + +- ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; +-} +- +-static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) +-{ +- if (IDLE_TASK_SCHED_PRIO == idx || +- idx < MAX_RT_PRIO) +- return idx; +- +- return MAX_RT_PRIO + +- ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; +-} +- +-/* +- * This routine assume that the idle task always in queue +- */ +-static inline struct task_struct *sched_rq_first_task(struct rq *rq) +-{ +- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); +- const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; +- +- return list_first_entry(head, struct task_struct, sq_node); +-} +- +-static inline struct task_struct * +-sched_rq_next_task(struct task_struct *p, struct rq *rq) +-{ +- unsigned long idx = p->sq_idx; +- struct list_head *head = &rq->queue.heads[idx]; +- +- if (list_is_last(&p->sq_node, head)) { +- idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, +- sched_idx2prio(idx, rq) + 1); +- head = &rq->queue.heads[sched_prio2idx(idx, rq)]; +- +- return list_first_entry(head, struct task_struct, sq_node); +- } +- +- return list_next_entry(p, sq_node); +-} +- +-static inline unsigned long sched_queue_watermark(struct rq *rq) +-{ +- return find_first_bit(rq->queue.bitmap, SCHED_BITS); +-} +- +-#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ +- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ +- sched_info_dequeued(rq, p); \ +- \ +- list_del(&p->sq_node); \ +- if (list_empty(&rq->queue.heads[p->sq_idx])) { \ +- 
clear_bit(sched_idx2prio(p->sq_idx, rq), \ +- rq->queue.bitmap); \ +- func; \ +- } +- +-#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ +- sched_info_queued(rq, p); \ +- psi_enqueue(p, flags); \ +- \ +- p->sq_idx = task_sched_prio_idx(p, rq); \ +- list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ +- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); +- +-/* +- * Requeue a task @p to @rq +- */ +-#define __SCHED_REQUEUE_TASK(p, rq, func) \ +-{\ +- int idx = task_sched_prio_idx(p, rq); \ +-\ +- list_del(&p->sq_node); \ +- list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ +- if (idx != p->sq_idx) { \ +- if (list_empty(&rq->queue.heads[p->sq_idx])) \ +- clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ +- p->sq_idx = idx; \ +- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ +- func; \ +- } \ +-} +- +-static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) +-{ +- return (task_sched_prio_idx(p, rq) != p->sq_idx); +-} +- +-static void sched_task_fork(struct task_struct *p, struct rq *rq) +-{ +- sched_renew_deadline(p, rq); +-} +- +-/** +- * task_prio - return the priority value of a given task. +- * @p: the task in question. +- * +- * Return: The priority value as seen by users in /proc. +- * +- * sched policy return value kernel prio user prio/nice +- * +- * normal, batch, idle [0 ... 39] 100 0/[-20 ... 19] +- * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] +- */ +-int task_prio(const struct task_struct *p) +-{ +- int ret; +- +- if (p->prio < MAX_RT_PRIO) +- return (p->prio - MAX_RT_PRIO); +- +- /*preempt_disable(); +- ret = task_sched_prio(p, task_rq(p)) - MAX_RT_PRIO;*/ +- ret = p->static_prio - MAX_RT_PRIO; +- /*preempt_enable();*/ +- +- return ret; +-} +- +-static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) +-{ +- time_slice_expired(p, rq); +-} +- +-#ifdef CONFIG_SMP +-static void sched_task_ttwu(struct task_struct *p) {} +-#endif +-static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} +-- +2.37.0 + + +From b9d74857f996912e63dd264edf8807d79d118ecd Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 17 May 2021 16:55:30 +0000 +Subject: [PATCH 155/297] sched/alt: sched_queue_init_idle() share common code + +--- + kernel/sched/alt_core.c | 15 ++++++++++++++- + kernel/sched/bmq.h | 10 ---------- + kernel/sched/pds.h | 15 --------------- + 3 files changed, 14 insertions(+), 26 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 9ade1b64aa9c..407d5d441298 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6045,6 +6045,19 @@ void dump_cpu_task(int cpu) + sched_show_task(cpu_curr(cpu)); + } + ++/* ++ * Init idle task and put into queue structure of rq ++ * IMPORTANT: may be called multiple times for a single cpu ++ */ ++static inline void sched_queue_init_idle(struct sched_queue *q, ++ struct task_struct *idle) ++{ ++ idle->sq_idx = IDLE_TASK_SCHED_PRIO; ++ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); ++ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); ++ set_bit(idle->sq_idx, q->bitmap); ++} ++ + /** + * init_idle - set up an idle thread for a given CPU + * @idle: task in question +@@ -6067,7 +6080,7 @@ void init_idle(struct task_struct *idle, int cpu) + idle->last_ran = rq->clock_task; + idle->state = TASK_RUNNING; + idle->flags |= PF_IDLE; +- sched_queue_init_idle(rq, idle); ++ sched_queue_init_idle(&rq->queue, idle); + + scs_task_reset(idle); + kasan_unpoison_task_stack(idle); +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h 
+index f6bd3421b95c..f5bd651a7666 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -82,16 +82,6 @@ static inline void sched_queue_init(struct rq *rq) + INIT_LIST_HEAD(&q->heads[i]); + } + +-static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) +-{ +- struct sched_queue *q = &rq->queue; +- +- idle->sq_idx = IDLE_TASK_SCHED_PRIO; +- INIT_LIST_HEAD(&q->heads[idle->sq_idx]); +- list_add(&idle->sq_node, &q->heads[idle->sq_idx]); +- set_bit(idle->sq_idx, q->bitmap); +-} +- + /* + * This routine used in bmq scheduler only which assume the idle task in the bmq + */ +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 8cc656a7cc48..c29122334bda 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -148,21 +148,6 @@ static inline void sched_queue_init(struct rq *rq) + INIT_LIST_HEAD(&q->heads[i]); + } + +-/* +- * Init idle task and put into queue structure of rq +- * IMPORTANT: may be called multiple times for a single cpu +- */ +-static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) +-{ +- struct sched_queue *q = &rq->queue; +- /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ +- +- idle->sq_idx = IDLE_TASK_SCHED_PRIO; +- INIT_LIST_HEAD(&q->heads[idle->sq_idx]); +- list_add(&idle->sq_node, &q->heads[idle->sq_idx]); +- set_bit(idle->sq_idx, q->bitmap); +-} +- + static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) + { + if (IDLE_TASK_SCHED_PRIO == idx || +-- +2.37.0 + + +From 3355290529f7bc9fa4f788818de2898074e43b90 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 18 May 2021 10:40:43 +0000 +Subject: [PATCH 156/297] sched/alt: Merge BMQ&PDS common code. + +--- + kernel/sched/alt_core.c | 43 ++++++++++++------- + kernel/sched/bmq.h | 17 +------- + kernel/sched/pds.h | 91 ++++++++++++++++++----------------------- + 3 files changed, 70 insertions(+), 81 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 407d5d441298..c81a9fc6a140 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -149,9 +149,34 @@ static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; + #include "pds.h" + #endif + ++/* sched_queue related functions */ ++static inline void sched_queue_init(struct sched_queue *q) ++{ ++ int i; ++ ++ bitmap_zero(q->bitmap, SCHED_BITS); ++ for(i = 0; i < SCHED_BITS; i++) ++ INIT_LIST_HEAD(&q->heads[i]); ++} ++ ++/* ++ * Init idle task and put into queue structure of rq ++ * IMPORTANT: may be called multiple times for a single cpu ++ */ ++static inline void sched_queue_init_idle(struct sched_queue *q, ++ struct task_struct *idle) ++{ ++ idle->sq_idx = IDLE_TASK_SCHED_PRIO; ++ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); ++ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); ++ set_bit(idle->sq_idx, q->bitmap); ++} ++ ++ ++/* water mark related functions*/ + static inline void update_sched_rq_watermark(struct rq *rq) + { +- unsigned long watermark = sched_queue_watermark(rq); ++ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_BITS); + unsigned long last_wm = rq->watermark; + unsigned long i; + int cpu; +@@ -6045,19 +6070,6 @@ void dump_cpu_task(int cpu) + sched_show_task(cpu_curr(cpu)); + } + +-/* +- * Init idle task and put into queue structure of rq +- * IMPORTANT: may be called multiple times for a single cpu +- */ +-static inline void sched_queue_init_idle(struct sched_queue *q, +- struct task_struct *idle) +-{ +- idle->sq_idx = IDLE_TASK_SCHED_PRIO; +- 
INIT_LIST_HEAD(&q->heads[idle->sq_idx]); +- list_add(&idle->sq_node, &q->heads[idle->sq_idx]); +- set_bit(idle->sq_idx, q->bitmap); +-} +- + /** + * init_idle - set up an idle thread for a given CPU + * @idle: task in question +@@ -6677,6 +6689,7 @@ void __init sched_init(void) + struct rq *rq; + + printk(KERN_INFO ALT_SCHED_VERSION_MSG); ++ sched_imp_init(); + + wait_bit_init(); + +@@ -6695,7 +6708,7 @@ void __init sched_init(void) + for_each_possible_cpu(i) { + rq = cpu_rq(i); + +- sched_queue_init(rq); ++ sched_queue_init(&rq->queue); + rq->watermark = IDLE_WM; + rq->skip = NULL; + +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index f5bd651a7666..7858ac1185ce 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -62,26 +62,13 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + } + } + ++static inline void sched_imp_init(void) {} ++ + inline int task_running_nice(struct task_struct *p) + { + return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); + } + +-static inline unsigned long sched_queue_watermark(struct rq *rq) +-{ +- return find_first_bit(rq->queue.bitmap, SCHED_BITS); +-} +- +-static inline void sched_queue_init(struct rq *rq) +-{ +- struct sched_queue *q = &rq->queue; +- int i; +- +- bitmap_zero(q->bitmap, SCHED_BITS); +- for(i = 0; i < SCHED_BITS; i++) +- INIT_LIST_HEAD(&q->heads[i]); +-} +- + /* + * This routine used in bmq scheduler only which assume the idle task in the bmq + */ +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index c29122334bda..64631b2770fe 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -14,13 +14,7 @@ static const u64 user_prio2deadline[NICE_WIDTH] = { + #define SCHED_PRIO_SLOT (4ULL << 20) + #define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) + +-static inline int normal_prio(struct task_struct *p) +-{ +- if (task_has_rt_policy(p)) +- return MAX_RT_PRIO - 1 - p->rt_priority; +- +- return MAX_RT_PRIO; +-} ++DECLARE_BITMAP(normal_mask, SCHED_BITS); + + extern int alt_debug[20]; + +@@ -64,13 +58,49 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; + } + ++static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) ++{ ++ if (IDLE_TASK_SCHED_PRIO == idx || ++ idx < MAX_RT_PRIO) ++ return idx; ++ ++ return MAX_RT_PRIO + ++ ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; ++} ++ ++static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) ++{ ++ if (IDLE_TASK_SCHED_PRIO == idx || ++ idx < MAX_RT_PRIO) ++ return idx; ++ ++ return MAX_RT_PRIO + ++ ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; ++} ++ ++static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) ++{ ++ if (p->prio >= MAX_RT_PRIO) ++ p->deadline = rq->clock + ++ SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); ++} ++ ++/* ++ * Common interfaces ++ */ ++static inline int normal_prio(struct task_struct *p) ++{ ++ if (task_has_rt_policy(p)) ++ return MAX_RT_PRIO - 1 - p->rt_priority; ++ ++ return MAX_RT_PRIO; ++} ++ + int task_running_nice(struct task_struct *p) + { + return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; + } + +-DECLARE_BITMAP(normal_mask, SCHED_BITS); +- + static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) + { + DECLARE_BITMAP(normal, SCHED_BITS); +@@ -116,13 +146,6 @@ static inline void update_rq_time_edge(struct rq *rq) + } + } + +-static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) +-{ +- if 
(p->prio >= MAX_RT_PRIO) +- p->deadline = rq->clock + +- SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); +-} +- + static inline void requeue_task(struct task_struct *p, struct rq *rq); + + static inline void time_slice_expired(struct task_struct *p, struct rq *rq) +@@ -134,38 +157,9 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + requeue_task(p, rq); + } + +-/* +- * Init the queue structure in rq +- */ +-static inline void sched_queue_init(struct rq *rq) ++static inline void sched_imp_init(void) + { +- struct sched_queue *q = &rq->queue; +- int i; +- + bitmap_set(normal_mask, MAX_RT_PRIO, 20); +- bitmap_zero(q->bitmap, SCHED_BITS); +- for(i = 0; i < SCHED_BITS; i++) +- INIT_LIST_HEAD(&q->heads[i]); +-} +- +-static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) +-{ +- if (IDLE_TASK_SCHED_PRIO == idx || +- idx < MAX_RT_PRIO) +- return idx; +- +- return MAX_RT_PRIO + +- ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; +-} +- +-static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) +-{ +- if (IDLE_TASK_SCHED_PRIO == idx || +- idx < MAX_RT_PRIO) +- return idx; +- +- return MAX_RT_PRIO + +- ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; + } + + /* +@@ -196,11 +190,6 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) + return list_next_entry(p, sq_node); + } + +-static inline unsigned long sched_queue_watermark(struct rq *rq) +-{ +- return find_first_bit(rq->queue.bitmap, SCHED_BITS); +-} +- + #define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ + psi_dequeue(p, flags & DEQUEUE_SLEEP); \ + sched_info_dequeued(rq, p); \ +-- +2.37.0 + + +From d7e78ce37e84fa1b89a567d655d2ab923b055caa Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 19 May 2021 10:56:37 +0000 +Subject: [PATCH 157/297] sched/pds: Refine task_sched_prio() and + task_sched_prio_idx() + +idle task should never be queued/dequued/requeued or be woken. 
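(Illustrative aside, not part of the patch series.) With the idle task parked in its own fixed slot and never requeued, only RT and normal priorities go through the priority/index translation, and the normal range is a 20-slot window rotated by rq->time_edge so the window can advance in time without requeueing every task. A standalone sketch of that rotation, with illustrative constants rather than the kernel's definitions (cf. task_sched_prio_idx, sched_prio2idx, sched_idx2prio), which also checks that the two mappings invert each other:

#include <stdio.h>

#define MAX_RT_PRIO   100   /* first normal-priority level               */
#define NORMAL_LEVELS  20   /* size of the PDS normal-priority window    */

/* Stable priority -> slot in the rotating queue. */
static unsigned long prio2idx(unsigned long prio, unsigned long time_edge)
{
	return (prio < MAX_RT_PRIO) ? prio :
		MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + time_edge) % NORMAL_LEVELS;
}

/* Slot in the rotating queue -> stable priority. */
static unsigned long idx2prio(unsigned long idx, unsigned long time_edge)
{
	return (idx < MAX_RT_PRIO) ? idx :
		MAX_RT_PRIO + ((idx - MAX_RT_PRIO) + NORMAL_LEVELS
			       - time_edge % NORMAL_LEVELS) % NORMAL_LEVELS;
}

int main(void)
{
	unsigned long time_edge = 7;    /* arbitrary example value */
	unsigned long prio;

	for (prio = 0; prio < MAX_RT_PRIO + NORMAL_LEVELS; prio++)
		if (idx2prio(prio2idx(prio, time_edge), time_edge) != prio)
			printf("mismatch at prio %lu\n", prio);

	printf("prio %d maps to slot %lu at time_edge %lu\n",
	       MAX_RT_PRIO, prio2idx(MAX_RT_PRIO, time_edge), time_edge);
	return 0;
}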
+--- + kernel/sched/pds.h | 20 +++++++------------- + 1 file changed, 7 insertions(+), 13 deletions(-) + +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 64631b2770fe..62b5ab738876 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -36,26 +36,20 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + static inline int + task_sched_prio(const struct task_struct *p, const struct rq *rq) + { +- if (p == rq->idle) +- return IDLE_TASK_SCHED_PRIO; +- +- if (p->prio < MAX_RT_PRIO) +- return p->prio; ++ if (p->prio >= MAX_RT_PRIO) ++ return MAX_RT_PRIO + task_sched_prio_normal(p, rq); + +- return MAX_RT_PRIO + task_sched_prio_normal(p, rq); ++ return p->prio; + } + + static inline int + task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + { +- if (p == rq->idle) +- return IDLE_TASK_SCHED_PRIO; +- +- if (p->prio < MAX_RT_PRIO) +- return p->prio; ++ if (p->prio >= MAX_RT_PRIO) ++ return MAX_RT_PRIO + ++ (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; + +- return MAX_RT_PRIO + +- (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; ++ return p->prio; + } + + static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) +-- +2.37.0 + + +From acd958d1847d946f3f4c2a47ca4aa951babcd42c Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 24 May 2021 21:46:42 +0000 +Subject: [PATCH 158/297] sched/pds: Fix unexpected larger delta in + task_sched_prio_normal() + +--- + kernel/sched/alt_core.c | 23 ++++++++++++----------- + kernel/sched/bmq.h | 10 ++++++---- + kernel/sched/pds.h | 39 +++++++++++++++++++++------------------ + 3 files changed, 39 insertions(+), 33 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c81a9fc6a140..21dc24e855eb 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1328,6 +1328,7 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int + + raw_spin_lock(&rq->lock); + BUG_ON(task_cpu(p) != new_cpu); ++ sched_task_sanity_check(p, rq); + enqueue_task(p, rq, 0); + p->on_rq = TASK_ON_RQ_QUEUED; + check_preempt_curr(rq); +@@ -1656,7 +1657,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) + return dest_cpu; + } + +-static inline int select_task_rq(struct task_struct *p, struct rq *rq) ++static inline int select_task_rq(struct task_struct *p) + { + cpumask_t chk_mask, tmp; + +@@ -1669,7 +1670,7 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) + #endif + cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || + cpumask_and(&tmp, &chk_mask, +- &sched_rq_watermark[task_sched_prio(p, rq) + 1])) ++ &sched_rq_watermark[task_sched_prio(p) + 1])) + return best_mask_cpu(task_cpu(p), &tmp); + + return best_mask_cpu(task_cpu(p), &chk_mask); +@@ -1823,7 +1824,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + + #else /* CONFIG_SMP */ + +-static inline int select_task_rq(struct task_struct *p, struct rq *rq) ++static inline int select_task_rq(struct task_struct *p) + { + return 0; + } +@@ -2360,7 +2361,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + + sched_task_ttwu(p); + +- cpu = select_task_rq(p, this_rq()); ++ cpu = select_task_rq(p); + + if (cpu != task_cpu(p)) { + if (p->in_iowait) { +@@ -2662,7 +2663,7 @@ void wake_up_new_task(struct task_struct *p) + + p->state = TASK_RUNNING; + +- rq = cpu_rq(select_task_rq(p, this_rq())); ++ rq = cpu_rq(select_task_rq(p)); + #ifdef CONFIG_SMP + rseq_migrate(p); + /* +@@ -3265,7 +3266,7 @@ void sched_exec(void) + if (rq != 
task_rq(p) || rq->nr_running < 2) + goto unlock; + +- dest_cpu = select_task_rq(p, task_rq(p)); ++ dest_cpu = select_task_rq(p); + if (dest_cpu == smp_processor_id()) + goto unlock; + +@@ -3847,7 +3848,7 @@ inline void alt_sched_debug(void) + { + int i; + +- for (i = 0; i < 3; i++) ++ for (i = 0; i < 6; i++) + printk(KERN_INFO "sched: %d\n", alt_debug[i]); + } + #endif +@@ -4562,7 +4563,7 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag + } + EXPORT_SYMBOL(default_wake_function); + +-static inline void check_task_changed(struct rq *rq, struct task_struct *p) ++static inline void check_task_changed(struct task_struct *p, struct rq *rq) + { + /* Trigger resched if task sched_prio has been modified. */ + if (task_on_rq_queued(p) && sched_task_need_requeue(p, rq)) { +@@ -4654,7 +4655,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) + trace_sched_pi_setprio(p, pi_task); + p->prio = prio; + +- check_task_changed(rq, p); ++ check_task_changed(p, rq); + out_unlock: + /* Avoid rq from going away on us: */ + preempt_disable(); +@@ -4698,7 +4699,7 @@ void set_user_nice(struct task_struct *p, long nice) + + p->prio = effective_prio(p); + +- check_task_changed(rq, p); ++ check_task_changed(p, rq); + out_unlock: + __task_access_unlock(p, lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); +@@ -5027,7 +5028,7 @@ static int __sched_setscheduler(struct task_struct *p, + + __setscheduler(rq, p, attr, pi); + +- check_task_changed(rq, p); ++ check_task_changed(p, rq); + + /* Avoid rq from going away on us: */ + preempt_disable(); +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index 7858ac1185ce..eea8cb31ca1a 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -44,7 +44,7 @@ static inline int normal_prio(struct task_struct *p) + return p->static_prio + MAX_PRIORITY_ADJ; + } + +-static inline int task_sched_prio(struct task_struct *p, struct rq *rq) ++static inline int task_sched_prio(struct task_struct *p) + { + return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; + } +@@ -62,6 +62,8 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + } + } + ++static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) {} ++ + static inline void sched_imp_init(void) {} + + inline int task_running_nice(struct task_struct *p) +@@ -110,13 +112,13 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) + sched_info_queued(rq, p); \ + psi_enqueue(p, flags); \ + \ +- p->sq_idx = task_sched_prio(p, rq); \ ++ p->sq_idx = task_sched_prio(p); \ + list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ + set_bit(p->sq_idx, rq->queue.bitmap) + + #define __SCHED_REQUEUE_TASK(p, rq, func) \ + { \ +- int idx = task_sched_prio(p, rq); \ ++ int idx = task_sched_prio(p); \ + \ + list_del(&p->sq_node); \ + list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ +@@ -131,7 +133,7 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) + + static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) + { +- return (task_sched_prio(p, rq) != p->sq_idx); ++ return (task_sched_prio(p) != p->sq_idx); + } + + static void sched_task_fork(struct task_struct *p, struct rq *rq) +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 62b5ab738876..7eac80b83fb3 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -21,23 +21,22 @@ extern int alt_debug[20]; + static inline int + task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { +- int delta; ++ int delta = (p->deadline >> 23) - rq->time_edge - 1; + +- delta = rq->time_edge + 20 - (p->deadline >> 23); +- if (delta < 0) { +- delta = 0; +- alt_debug[0]++; ++ if (unlikely(delta > 19)) { ++ pr_info("pds: task_sched_prio_normal delta %d, deadline %llu(%llu), time_edge %llu\n", ++ delta, p->deadline, p->deadline >> 23, rq->time_edge); ++ delta = 19; + } +- delta = 19 - min(delta, 19); + +- return delta; ++ return (delta < 0)? 
0:delta; + } + + static inline int +-task_sched_prio(const struct task_struct *p, const struct rq *rq) ++task_sched_prio(const struct task_struct *p) + { + if (p->prio >= MAX_RT_PRIO) +- return MAX_RT_PRIO + task_sched_prio_normal(p, rq); ++ return MAX_RT_PRIO + task_sched_prio_normal(p, task_rq(p)); + + return p->prio; + } +@@ -92,7 +91,7 @@ static inline int normal_prio(struct task_struct *p) + + int task_running_nice(struct task_struct *p) + { +- return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; ++ return task_sched_prio(p) > DEFAULT_SCHED_PRIO; + } + + static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) +@@ -117,7 +116,7 @@ static inline void update_rq_time_edge(struct rq *rq) + if (now == old) + return; + +- delta = min(20ULL, now - old); ++ delta = min_t(u64, 20, now - old); + INIT_LIST_HEAD(&head); + + prio = MAX_RT_PRIO; +@@ -151,6 +150,12 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + requeue_task(p, rq); + } + ++static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) ++{ ++ if (unlikely(p->deadline > rq->clock + 40 * SCHED_PRIO_SLOT)) ++ p->deadline = rq->clock + 40 * SCHED_PRIO_SLOT; ++} ++ + static inline void sched_imp_init(void) + { + bitmap_set(normal_mask, MAX_RT_PRIO, 20); +@@ -212,11 +217,12 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) + \ + list_del(&p->sq_node); \ + list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ +- if (idx != p->sq_idx) { \ ++ if (idx != p->sq_idx) { \ + if (list_empty(&rq->queue.heads[p->sq_idx])) \ +- clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ ++ clear_bit(sched_idx2prio(p->sq_idx, rq), \ ++ rq->queue.bitmap); \ + p->sq_idx = idx; \ +- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ ++ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ + func; \ + } \ + } +@@ -249,10 +255,7 @@ int task_prio(const struct task_struct *p) + if (p->prio < MAX_RT_PRIO) + return (p->prio - MAX_RT_PRIO); + +- /*preempt_disable(); +- ret = task_sched_prio(p, task_rq(p)) - MAX_RT_PRIO;*/ +- ret = p->static_prio - MAX_RT_PRIO; +- /*preempt_enable();*/ ++ ret = task_sched_prio(p) - MAX_RT_PRIO; + + return ret; + } +-- +2.37.0 + + +From 1466e72c04940a3afcab4240de77724c2c444bd4 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 25 May 2021 09:56:33 +0000 +Subject: [PATCH 159/297] sched/pds: Rewrite + task_sched_prio/task_sched_prio_idx/sched_prio2idx/sched_idx2prio + +--- + kernel/sched/pds.h | 27 +++++++-------------------- + 1 file changed, 7 insertions(+), 20 deletions(-) + +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 7eac80b83fb3..8a1841e52e91 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -35,39 +35,26 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + static inline int + task_sched_prio(const struct task_struct *p) + { +- if (p->prio >= MAX_RT_PRIO) +- return MAX_RT_PRIO + task_sched_prio_normal(p, task_rq(p)); +- +- return p->prio; ++ return (p->prio < MAX_RT_PRIO) ? p->prio : ++ MAX_RT_PRIO + task_sched_prio_normal(p, task_rq(p)); + } + + static inline int + task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + { +- if (p->prio >= MAX_RT_PRIO) +- return MAX_RT_PRIO + +- (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; +- +- return p->prio; ++ return (p->prio < MAX_RT_PRIO) ? 
p->prio : MAX_RT_PRIO + ++ (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; + } + + static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) + { +- if (IDLE_TASK_SCHED_PRIO == idx || +- idx < MAX_RT_PRIO) +- return idx; +- +- return MAX_RT_PRIO + +- ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; ++ return (IDLE_TASK_SCHED_PRIO == idx || idx < MAX_RT_PRIO) ? idx : ++ MAX_RT_PRIO + ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; + } + + static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) + { +- if (IDLE_TASK_SCHED_PRIO == idx || +- idx < MAX_RT_PRIO) +- return idx; +- +- return MAX_RT_PRIO + ++ return (idx < MAX_RT_PRIO) ? idx : MAX_RT_PRIO + + ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; + } + +-- +2.37.0 + + +From 77bcd8ac6bacddd905ed63b0c6756389c65a510e Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 25 May 2021 10:51:11 +0000 +Subject: [PATCH 160/297] sched/alt: One less bit for sched_queue.bitmap + +--- + kernel/sched/alt_core.c | 3 +-- + kernel/sched/alt_sched.h | 4 +++- + kernel/sched/bmq.h | 10 +++++----- + kernel/sched/pds.h | 4 ++-- + 4 files changed, 11 insertions(+), 10 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 21dc24e855eb..8fd6fd9ec2ea 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -169,14 +169,13 @@ static inline void sched_queue_init_idle(struct sched_queue *q, + idle->sq_idx = IDLE_TASK_SCHED_PRIO; + INIT_LIST_HEAD(&q->heads[idle->sq_idx]); + list_add(&idle->sq_node, &q->heads[idle->sq_idx]); +- set_bit(idle->sq_idx, q->bitmap); + } + + + /* water mark related functions*/ + static inline void update_sched_rq_watermark(struct rq *rq) + { +- unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); + unsigned long last_wm = rq->watermark; + unsigned long i; + int cpu; +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 58ff6212b446..76ec6f9c737b 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -136,8 +136,10 @@ static inline int task_on_rq_migrating(struct task_struct *p) + #define WF_MIGRATED 0x04 /* internal use, task got migrated */ + #define WF_ON_CPU 0x08 /* Wakee is on_rq */ + ++#define SCHED_QUEUE_BITS (SCHED_BITS - 1) ++ + struct sched_queue { +- DECLARE_BITMAP(bitmap, SCHED_BITS); ++ DECLARE_BITMAP(bitmap, SCHED_QUEUE_BITS); + struct list_head heads[SCHED_BITS]; + }; + +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index eea8cb31ca1a..85e4c477eda8 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -76,7 +76,7 @@ inline int task_running_nice(struct task_struct *p) + */ + static inline struct task_struct *sched_rq_first_task(struct rq *rq) + { +- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); + const struct list_head *head = &rq->queue.heads[idx]; + + return list_first_entry(head, struct task_struct, sq_node); +@@ -89,7 +89,7 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) + struct list_head *head = &rq->queue.heads[idx]; + + if (list_is_last(&p->sq_node, head)) { +- idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); ++ idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, idx + 1); + head = &rq->queue.heads[idx]; + + return list_first_entry(head, struct task_struct, sq_node); +@@ -104,7 +104,7 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) + \ + 
list_del(&p->sq_node); \ + if (list_empty(&rq->queue.heads[p->sq_idx])) { \ +- clear_bit(p->sq_idx, rq->queue.bitmap);\ ++ clear_bit(p->sq_idx, rq->queue.bitmap); \ + func; \ + } + +@@ -122,9 +122,9 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) + \ + list_del(&p->sq_node); \ + list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ +- if (idx != p->sq_idx) { \ ++ if (idx != p->sq_idx) { \ + if (list_empty(&rq->queue.heads[p->sq_idx])) \ +- clear_bit(p->sq_idx, rq->queue.bitmap); \ ++ clear_bit(p->sq_idx, rq->queue.bitmap); \ + p->sq_idx = idx; \ + set_bit(p->sq_idx, rq->queue.bitmap); \ + func; \ +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 8a1841e52e91..ee3d5cfac781 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -153,7 +153,7 @@ static inline void sched_imp_init(void) + */ + static inline struct task_struct *sched_rq_first_task(struct rq *rq) + { +- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); + const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; + + return list_first_entry(head, struct task_struct, sq_node); +@@ -166,7 +166,7 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) + struct list_head *head = &rq->queue.heads[idx]; + + if (list_is_last(&p->sq_node, head)) { +- idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, ++ idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, + sched_idx2prio(idx, rq) + 1); + head = &rq->queue.heads[sched_prio2idx(idx, rq)]; + +-- +2.37.0 + + +From 723d5c879541fa1022d4a769fef554aca300c9f4 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 25 May 2021 14:08:11 +0000 +Subject: [PATCH 161/297] sched/pds: Introduce SCHED_NORMAL_PRIO_NUM + +--- + kernel/sched/alt_sched.h | 3 ++- + kernel/sched/pds.h | 30 ++++++++++++++++++------------ + 2 files changed, 20 insertions(+), 13 deletions(-) + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 76ec6f9c737b..1a579536fd30 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -55,8 +55,9 @@ + #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) + #endif + #ifdef CONFIG_SCHED_PDS ++#define SCHED_NORMAL_PRIO_NUM (NICE_WIDTH / 2) + /* bits: RT(0-99), nice width / 2, cpu idle task */ +-#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) ++#define SCHED_BITS (MAX_RT_PRIO + SCHED_NORMAL_PRIO_NUM + 1) + #endif + + #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index ee3d5cfac781..effd38a024d1 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -21,12 +21,12 @@ extern int alt_debug[20]; + static inline int + task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { +- int delta = (p->deadline >> 23) - rq->time_edge - 1; ++ s64 delta = (p->deadline >> 23) - rq->time_edge - 1; + +- if (unlikely(delta > 19)) { +- pr_info("pds: task_sched_prio_normal delta %d, deadline %llu(%llu), time_edge %llu\n", ++ if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { ++ pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", + delta, p->deadline, p->deadline >> 23, rq->time_edge); +- delta = 19; ++ delta = SCHED_NORMAL_PRIO_NUM - 1ULL; + } + + return (delta < 0)? 0:delta; +@@ -43,19 +43,23 @@ static inline int + task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + { + return (p->prio < MAX_RT_PRIO) ? 
p->prio : MAX_RT_PRIO + +- (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; ++ (task_sched_prio_normal(p, rq) + rq->time_edge) % ++ SCHED_NORMAL_PRIO_NUM; + } + + static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) + { + return (IDLE_TASK_SCHED_PRIO == idx || idx < MAX_RT_PRIO) ? idx : +- MAX_RT_PRIO + ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; ++ MAX_RT_PRIO + ((idx - MAX_RT_PRIO) + rq->time_edge) % ++ SCHED_NORMAL_PRIO_NUM; + } + + static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) + { + return (idx < MAX_RT_PRIO) ? idx : MAX_RT_PRIO + +- ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; ++ ((idx - MAX_RT_PRIO) + SCHED_NORMAL_PRIO_NUM - ++ rq->time_edge % SCHED_NORMAL_PRIO_NUM) % ++ SCHED_NORMAL_PRIO_NUM; + } + + static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) +@@ -103,25 +107,27 @@ static inline void update_rq_time_edge(struct rq *rq) + if (now == old) + return; + +- delta = min_t(u64, 20, now - old); ++ delta = min_t(u64, SCHED_NORMAL_PRIO_NUM, now - old); + INIT_LIST_HEAD(&head); + + prio = MAX_RT_PRIO; + for_each_set_bit_from(prio, rq->queue.bitmap, MAX_RT_PRIO + delta) { + u64 idx; + +- idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + rq->time_edge) % 20; ++ idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + rq->time_edge) % ++ SCHED_NORMAL_PRIO_NUM; + list_splice_tail_init(rq->queue.heads + idx, &head); + } + sched_shift_normal_bitmap(rq->queue.bitmap, delta); + rq->time_edge = now; + if (!list_empty(&head)) { + struct task_struct *p; ++ u64 new_idx = MAX_RT_PRIO + now % SCHED_NORMAL_PRIO_NUM; + + list_for_each_entry(p, &head, sq_node) +- p->sq_idx = MAX_RT_PRIO + now % 20; ++ p->sq_idx = new_idx; + +- list_splice(&head, rq->queue.heads + MAX_RT_PRIO + now % 20); ++ list_splice(&head, rq->queue.heads + new_idx); + set_bit(MAX_RT_PRIO, rq->queue.bitmap); + } + } +@@ -145,7 +151,7 @@ static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) + + static inline void sched_imp_init(void) + { +- bitmap_set(normal_mask, MAX_RT_PRIO, 20); ++ bitmap_set(normal_mask, MAX_RT_PRIO, SCHED_NORMAL_PRIO_NUM); + } + + /* +-- +2.37.0 + + +From 8951168051627eeb110c2a3ee161c7f5993d8de6 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 25 May 2021 15:00:58 +0000 +Subject: [PATCH 162/297] sched/pds: SCHED_NORMAL_PRIO_NUM to 40 + +--- + kernel/sched/alt_sched.h | 2 +- + kernel/sched/pds.h | 8 ++++---- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 1a579536fd30..eb5e8d31686c 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -55,7 +55,7 @@ + #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) + #endif + #ifdef CONFIG_SCHED_PDS +-#define SCHED_NORMAL_PRIO_NUM (NICE_WIDTH / 2) ++#define SCHED_NORMAL_PRIO_NUM (NICE_WIDTH) + /* bits: RT(0-99), nice width / 2, cpu idle task */ + #define SCHED_BITS (MAX_RT_PRIO + SCHED_NORMAL_PRIO_NUM + 1) + #endif +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index effd38a024d1..b1ea68e43ba7 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -12,7 +12,7 @@ static const u64 user_prio2deadline[NICE_WIDTH] = { + }; + + #define SCHED_PRIO_SLOT (4ULL << 20) +-#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) ++#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + SCHED_NORMAL_PRIO_NUM / 2) + + DECLARE_BITMAP(normal_mask, SCHED_BITS); + +@@ -21,11 +21,11 @@ extern int alt_debug[20]; + static inline int + task_sched_prio_normal(const struct 
task_struct *p, const struct rq *rq) + { +- s64 delta = (p->deadline >> 23) - rq->time_edge - 1; ++ s64 delta = (p->deadline >> 22) - rq->time_edge - 1; + + if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { + pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", +- delta, p->deadline, p->deadline >> 23, rq->time_edge); ++ delta, p->deadline, p->deadline >> 22, rq->time_edge); + delta = SCHED_NORMAL_PRIO_NUM - 1ULL; + } + +@@ -101,7 +101,7 @@ static inline void update_rq_time_edge(struct rq *rq) + { + struct list_head head; + u64 old = rq->time_edge; +- u64 now = rq->clock >> 23; ++ u64 now = rq->clock >> 22; + u64 prio, delta; + + if (now == old) +-- +2.37.0 + + +From a24585db80f4a0816bee71006bac92dcecc35c03 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 27 May 2021 14:34:44 +0000 +Subject: [PATCH 163/297] sched/pds: Change MIN_NORMAL_PRIO to 128 + +--- + include/linux/sched/prio.h | 17 +++++++++++++++++ + kernel/sched/alt_sched.h | 7 ++++--- + kernel/sched/pds.h | 31 +++++++++++++++++-------------- + 3 files changed, 38 insertions(+), 17 deletions(-) + +diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h +index 4d4f92bffeea..a191f253771b 100644 +--- a/include/linux/sched/prio.h ++++ b/include/linux/sched/prio.h +@@ -18,14 +18,31 @@ + #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) + #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) + ++#ifdef CONFIG_SCHED_ALT ++ ++/* Undefine MAX_PRIO and DEFAULT_PRIO */ ++#undef MAX_PRIO ++#undef DEFAULT_PRIO ++ + /* +/- priority levels from the base priority */ + #ifdef CONFIG_SCHED_BMQ + #define MAX_PRIORITY_ADJ 7 ++ ++#define MIN_NORMAL_PRIO (MAX_RT_PRIO) ++#define MAX_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH) ++#define DEFAULT_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH / 2) + #endif ++ + #ifdef CONFIG_SCHED_PDS + #define MAX_PRIORITY_ADJ 0 ++ ++#define MIN_NORMAL_PRIO (128) ++#define MAX_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH) ++#define DEFAULT_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH / 2) + #endif + ++#endif /* CONFIG_SCHED_ALT */ ++ + /* + * Convert user-nice values [ -20 ... 0 ... 19 ] + * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index eb5e8d31686c..52e1baa4f5da 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -54,11 +54,12 @@ + * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ + #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) + #endif ++ + #ifdef CONFIG_SCHED_PDS + #define SCHED_NORMAL_PRIO_NUM (NICE_WIDTH) +-/* bits: RT(0-99), nice width / 2, cpu idle task */ +-#define SCHED_BITS (MAX_RT_PRIO + SCHED_NORMAL_PRIO_NUM + 1) +-#endif ++/* bits: RT(0-99), reserved(100-127), SCHED_NORMAL_PRIO_NUM, cpu idle task */ ++#define SCHED_BITS (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM + 1) ++#endif /* CONFIG_SCHED_PDS */ + + #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) + +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index b1ea68e43ba7..4a181e6ed52f 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -12,7 +12,7 @@ static const u64 user_prio2deadline[NICE_WIDTH] = { + }; + + #define SCHED_PRIO_SLOT (4ULL << 20) +-#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + SCHED_NORMAL_PRIO_NUM / 2) ++#define DEFAULT_SCHED_PRIO (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM / 2) + + DECLARE_BITMAP(normal_mask, SCHED_BITS); + +@@ -36,13 +36,13 @@ static inline int + task_sched_prio(const struct task_struct *p) + { + return (p->prio < MAX_RT_PRIO) ? 
p->prio : +- MAX_RT_PRIO + task_sched_prio_normal(p, task_rq(p)); ++ MIN_NORMAL_PRIO + task_sched_prio_normal(p, task_rq(p)); + } + + static inline int + task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + { +- return (p->prio < MAX_RT_PRIO) ? p->prio : MAX_RT_PRIO + ++ return (p->prio < MAX_RT_PRIO) ? p->prio : MIN_NORMAL_PRIO + + (task_sched_prio_normal(p, rq) + rq->time_edge) % + SCHED_NORMAL_PRIO_NUM; + } +@@ -50,14 +50,15 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) + { + return (IDLE_TASK_SCHED_PRIO == idx || idx < MAX_RT_PRIO) ? idx : +- MAX_RT_PRIO + ((idx - MAX_RT_PRIO) + rq->time_edge) % ++ MIN_NORMAL_PRIO + ++ ((idx - MIN_NORMAL_PRIO) + rq->time_edge) % + SCHED_NORMAL_PRIO_NUM; + } + + static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) + { +- return (idx < MAX_RT_PRIO) ? idx : MAX_RT_PRIO + +- ((idx - MAX_RT_PRIO) + SCHED_NORMAL_PRIO_NUM - ++ return (idx < MAX_RT_PRIO) ? idx : MIN_NORMAL_PRIO + ++ ((idx - MIN_NORMAL_PRIO) + SCHED_NORMAL_PRIO_NUM - + rq->time_edge % SCHED_NORMAL_PRIO_NUM) % + SCHED_NORMAL_PRIO_NUM; + } +@@ -66,7 +67,8 @@ static inline void sched_renew_deadline(struct task_struct *p, const struct rq * + { + if (p->prio >= MAX_RT_PRIO) + p->deadline = rq->clock + +- SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); ++ SCHED_PRIO_SLOT * ++ (p->static_prio - MIN_NORMAL_PRIO + 1); + } + + /* +@@ -110,11 +112,12 @@ static inline void update_rq_time_edge(struct rq *rq) + delta = min_t(u64, SCHED_NORMAL_PRIO_NUM, now - old); + INIT_LIST_HEAD(&head); + +- prio = MAX_RT_PRIO; +- for_each_set_bit_from(prio, rq->queue.bitmap, MAX_RT_PRIO + delta) { ++ prio = MIN_NORMAL_PRIO; ++ for_each_set_bit_from(prio, rq->queue.bitmap, MIN_NORMAL_PRIO + delta) { + u64 idx; + +- idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + rq->time_edge) % ++ idx = MIN_NORMAL_PRIO + ++ ((prio - MIN_NORMAL_PRIO) + rq->time_edge) % + SCHED_NORMAL_PRIO_NUM; + list_splice_tail_init(rq->queue.heads + idx, &head); + } +@@ -122,13 +125,13 @@ static inline void update_rq_time_edge(struct rq *rq) + rq->time_edge = now; + if (!list_empty(&head)) { + struct task_struct *p; +- u64 new_idx = MAX_RT_PRIO + now % SCHED_NORMAL_PRIO_NUM; ++ u64 new_idx = MIN_NORMAL_PRIO + now % SCHED_NORMAL_PRIO_NUM; + + list_for_each_entry(p, &head, sq_node) + p->sq_idx = new_idx; + + list_splice(&head, rq->queue.heads + new_idx); +- set_bit(MAX_RT_PRIO, rq->queue.bitmap); ++ set_bit(MIN_NORMAL_PRIO, rq->queue.bitmap); + } + } + +@@ -151,7 +154,7 @@ static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) + + static inline void sched_imp_init(void) + { +- bitmap_set(normal_mask, MAX_RT_PRIO, SCHED_NORMAL_PRIO_NUM); ++ bitmap_set(normal_mask, MIN_NORMAL_PRIO, SCHED_NORMAL_PRIO_NUM); + } + + /* +@@ -248,7 +251,7 @@ int task_prio(const struct task_struct *p) + if (p->prio < MAX_RT_PRIO) + return (p->prio - MAX_RT_PRIO); + +- ret = task_sched_prio(p) - MAX_RT_PRIO; ++ ret = task_sched_prio(p) - MIN_NORMAL_PRIO; + + return ret; + } +-- +2.37.0 + + +From 2205d2e629a8d04a821e6710936b708b8889bb94 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 27 May 2021 15:19:51 +0000 +Subject: [PATCH 164/297] sched/pds: Optimization for MIN_NORMAL_PRIO=128 + +--- + kernel/sched/alt_core.c | 1 - + kernel/sched/bmq.h | 2 -- + kernel/sched/pds.h | 34 ++++++---------------------------- + 3 files changed, 6 insertions(+), 31 deletions(-) + +diff --git a/kernel/sched/alt_core.c 
b/kernel/sched/alt_core.c +index 8fd6fd9ec2ea..56c527cbcff5 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6689,7 +6689,6 @@ void __init sched_init(void) + struct rq *rq; + + printk(KERN_INFO ALT_SCHED_VERSION_MSG); +- sched_imp_init(); + + wait_bit_init(); + +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index 85e4c477eda8..ed6995865d81 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -64,8 +64,6 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + + static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) {} + +-static inline void sched_imp_init(void) {} +- + inline int task_running_nice(struct task_struct *p) + { + return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 4a181e6ed52f..79121046e892 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -14,8 +14,6 @@ static const u64 user_prio2deadline[NICE_WIDTH] = { + #define SCHED_PRIO_SLOT (4ULL << 20) + #define DEFAULT_SCHED_PRIO (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM / 2) + +-DECLARE_BITMAP(normal_mask, SCHED_BITS); +- + extern int alt_debug[20]; + + static inline int +@@ -66,8 +64,7 @@ static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) + static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) + { + if (p->prio >= MAX_RT_PRIO) +- p->deadline = rq->clock + +- SCHED_PRIO_SLOT * ++ p->deadline = rq->clock + SCHED_PRIO_SLOT * + (p->static_prio - MIN_NORMAL_PRIO + 1); + } + +@@ -87,18 +84,6 @@ int task_running_nice(struct task_struct *p) + return task_sched_prio(p) > DEFAULT_SCHED_PRIO; + } + +-static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) +-{ +- DECLARE_BITMAP(normal, SCHED_BITS); +- +- bitmap_and(normal, mask, normal_mask, SCHED_BITS); +- bitmap_shift_right(normal, normal, shift, SCHED_BITS); +- bitmap_and(normal, normal, normal_mask, SCHED_BITS); +- +- bitmap_andnot(mask, mask, normal_mask, SCHED_BITS); +- bitmap_or(mask, mask, normal, SCHED_BITS); +-} +- + static inline void update_rq_time_edge(struct rq *rq) + { + struct list_head head; +@@ -112,26 +97,24 @@ static inline void update_rq_time_edge(struct rq *rq) + delta = min_t(u64, SCHED_NORMAL_PRIO_NUM, now - old); + INIT_LIST_HEAD(&head); + +- prio = MIN_NORMAL_PRIO; +- for_each_set_bit_from(prio, rq->queue.bitmap, MIN_NORMAL_PRIO + delta) { ++ for_each_set_bit(prio, &rq->queue.bitmap[2], delta) { + u64 idx; + + idx = MIN_NORMAL_PRIO + +- ((prio - MIN_NORMAL_PRIO) + rq->time_edge) % +- SCHED_NORMAL_PRIO_NUM; ++ (prio + rq->time_edge) % SCHED_NORMAL_PRIO_NUM; + list_splice_tail_init(rq->queue.heads + idx, &head); + } +- sched_shift_normal_bitmap(rq->queue.bitmap, delta); ++ rq->queue.bitmap[2] >>= delta; + rq->time_edge = now; + if (!list_empty(&head)) { +- struct task_struct *p; + u64 new_idx = MIN_NORMAL_PRIO + now % SCHED_NORMAL_PRIO_NUM; ++ struct task_struct *p; + + list_for_each_entry(p, &head, sq_node) + p->sq_idx = new_idx; + + list_splice(&head, rq->queue.heads + new_idx); +- set_bit(MIN_NORMAL_PRIO, rq->queue.bitmap); ++ rq->queue.bitmap[2] |= 1UL; + } + } + +@@ -152,11 +135,6 @@ static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) + p->deadline = rq->clock + 40 * SCHED_PRIO_SLOT; + } + +-static inline void sched_imp_init(void) +-{ +- bitmap_set(normal_mask, MIN_NORMAL_PRIO, SCHED_NORMAL_PRIO_NUM); +-} +- + /* + * This routine assume that the idle task always in queue + 
*/ +-- +2.37.0 + + +From 8a244645ca795a11c1a7f06b64ed6790bfb48e5a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 28 May 2021 10:13:57 +0000 +Subject: [PATCH 165/297] sched/alt: Machine friendly time slice value + +--- + kernel/sched/alt_core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 56c527cbcff5..b553f5fa60dd 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -77,7 +77,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define STOP_PRIO (MAX_RT_PRIO - 1) + + /* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ +-u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000); ++u64 sched_timeslice_ns __read_mostly = (4 << 20); + + static int __init sched_timeslice(char *str) + { +@@ -85,14 +85,14 @@ static int __init sched_timeslice(char *str) + + get_option(&str, ×lice_us); + if (timeslice_us >= 1000) +- sched_timeslice_ns = timeslice_us * 1000; ++ sched_timeslice_ns = (timeslice_us / 1000) << 20; + + return 0; + } + early_param("sched_timeslice", sched_timeslice); + + /* Reschedule if less than this many μs left */ +-#define RESCHED_NS (100 * 1000) ++#define RESCHED_NS (100 << 10) + + /** + * sched_yield_type - Choose what sort of yield sched_yield will perform. +-- +2.37.0 + + +From 0b3e9d9f56ab201fc165896356f8e0d38ea42c71 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 28 May 2021 14:47:49 +0000 +Subject: [PATCH 166/297] sched/pds: Default 2ms time slice + +--- + kernel/sched/alt_core.c | 3 ++- + kernel/sched/bmq.h | 2 ++ + kernel/sched/pds.h | 51 ++++++++++++++++++----------------------- + 3 files changed, 26 insertions(+), 30 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b553f5fa60dd..db8f5b24089d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -77,7 +77,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define STOP_PRIO (MAX_RT_PRIO - 1) + + /* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ +-u64 sched_timeslice_ns __read_mostly = (4 << 20); ++u64 sched_timeslice_ns __read_mostly = (2 << 20); + + static int __init sched_timeslice(char *str) + { +@@ -6689,6 +6689,7 @@ void __init sched_init(void) + struct rq *rq; + + printk(KERN_INFO ALT_SCHED_VERSION_MSG); ++ sched_imp_init(); + + wait_bit_init(); + +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index ed6995865d81..7299b5cc9a87 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -36,6 +36,8 @@ static inline void deboost_task(struct task_struct *p) + /* + * Common interfaces + */ ++static inline void sched_imp_init(void) {} ++ + static inline int normal_prio(struct task_struct *p) + { + if (task_has_rt_policy(p)) +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 79121046e892..6bba054465d3 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -1,17 +1,7 @@ + #define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" + +-static const u64 user_prio2deadline[NICE_WIDTH] = { +-/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, +-/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, +-/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, +-/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, +-/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, +-/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, +-/* 10 */ 73188017, 80506818, 88557499, 97413248, 
107154572, +-/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 +-}; +- +-#define SCHED_PRIO_SLOT (4ULL << 20) ++static u64 user_prio2deadline[NICE_WIDTH]; ++ + #define DEFAULT_SCHED_PRIO (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM / 2) + + extern int alt_debug[20]; +@@ -19,11 +9,11 @@ extern int alt_debug[20]; + static inline int + task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { +- s64 delta = (p->deadline >> 22) - rq->time_edge - 1; ++ s64 delta = (p->deadline >> 21) - rq->time_edge - 1; + + if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { + pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", +- delta, p->deadline, p->deadline >> 22, rq->time_edge); ++ delta, p->deadline, p->deadline >> 21, rq->time_edge); + delta = SCHED_NORMAL_PRIO_NUM - 1ULL; + } + +@@ -48,8 +38,7 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) + { + return (IDLE_TASK_SCHED_PRIO == idx || idx < MAX_RT_PRIO) ? idx : +- MIN_NORMAL_PRIO + +- ((idx - MIN_NORMAL_PRIO) + rq->time_edge) % ++ MIN_NORMAL_PRIO + ((idx - MIN_NORMAL_PRIO) + rq->time_edge) % + SCHED_NORMAL_PRIO_NUM; + } + +@@ -64,13 +53,23 @@ static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) + static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) + { + if (p->prio >= MAX_RT_PRIO) +- p->deadline = rq->clock + SCHED_PRIO_SLOT * +- (p->static_prio - MIN_NORMAL_PRIO + 1); ++ p->deadline = rq->clock + ++ user_prio2deadline[p->static_prio - MIN_NORMAL_PRIO]; + } + + /* + * Common interfaces + */ ++static inline void sched_imp_init(void) ++{ ++ int i; ++ ++ user_prio2deadline[0] = sched_timeslice_ns; ++ for (i = 1; i < NICE_WIDTH; i++) ++ user_prio2deadline[i] = ++ user_prio2deadline[i - 1] + sched_timeslice_ns; ++} ++ + static inline int normal_prio(struct task_struct *p) + { + if (task_has_rt_policy(p)) +@@ -88,7 +87,7 @@ static inline void update_rq_time_edge(struct rq *rq) + { + struct list_head head; + u64 old = rq->time_edge; +- u64 now = rq->clock >> 22; ++ u64 now = rq->clock >> 21; + u64 prio, delta; + + if (now == old) +@@ -131,8 +130,8 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + + static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) + { +- if (unlikely(p->deadline > rq->clock + 40 * SCHED_PRIO_SLOT)) +- p->deadline = rq->clock + 40 * SCHED_PRIO_SLOT; ++ if (unlikely(p->deadline > rq->clock + user_prio2deadline[NICE_WIDTH - 1])) ++ p->deadline = rq->clock + user_prio2deadline[NICE_WIDTH - 1]; + } + + /* +@@ -224,14 +223,8 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) + */ + int task_prio(const struct task_struct *p) + { +- int ret; +- +- if (p->prio < MAX_RT_PRIO) +- return (p->prio - MAX_RT_PRIO); +- +- ret = task_sched_prio(p) - MIN_NORMAL_PRIO; +- +- return ret; ++ return (p->prio < MAX_RT_PRIO) ? 
p->prio - MAX_RT_PRIO : ++ task_sched_prio(p) - MIN_NORMAL_PRIO; + } + + static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) +-- +2.37.0 + + +From 646019ef9049053285adc0bb1b5b0329db8fcd7e Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 1 Jun 2021 11:29:16 +0000 +Subject: [PATCH 167/297] sched/pds: Code clean up + +--- + kernel/sched/pds.h | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 6bba054465d3..d7f772401b3e 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -2,8 +2,6 @@ + + static u64 user_prio2deadline[NICE_WIDTH]; + +-#define DEFAULT_SCHED_PRIO (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM / 2) +- + extern int alt_debug[20]; + + static inline int +@@ -14,10 +12,10 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { + pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", + delta, p->deadline, p->deadline >> 21, rq->time_edge); +- delta = SCHED_NORMAL_PRIO_NUM - 1ULL; ++ return SCHED_NORMAL_PRIO_NUM - 1ULL; + } + +- return (delta < 0)? 0:delta; ++ return (delta < 0) ? 0 : delta; + } + + static inline int +@@ -72,15 +70,13 @@ static inline void sched_imp_init(void) + + static inline int normal_prio(struct task_struct *p) + { +- if (task_has_rt_policy(p)) +- return MAX_RT_PRIO - 1 - p->rt_priority; +- +- return MAX_RT_PRIO; ++ return task_has_rt_policy(p) ? (MAX_RT_PRIO - 1 - p->rt_priority) : ++ MAX_RT_PRIO; + } + + int task_running_nice(struct task_struct *p) + { +- return task_sched_prio(p) > DEFAULT_SCHED_PRIO; ++ return task_sched_prio(p) > DEFAULT_PRIO; + } + + static inline void update_rq_time_edge(struct rq *rq) +-- +2.37.0 + + +From b7ff88eacb7024a2aa67ca5254aa086443a195be Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 2 Jun 2021 10:25:26 +0000 +Subject: [PATCH 168/297] sched/pds: SCHED_NORMAL_PRIO_NUM to 64 + +--- + include/linux/sched/prio.h | 9 +++++---- + kernel/sched/alt_sched.h | 2 +- + kernel/sched/pds.h | 12 ++++++------ + 3 files changed, 12 insertions(+), 11 deletions(-) + +diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h +index a191f253771b..6af9ae681116 100644 +--- a/include/linux/sched/prio.h ++++ b/include/linux/sched/prio.h +@@ -26,7 +26,7 @@ + + /* +/- priority levels from the base priority */ + #ifdef CONFIG_SCHED_BMQ +-#define MAX_PRIORITY_ADJ 7 ++#define MAX_PRIORITY_ADJ (7) + + #define MIN_NORMAL_PRIO (MAX_RT_PRIO) + #define MAX_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH) +@@ -34,11 +34,12 @@ + #endif + + #ifdef CONFIG_SCHED_PDS +-#define MAX_PRIORITY_ADJ 0 ++#define MAX_PRIORITY_ADJ (0) + + #define MIN_NORMAL_PRIO (128) +-#define MAX_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH) +-#define DEFAULT_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH / 2) ++#define NORMAL_PRIO_NUM (64) ++#define MAX_PRIO (MIN_NORMAL_PRIO + NORMAL_PRIO_NUM) ++#define DEFAULT_PRIO (MAX_PRIO - NICE_WIDTH / 2) + #endif + + #endif /* CONFIG_SCHED_ALT */ +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 52e1baa4f5da..db89d3d3be63 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -56,7 +56,7 @@ + #endif + + #ifdef CONFIG_SCHED_PDS +-#define SCHED_NORMAL_PRIO_NUM (NICE_WIDTH) ++#define SCHED_NORMAL_PRIO_NUM (NORMAL_PRIO_NUM) + /* bits: RT(0-99), reserved(100-127), SCHED_NORMAL_PRIO_NUM, cpu idle task */ + #define SCHED_BITS (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM + 1) + #endif /* CONFIG_SCHED_PDS */ +diff --git 
a/kernel/sched/pds.h b/kernel/sched/pds.h +index d7f772401b3e..5abc6a9d0b9b 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -7,7 +7,8 @@ extern int alt_debug[20]; + static inline int + task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { +- s64 delta = (p->deadline >> 21) - rq->time_edge - 1; ++ s64 delta = (p->deadline >> 21) - rq->time_edge + ++ SCHED_NORMAL_PRIO_NUM - NICE_WIDTH - 1; + + if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { + pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", +@@ -51,8 +52,8 @@ static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) + static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) + { + if (p->prio >= MAX_RT_PRIO) +- p->deadline = rq->clock + +- user_prio2deadline[p->static_prio - MIN_NORMAL_PRIO]; ++ p->deadline = rq->clock + user_prio2deadline[p->static_prio - ++ (MAX_PRIO - NICE_WIDTH)]; + } + + /* +@@ -95,8 +96,7 @@ static inline void update_rq_time_edge(struct rq *rq) + for_each_set_bit(prio, &rq->queue.bitmap[2], delta) { + u64 idx; + +- idx = MIN_NORMAL_PRIO + +- (prio + rq->time_edge) % SCHED_NORMAL_PRIO_NUM; ++ idx = MIN_NORMAL_PRIO + (prio + old) % SCHED_NORMAL_PRIO_NUM; + list_splice_tail_init(rq->queue.heads + idx, &head); + } + rq->queue.bitmap[2] >>= delta; +@@ -220,7 +220,7 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) + int task_prio(const struct task_struct *p) + { + return (p->prio < MAX_RT_PRIO) ? p->prio - MAX_RT_PRIO : +- task_sched_prio(p) - MIN_NORMAL_PRIO; ++ task_sched_prio_normal(p, task_rq(p)); + } + + static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) +-- +2.37.0 + + +From 933843a21fb8b21518c42735ef8dd02f99551444 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 3 Jun 2021 09:31:01 +0000 +Subject: [PATCH 169/297] sched/pds: Optimize MOD operation when + NORMAL_PRIO_NUM==64 + +--- + kernel/sched/alt_sched.h | 5 ++--- + kernel/sched/pds.h | 33 +++++++++++++++++---------------- + 2 files changed, 19 insertions(+), 19 deletions(-) + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index db89d3d3be63..f9f79422bf0e 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -56,9 +56,8 @@ + #endif + + #ifdef CONFIG_SCHED_PDS +-#define SCHED_NORMAL_PRIO_NUM (NORMAL_PRIO_NUM) +-/* bits: RT(0-99), reserved(100-127), SCHED_NORMAL_PRIO_NUM, cpu idle task */ +-#define SCHED_BITS (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM + 1) ++/* bits: RT(0-99), reserved(100-127), NORMAL_PRIO_NUM, cpu idle task */ ++#define SCHED_BITS (MIN_NORMAL_PRIO + NORMAL_PRIO_NUM + 1) + #endif /* CONFIG_SCHED_PDS */ + + #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 5abc6a9d0b9b..41e9873d8cd7 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -4,16 +4,18 @@ static u64 user_prio2deadline[NICE_WIDTH]; + + extern int alt_debug[20]; + ++#define NORMAL_PRIO_MOD(x) ((x) & (NORMAL_PRIO_NUM - 1)) ++ + static inline int + task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { + s64 delta = (p->deadline >> 21) - rq->time_edge + +- SCHED_NORMAL_PRIO_NUM - NICE_WIDTH - 1; ++ NORMAL_PRIO_NUM - NICE_WIDTH - 1; + +- if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { ++ if (unlikely(delta > NORMAL_PRIO_NUM - 1)) { + pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", + delta, p->deadline, p->deadline >> 21, rq->time_edge); +- return 
SCHED_NORMAL_PRIO_NUM - 1ULL; ++ return NORMAL_PRIO_NUM - 1; + } + + return (delta < 0) ? 0 : delta; +@@ -30,23 +32,21 @@ static inline int + task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + { + return (p->prio < MAX_RT_PRIO) ? p->prio : MIN_NORMAL_PRIO + +- (task_sched_prio_normal(p, rq) + rq->time_edge) % +- SCHED_NORMAL_PRIO_NUM; ++ NORMAL_PRIO_MOD(task_sched_prio_normal(p, rq) + rq->time_edge); + } + +-static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) ++static inline unsigned long sched_prio2idx(unsigned long prio, struct rq *rq) + { +- return (IDLE_TASK_SCHED_PRIO == idx || idx < MAX_RT_PRIO) ? idx : +- MIN_NORMAL_PRIO + ((idx - MIN_NORMAL_PRIO) + rq->time_edge) % +- SCHED_NORMAL_PRIO_NUM; ++ return (IDLE_TASK_SCHED_PRIO == prio || prio < MAX_RT_PRIO) ? prio : ++ MIN_NORMAL_PRIO + NORMAL_PRIO_MOD((prio - MIN_NORMAL_PRIO) + ++ rq->time_edge); + } + + static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) + { + return (idx < MAX_RT_PRIO) ? idx : MIN_NORMAL_PRIO + +- ((idx - MIN_NORMAL_PRIO) + SCHED_NORMAL_PRIO_NUM - +- rq->time_edge % SCHED_NORMAL_PRIO_NUM) % +- SCHED_NORMAL_PRIO_NUM; ++ NORMAL_PRIO_MOD((idx - MIN_NORMAL_PRIO) + NORMAL_PRIO_NUM - ++ NORMAL_PRIO_MOD(rq->time_edge)); + } + + static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) +@@ -90,19 +90,20 @@ static inline void update_rq_time_edge(struct rq *rq) + if (now == old) + return; + +- delta = min_t(u64, SCHED_NORMAL_PRIO_NUM, now - old); ++ delta = min_t(u64, NORMAL_PRIO_NUM, now - old); + INIT_LIST_HEAD(&head); + + for_each_set_bit(prio, &rq->queue.bitmap[2], delta) { + u64 idx; + +- idx = MIN_NORMAL_PRIO + (prio + old) % SCHED_NORMAL_PRIO_NUM; ++ idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(prio + old); + list_splice_tail_init(rq->queue.heads + idx, &head); + } +- rq->queue.bitmap[2] >>= delta; ++ rq->queue.bitmap[2] = (NORMAL_PRIO_NUM == delta) ? 0UL : ++ rq->queue.bitmap[2] >> delta; + rq->time_edge = now; + if (!list_empty(&head)) { +- u64 new_idx = MIN_NORMAL_PRIO + now % SCHED_NORMAL_PRIO_NUM; ++ u64 new_idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); + struct task_struct *p; + + list_for_each_entry(p, &head, sq_node) +-- +2.37.0 + + +From 6c682a2c0b8a680256765402c3294e9b4d55cc5d Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 5 Jun 2021 21:28:50 +0000 +Subject: [PATCH 170/297] sched/pds: Code clean up + +--- + kernel/sched/pds.h | 18 +++++++----------- + 1 file changed, 7 insertions(+), 11 deletions(-) + +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 41e9873d8cd7..5ce0a16eb454 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -21,8 +21,7 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + return (delta < 0) ? 0 : delta; + } + +-static inline int +-task_sched_prio(const struct task_struct *p) ++static inline int task_sched_prio(const struct task_struct *p) + { + return (p->prio < MAX_RT_PRIO) ? 
p->prio : + MIN_NORMAL_PRIO + task_sched_prio_normal(p, task_rq(p)); +@@ -93,23 +92,21 @@ static inline void update_rq_time_edge(struct rq *rq) + delta = min_t(u64, NORMAL_PRIO_NUM, now - old); + INIT_LIST_HEAD(&head); + +- for_each_set_bit(prio, &rq->queue.bitmap[2], delta) { +- u64 idx; ++ for_each_set_bit(prio, &rq->queue.bitmap[2], delta) ++ list_splice_tail_init(rq->queue.heads + MIN_NORMAL_PRIO + ++ NORMAL_PRIO_MOD(prio + old), &head); + +- idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(prio + old); +- list_splice_tail_init(rq->queue.heads + idx, &head); +- } + rq->queue.bitmap[2] = (NORMAL_PRIO_NUM == delta) ? 0UL : + rq->queue.bitmap[2] >> delta; + rq->time_edge = now; + if (!list_empty(&head)) { +- u64 new_idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); ++ u64 idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); + struct task_struct *p; + + list_for_each_entry(p, &head, sq_node) +- p->sq_idx = new_idx; ++ p->sq_idx = idx; + +- list_splice(&head, rq->queue.heads + new_idx); ++ list_splice(&head, rq->queue.heads + idx); + rq->queue.bitmap[2] |= 1UL; + } + } +@@ -118,7 +115,6 @@ static inline void requeue_task(struct task_struct *p, struct rq *rq); + + static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + { +- /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ + p->time_slice = sched_timeslice_ns; + sched_renew_deadline(p, rq); + if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) +-- +2.37.0 + + +From 197fcf258831095d311a7cf3ea262c24f4406487 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 6 Jun 2021 09:32:26 +0000 +Subject: [PATCH 171/297] sched/alt: Merge BMQ&PDS common code (II) + +--- + kernel/sched/alt_core.c | 89 +++++++++++++++++++++++++++--- + kernel/sched/bmq.h | 117 ++++++++++------------------------------ + kernel/sched/pds.h | 96 ++------------------------------- + 3 files changed, 113 insertions(+), 189 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index db8f5b24089d..626bd8d20c4f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -142,6 +142,8 @@ static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; + #endif + static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; + ++static inline void requeue_task(struct task_struct *p, struct rq *rq); ++ + #ifdef CONFIG_SCHED_BMQ + #include "bmq.h" + #endif +@@ -171,8 +173,7 @@ static inline void sched_queue_init_idle(struct sched_queue *q, + list_add(&idle->sq_node, &q->heads[idle->sq_idx]); + } + +- +-/* water mark related functions*/ ++/* water mark related functions */ + static inline void update_sched_rq_watermark(struct rq *rq) + { + unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); +@@ -180,8 +181,6 @@ static inline void update_sched_rq_watermark(struct rq *rq) + unsigned long i; + int cpu; + +- /*printk(KERN_INFO "sched: watermark(%d) %d, last %d\n", +- cpu_of(rq), watermark, last_wm);*/ + if (watermark == last_wm) + return; + +@@ -216,6 +215,34 @@ static inline void update_sched_rq_watermark(struct rq *rq) + #endif + } + ++/* ++ * This routine assume that the idle task always in queue ++ */ ++static inline struct task_struct *sched_rq_first_task(struct rq *rq) ++{ ++ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); ++ const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; ++ ++ return list_first_entry(head, struct task_struct, sq_node); ++} ++ ++static inline struct task_struct * ++sched_rq_next_task(struct task_struct *p, 
struct rq *rq) ++{ ++ unsigned long idx = p->sq_idx; ++ struct list_head *head = &rq->queue.heads[idx]; ++ ++ if (list_is_last(&p->sq_node, head)) { ++ idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, ++ sched_idx2prio(idx, rq) + 1); ++ head = &rq->queue.heads[sched_prio2idx(idx, rq)]; ++ ++ return list_first_entry(head, struct task_struct, sq_node); ++ } ++ ++ return list_next_entry(p, sq_node); ++} ++ + static inline struct task_struct *rq_runnable_task(struct rq *rq) + { + struct task_struct *next = sched_rq_first_task(rq); +@@ -563,6 +590,25 @@ static inline void sched_update_tick_dependency(struct rq *rq) { } + * Add/Remove/Requeue task to/from the runqueue routines + * Context: rq->lock + */ ++#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ ++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ ++ sched_info_dequeued(rq, p); \ ++ \ ++ list_del(&p->sq_node); \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ ++ clear_bit(sched_idx2prio(p->sq_idx, rq), \ ++ rq->queue.bitmap); \ ++ func; \ ++ } ++ ++#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ ++ sched_info_queued(rq, p); \ ++ psi_enqueue(p, flags); \ ++ \ ++ p->sq_idx = task_sched_prio_idx(p, rq); \ ++ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ ++ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); ++ + static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) + { + lockdep_assert_held(&rq->lock); +@@ -602,12 +648,25 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) + + static inline void requeue_task(struct task_struct *p, struct rq *rq) + { ++ int idx; ++ + lockdep_assert_held(&rq->lock); + /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", + cpu_of(rq), task_cpu(p)); + +- __SCHED_REQUEUE_TASK(p, rq, update_sched_rq_watermark(rq)); ++ idx = task_sched_prio_idx(p, rq); ++ ++ list_del(&p->sq_node); ++ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); ++ if (idx != p->sq_idx) { ++ if (list_empty(&rq->queue.heads[p->sq_idx])) ++ clear_bit(sched_idx2prio(p->sq_idx, rq), ++ rq->queue.bitmap); ++ p->sq_idx = idx; ++ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); ++ update_sched_rq_watermark(rq); ++ } + } + + /* +@@ -4565,7 +4624,7 @@ EXPORT_SYMBOL(default_wake_function); + static inline void check_task_changed(struct task_struct *p, struct rq *rq) + { + /* Trigger resched if task sched_prio has been modified. */ +- if (task_on_rq_queued(p) && sched_task_need_requeue(p, rq)) { ++ if (task_on_rq_queued(p) && task_sched_prio_idx(p, rq) != p->sq_idx) { + requeue_task(p, rq); + check_preempt_curr(rq); + } +@@ -4755,6 +4814,24 @@ SYSCALL_DEFINE1(nice, int, increment) + + #endif + ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The priority value as seen by users in /proc. ++ * ++ * sched policy return value kernel prio user prio/nice ++ * ++ * (BMQ)normal, batch, idle[0 ... 53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] ++ * (PDS)normal, batch, idle[0 ... 39] 100 0/[-20 ... 19] ++ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] ++ */ ++int task_prio(const struct task_struct *p) ++{ ++ return (p->prio < MAX_RT_PRIO) ? p->prio - MAX_RT_PRIO : ++ task_sched_prio_normal(p, task_rq(p)); ++} ++ + /** + * idle_cpu - is a given CPU idle currently? + * @cpu: the processor in question. 
+diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index 7299b5cc9a87..840173f29e42 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -36,6 +36,33 @@ static inline void deboost_task(struct task_struct *p) + /* + * Common interfaces + */ ++static inline int ++task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) ++{ ++ return p->prio + p->boost_prio - MAX_RT_PRIO; ++} ++ ++static inline int task_sched_prio(const struct task_struct *p) ++{ ++ return (p->prio < MAX_RT_PRIO)? p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; ++} ++ ++static inline int ++task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) ++{ ++ return task_sched_prio(p); ++} ++ ++static inline unsigned long sched_prio2idx(unsigned long prio, struct rq *rq) ++{ ++ return prio; ++} ++ ++static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) ++{ ++ return idx; ++} ++ + static inline void sched_imp_init(void) {} + + static inline int normal_prio(struct task_struct *p) +@@ -46,13 +73,6 @@ static inline int normal_prio(struct task_struct *p) + return p->static_prio + MAX_PRIORITY_ADJ; + } + +-static inline int task_sched_prio(struct task_struct *p) +-{ +- return (p->prio < MAX_RT_PRIO)? p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; +-} +- +-static inline void requeue_task(struct task_struct *p, struct rq *rq); +- + static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + { + p->time_slice = sched_timeslice_ns; +@@ -71,95 +91,12 @@ inline int task_running_nice(struct task_struct *p) + return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); + } + +-/* +- * This routine used in bmq scheduler only which assume the idle task in the bmq +- */ +-static inline struct task_struct *sched_rq_first_task(struct rq *rq) +-{ +- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); +- const struct list_head *head = &rq->queue.heads[idx]; +- +- return list_first_entry(head, struct task_struct, sq_node); +-} +- +-static inline struct task_struct * +-sched_rq_next_task(struct task_struct *p, struct rq *rq) +-{ +- unsigned long idx = p->sq_idx; +- struct list_head *head = &rq->queue.heads[idx]; +- +- if (list_is_last(&p->sq_node, head)) { +- idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, idx + 1); +- head = &rq->queue.heads[idx]; +- +- return list_first_entry(head, struct task_struct, sq_node); +- } +- +- return list_next_entry(p, sq_node); +-} +- +-#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ +- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ +- sched_info_dequeued(rq, p); \ +- \ +- list_del(&p->sq_node); \ +- if (list_empty(&rq->queue.heads[p->sq_idx])) { \ +- clear_bit(p->sq_idx, rq->queue.bitmap); \ +- func; \ +- } +- +-#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ +- sched_info_queued(rq, p); \ +- psi_enqueue(p, flags); \ +- \ +- p->sq_idx = task_sched_prio(p); \ +- list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ +- set_bit(p->sq_idx, rq->queue.bitmap) +- +-#define __SCHED_REQUEUE_TASK(p, rq, func) \ +-{ \ +- int idx = task_sched_prio(p); \ +-\ +- list_del(&p->sq_node); \ +- list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ +- if (idx != p->sq_idx) { \ +- if (list_empty(&rq->queue.heads[p->sq_idx])) \ +- clear_bit(p->sq_idx, rq->queue.bitmap); \ +- p->sq_idx = idx; \ +- set_bit(p->sq_idx, rq->queue.bitmap); \ +- func; \ +- } \ +-} +- +-static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) +-{ +- return (task_sched_prio(p) != p->sq_idx); +-} +- + static 
void sched_task_fork(struct task_struct *p, struct rq *rq) + { + p->boost_prio = (p->boost_prio < 0) ? + p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; + } + +-/** +- * task_prio - return the priority value of a given task. +- * @p: the task in question. +- * +- * Return: The priority value as seen by users in /proc. +- * +- * sched policy return value kernel prio user prio/nice/boost +- * +- * normal, batch, idle [0 ... 53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] +- * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] +- */ +-int task_prio(const struct task_struct *p) +-{ +- if (p->prio < MAX_RT_PRIO) +- return (p->prio - MAX_RT_PRIO); +- return (p->prio - MAX_RT_PRIO + p->boost_prio); +-} +- + static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) + { + p->boost_prio = MAX_PRIORITY_ADJ; +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 5ce0a16eb454..31c6bd4d29c8 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -6,6 +6,9 @@ extern int alt_debug[20]; + + #define NORMAL_PRIO_MOD(x) ((x) & (NORMAL_PRIO_NUM - 1)) + ++/* ++ * Common interfaces ++ */ + static inline int + task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { +@@ -55,9 +58,6 @@ static inline void sched_renew_deadline(struct task_struct *p, const struct rq * + (MAX_PRIO - NICE_WIDTH)]; + } + +-/* +- * Common interfaces +- */ + static inline void sched_imp_init(void) + { + int i; +@@ -111,8 +111,6 @@ static inline void update_rq_time_edge(struct rq *rq) + } + } + +-static inline void requeue_task(struct task_struct *p, struct rq *rq); +- + static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + { + p->time_slice = sched_timeslice_ns; +@@ -127,99 +125,11 @@ static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) + p->deadline = rq->clock + user_prio2deadline[NICE_WIDTH - 1]; + } + +-/* +- * This routine assume that the idle task always in queue +- */ +-static inline struct task_struct *sched_rq_first_task(struct rq *rq) +-{ +- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); +- const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; +- +- return list_first_entry(head, struct task_struct, sq_node); +-} +- +-static inline struct task_struct * +-sched_rq_next_task(struct task_struct *p, struct rq *rq) +-{ +- unsigned long idx = p->sq_idx; +- struct list_head *head = &rq->queue.heads[idx]; +- +- if (list_is_last(&p->sq_node, head)) { +- idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, +- sched_idx2prio(idx, rq) + 1); +- head = &rq->queue.heads[sched_prio2idx(idx, rq)]; +- +- return list_first_entry(head, struct task_struct, sq_node); +- } +- +- return list_next_entry(p, sq_node); +-} +- +-#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ +- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ +- sched_info_dequeued(rq, p); \ +- \ +- list_del(&p->sq_node); \ +- if (list_empty(&rq->queue.heads[p->sq_idx])) { \ +- clear_bit(sched_idx2prio(p->sq_idx, rq), \ +- rq->queue.bitmap); \ +- func; \ +- } +- +-#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ +- sched_info_queued(rq, p); \ +- psi_enqueue(p, flags); \ +- \ +- p->sq_idx = task_sched_prio_idx(p, rq); \ +- list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ +- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); +- +-/* +- * Requeue a task @p to @rq +- */ +-#define __SCHED_REQUEUE_TASK(p, rq, func) \ +-{\ +- int idx = task_sched_prio_idx(p, rq); \ +-\ +- list_del(&p->sq_node); \ +- list_add_tail(&p->sq_node, &rq->queue.heads[idx]); 
\ +- if (idx != p->sq_idx) { \ +- if (list_empty(&rq->queue.heads[p->sq_idx])) \ +- clear_bit(sched_idx2prio(p->sq_idx, rq), \ +- rq->queue.bitmap); \ +- p->sq_idx = idx; \ +- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ +- func; \ +- } \ +-} +- +-static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) +-{ +- return (task_sched_prio_idx(p, rq) != p->sq_idx); +-} +- + static void sched_task_fork(struct task_struct *p, struct rq *rq) + { + sched_renew_deadline(p, rq); + } + +-/** +- * task_prio - return the priority value of a given task. +- * @p: the task in question. +- * +- * Return: The priority value as seen by users in /proc. +- * +- * sched policy return value kernel prio user prio/nice +- * +- * normal, batch, idle [0 ... 39] 100 0/[-20 ... 19] +- * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] +- */ +-int task_prio(const struct task_struct *p) +-{ +- return (p->prio < MAX_RT_PRIO) ? p->prio - MAX_RT_PRIO : +- task_sched_prio_normal(p, task_rq(p)); +-} +- + static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) + { + time_slice_expired(p, rq); +-- +2.37.0 + + +From 41412d3db98db2fa1d3a4ed9de6cca1da3edfbff Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 6 Jun 2021 18:04:37 +0000 +Subject: [PATCH 172/297] sched/pds: Introduce sched_timeslice_shift + +--- + kernel/sched/alt_core.c | 28 +++++++++++++++------------- + kernel/sched/bmq.h | 2 ++ + kernel/sched/pds.h | 13 ++++++++++--- + 3 files changed, 27 insertions(+), 16 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 626bd8d20c4f..799605256a19 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -79,13 +79,24 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + /* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ + u64 sched_timeslice_ns __read_mostly = (2 << 20); + ++static inline void requeue_task(struct task_struct *p, struct rq *rq); ++ ++#ifdef CONFIG_SCHED_BMQ ++#include "bmq.h" ++#endif ++#ifdef CONFIG_SCHED_PDS ++#include "pds.h" ++#endif ++ + static int __init sched_timeslice(char *str) + { +- int timeslice_us; ++ int timeslice_ms; + +- get_option(&str, ×lice_us); +- if (timeslice_us >= 1000) +- sched_timeslice_ns = (timeslice_us / 1000) << 20; ++ get_option(&str, ×lice_ms); ++ if (2 != timeslice_ms) ++ timeslice_ms = 4; ++ sched_timeslice_ns = timeslice_ms << 20; ++ sched_timeslice_imp(timeslice_ms); + + return 0; + } +@@ -142,15 +153,6 @@ static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; + #endif + static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; + +-static inline void requeue_task(struct task_struct *p, struct rq *rq); +- +-#ifdef CONFIG_SCHED_BMQ +-#include "bmq.h" +-#endif +-#ifdef CONFIG_SCHED_PDS +-#include "pds.h" +-#endif +- + /* sched_queue related functions */ + static inline void sched_queue_init(struct sched_queue *q) + { +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index 840173f29e42..f9f58c21c1e4 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -36,6 +36,8 @@ static inline void deboost_task(struct task_struct *p) + /* + * Common interfaces + */ ++static inline void sched_timeslice_imp(const int timeslice_ms) {} ++ + static inline int + task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 31c6bd4d29c8..b9b19c6a7622 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -1,6 +1,7 @@ + #define 
ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" + + static u64 user_prio2deadline[NICE_WIDTH]; ++static int sched_timeslice_shift = 22; + + extern int alt_debug[20]; + +@@ -9,15 +10,21 @@ extern int alt_debug[20]; + /* + * Common interfaces + */ ++static inline void sched_timeslice_imp(const int timeslice_ms) ++{ ++ if (2 == timeslice_ms) ++ sched_timeslice_shift = 21; ++} ++ + static inline int + task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { +- s64 delta = (p->deadline >> 21) - rq->time_edge + ++ s64 delta = (p->deadline >> sched_timeslice_shift) - rq->time_edge + + NORMAL_PRIO_NUM - NICE_WIDTH - 1; + + if (unlikely(delta > NORMAL_PRIO_NUM - 1)) { + pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", +- delta, p->deadline, p->deadline >> 21, rq->time_edge); ++ delta, p->deadline, p->deadline >> sched_timeslice_shift, rq->time_edge); + return NORMAL_PRIO_NUM - 1; + } + +@@ -83,7 +90,7 @@ static inline void update_rq_time_edge(struct rq *rq) + { + struct list_head head; + u64 old = rq->time_edge; +- u64 now = rq->clock >> 21; ++ u64 now = rq->clock >> sched_timeslice_shift; + u64 prio, delta; + + if (now == old) +-- +2.37.0 + + +From f1bdc0b54db882449520211ce31c6a8249797ecb Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 7 Jun 2021 09:31:11 +0000 +Subject: [PATCH 173/297] sched/pds: Optimize task deadline + +--- + include/linux/sched/deadline.h | 2 +- + kernel/sched/alt_core.c | 11 +---------- + kernel/sched/bmq.h | 2 -- + kernel/sched/pds.h | 29 ++++++++--------------------- + 4 files changed, 10 insertions(+), 34 deletions(-) + +diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h +index 20c59b190b1a..fa30f98cb2be 100644 +--- a/include/linux/sched/deadline.h ++++ b/include/linux/sched/deadline.h +@@ -12,7 +12,7 @@ static inline int dl_task(struct task_struct *p) + #endif + + #ifdef CONFIG_SCHED_PDS +-#define __tsk_deadline(p) ((((u64) ((p)->prio))<<56) | (((p)->deadline)>>8)) ++#define __tsk_deadline(p) ((((u64) ((p)->prio))<<56) | (p)->deadline) + #endif + + #else +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 799605256a19..946983ca5763 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3902,15 +3902,7 @@ void alt_sched_debug(void) + sched_sg_idle_mask.bits[0]); + } + #else +-int alt_debug[20]; +- +-inline void alt_sched_debug(void) +-{ +- int i; +- +- for (i = 0; i < 6; i++) +- printk(KERN_INFO "sched: %d\n", alt_debug[i]); +-} ++inline void alt_sched_debug(void) {} + #endif + + #ifdef CONFIG_SMP +@@ -6768,7 +6760,6 @@ void __init sched_init(void) + struct rq *rq; + + printk(KERN_INFO ALT_SCHED_VERSION_MSG); +- sched_imp_init(); + + wait_bit_init(); + +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index f9f58c21c1e4..b425f8979b6f 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -65,8 +65,6 @@ static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) + return idx; + } + +-static inline void sched_imp_init(void) {} +- + static inline int normal_prio(struct task_struct *p) + { + if (task_has_rt_policy(p)) +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index b9b19c6a7622..4898b3ae8e41 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -1,10 +1,7 @@ + #define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" + +-static u64 user_prio2deadline[NICE_WIDTH]; + static int sched_timeslice_shift = 22; + +-extern 
int alt_debug[20]; +- + #define NORMAL_PRIO_MOD(x) ((x) & (NORMAL_PRIO_NUM - 1)) + + /* +@@ -19,12 +16,11 @@ static inline void sched_timeslice_imp(const int timeslice_ms) + static inline int + task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { +- s64 delta = (p->deadline >> sched_timeslice_shift) - rq->time_edge + +- NORMAL_PRIO_NUM - NICE_WIDTH - 1; ++ s64 delta = p->deadline - rq->time_edge + NORMAL_PRIO_NUM - NICE_WIDTH; + + if (unlikely(delta > NORMAL_PRIO_NUM - 1)) { +- pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", +- delta, p->deadline, p->deadline >> sched_timeslice_shift, rq->time_edge); ++ pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu, time_edge %llu\n", ++ delta, p->deadline, rq->time_edge); + return NORMAL_PRIO_NUM - 1; + } + +@@ -61,18 +57,8 @@ static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) + static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) + { + if (p->prio >= MAX_RT_PRIO) +- p->deadline = rq->clock + user_prio2deadline[p->static_prio - +- (MAX_PRIO - NICE_WIDTH)]; +-} +- +-static inline void sched_imp_init(void) +-{ +- int i; +- +- user_prio2deadline[0] = sched_timeslice_ns; +- for (i = 1; i < NICE_WIDTH; i++) +- user_prio2deadline[i] = +- user_prio2deadline[i - 1] + sched_timeslice_ns; ++ p->deadline = (rq->clock >> sched_timeslice_shift) + ++ p->static_prio - (MAX_PRIO - NICE_WIDTH); + } + + static inline int normal_prio(struct task_struct *p) +@@ -128,8 +114,9 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + + static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) + { +- if (unlikely(p->deadline > rq->clock + user_prio2deadline[NICE_WIDTH - 1])) +- p->deadline = rq->clock + user_prio2deadline[NICE_WIDTH - 1]; ++ u64 max_dl = rq->time_edge + NICE_WIDTH - 1; ++ if (unlikely(p->deadline > max_dl)) ++ p->deadline = max_dl; + } + + static void sched_task_fork(struct task_struct *p, struct rq *rq) +-- +2.37.0 + + +From a06dd8ed4d172d532f029e7030447d36cfdab3a2 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 11 Jun 2021 18:19:19 +0000 +Subject: [PATCH 174/297] sched/pds: Optimize parameter and return types + +--- + kernel/sched/bmq.h | 4 ++-- + kernel/sched/pds.h | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index b425f8979b6f..76db5eb21a01 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -55,12 +55,12 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + return task_sched_prio(p); + } + +-static inline unsigned long sched_prio2idx(unsigned long prio, struct rq *rq) ++static inline int sched_prio2idx(int prio, struct rq *rq) + { + return prio; + } + +-static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) ++static inline int sched_idx2prio(int idx, struct rq *rq) + { + return idx; + } +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 4898b3ae8e41..ed336dd35809 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -40,14 +40,14 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + NORMAL_PRIO_MOD(task_sched_prio_normal(p, rq) + rq->time_edge); + } + +-static inline unsigned long sched_prio2idx(unsigned long prio, struct rq *rq) ++static inline int sched_prio2idx(int prio, struct rq *rq) + { + return (IDLE_TASK_SCHED_PRIO == prio || prio < MAX_RT_PRIO) ? 
prio : + MIN_NORMAL_PRIO + NORMAL_PRIO_MOD((prio - MIN_NORMAL_PRIO) + + rq->time_edge); + } + +-static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) ++static inline int sched_idx2prio(int idx, struct rq *rq) + { + return (idx < MAX_RT_PRIO) ? idx : MIN_NORMAL_PRIO + + NORMAL_PRIO_MOD((idx - MIN_NORMAL_PRIO) + NORMAL_PRIO_NUM - +-- +2.37.0 + + +From 957d9475a9c6ee7522f5228a278aac130ad69d61 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 13 Jun 2021 11:34:41 +0000 +Subject: [PATCH 175/297] sched/pds: Use common normal_prio() + +--- + init/init_task.c | 7 +------ + kernel/sched/alt_core.c | 13 +++++++++++++ + kernel/sched/bmq.h | 8 -------- + kernel/sched/pds.h | 6 ------ + 4 files changed, 14 insertions(+), 20 deletions(-) + +diff --git a/init/init_task.c b/init/init_task.c +index 579d99864d49..2d0bad762895 100644 +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -75,15 +75,10 @@ struct task_struct init_task + .stack = init_stack, + .usage = REFCOUNT_INIT(2), + .flags = PF_KTHREAD, +-#ifdef CONFIG_SCHED_BMQ ++#ifdef CONFIG_SCHED_ALT + .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, + .static_prio = DEFAULT_PRIO, + .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, +-#endif +-#ifdef CONFIG_SCHED_PDS +- .prio = MAX_RT_PRIO, +- .static_prio = DEFAULT_PRIO, +- .normal_prio = MAX_RT_PRIO, + #else + .prio = MAX_PRIO - 20, + .static_prio = MAX_PRIO - 20, +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 946983ca5763..57c34cf29956 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1121,6 +1121,19 @@ static inline void hrtick_rq_init(struct rq *rq) + } + #endif /* CONFIG_SCHED_HRTICK */ + ++/* ++ * Calculate the expected normal priority: i.e. priority ++ * without taking RT-inheritance into account. Might be ++ * boosted by interactivity modifiers. Changes upon fork, ++ * setprio syscalls, and whenever the interactivity ++ * estimator recalculates. ++ */ ++static inline int normal_prio(struct task_struct *p) ++{ ++ return task_has_rt_policy(p) ? (MAX_RT_PRIO - 1 - p->rt_priority) : ++ p->static_prio + MAX_PRIORITY_ADJ; ++} ++ + /* + * Calculate the current priority, i.e. the priority + * taken into account by the scheduler. This value might +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index 76db5eb21a01..7635c00dde7f 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -65,14 +65,6 @@ static inline int sched_idx2prio(int idx, struct rq *rq) + return idx; + } + +-static inline int normal_prio(struct task_struct *p) +-{ +- if (task_has_rt_policy(p)) +- return MAX_RT_PRIO - 1 - p->rt_priority; +- +- return p->static_prio + MAX_PRIORITY_ADJ; +-} +- + static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + { + p->time_slice = sched_timeslice_ns; +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index ed336dd35809..c23294178c2b 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -61,12 +61,6 @@ static inline void sched_renew_deadline(struct task_struct *p, const struct rq * + p->static_prio - (MAX_PRIO - NICE_WIDTH); + } + +-static inline int normal_prio(struct task_struct *p) +-{ +- return task_has_rt_policy(p) ? 
(MAX_RT_PRIO - 1 - p->rt_priority) : +- MAX_RT_PRIO; +-} +- + int task_running_nice(struct task_struct *p) + { + return task_sched_prio(p) > DEFAULT_PRIO; +-- +2.37.0 + + +From b99fc38d6a6f675b02f577f89343fa0a7ccbb798 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Jun 2021 07:24:08 +0000 +Subject: [PATCH 176/297] sched/alt: Optimization and code clean-up + +--- + kernel/sched/alt_core.c | 20 +++++++------------- + kernel/sched/pds.h | 2 +- + 2 files changed, 8 insertions(+), 14 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 57c34cf29956..a8ba783b07ff 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -77,7 +77,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define STOP_PRIO (MAX_RT_PRIO - 1) + + /* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ +-u64 sched_timeslice_ns __read_mostly = (2 << 20); ++u64 sched_timeslice_ns __read_mostly = (4 << 20); + + static inline void requeue_task(struct task_struct *p, struct rq *rq); + +@@ -193,9 +193,8 @@ static inline void update_sched_rq_watermark(struct rq *rq) + cpumask_andnot(&sched_rq_watermark[i], + &sched_rq_watermark[i], cpumask_of(cpu)); + #ifdef CONFIG_SCHED_SMT +- if (!static_branch_likely(&sched_smt_present)) +- return; +- if (IDLE_WM == last_wm) ++ if (static_branch_likely(&sched_smt_present) && ++ IDLE_WM == last_wm) + cpumask_andnot(&sched_sg_idle_mask, + &sched_sg_idle_mask, cpu_smt_mask(cpu)); + #endif +@@ -205,10 +204,9 @@ static inline void update_sched_rq_watermark(struct rq *rq) + for (i = last_wm + 1; i <= watermark; i++) + cpumask_set_cpu(cpu, &sched_rq_watermark[i]); + #ifdef CONFIG_SCHED_SMT +- if (!static_branch_likely(&sched_smt_present)) +- return; +- if (IDLE_WM == watermark) { ++ if (static_branch_likely(&sched_smt_present) && IDLE_WM == watermark) { + cpumask_t tmp; ++ + cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); + if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) + cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), +@@ -1003,13 +1001,10 @@ static void hrtick_clear(struct rq *rq) + static enum hrtimer_restart hrtick(struct hrtimer *timer) + { + struct rq *rq = container_of(timer, struct rq, hrtick_timer); +- struct task_struct *p; + + WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); + + raw_spin_lock(&rq->lock); +- p = rq->curr; +- p->time_slice = 0; + resched_curr(rq); + raw_spin_unlock(&rq->lock); + +@@ -2733,9 +2728,7 @@ void wake_up_new_task(struct task_struct *p) + struct rq *rq; + + raw_spin_lock_irqsave(&p->pi_lock, flags); +- + p->state = TASK_RUNNING; +- + rq = cpu_rq(select_task_rq(p)); + #ifdef CONFIG_SMP + rseq_migrate(p); +@@ -2743,6 +2736,7 @@ void wake_up_new_task(struct task_struct *p) + * Fork balancing, do it here and not earlier because: + * - cpus_ptr can change in the fork path + * - any previously selected CPU might disappear through hotplug ++ * + * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, + * as we're not fully set-up yet. 
+ */ +@@ -2750,8 +2744,8 @@ void wake_up_new_task(struct task_struct *p) + #endif + + raw_spin_lock(&rq->lock); +- + update_rq_clock(rq); ++ + activate_task(p, rq); + trace_sched_wakeup_new(p); + check_preempt_curr(rq); +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index c23294178c2b..06d88e72b543 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -63,7 +63,7 @@ static inline void sched_renew_deadline(struct task_struct *p, const struct rq * + + int task_running_nice(struct task_struct *p) + { +- return task_sched_prio(p) > DEFAULT_PRIO; ++ return (p->prio > DEFAULT_PRIO); + } + + static inline void update_rq_time_edge(struct rq *rq) +-- +2.37.0 + + +From 53012af8bf91faf80f525225027156355b9616a8 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 6 Jul 2021 11:18:04 +0000 +Subject: [PATCH 177/297] Project-C v5.13-r1 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index a8ba783b07ff..b65b12c6014f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.13-r0" ++#define ALT_SCHED_VERSION "v5.13-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 7a7d569b3d8439a666a0ebd6b090909accd1fe46 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 27 Jun 2021 14:45:03 +0000 +Subject: [PATCH 178/297] sched/alt: Reverse sched_rq_watermark order + +--- + kernel/sched/alt_core.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b65b12c6014f..ffe95d0b5856 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -189,9 +189,10 @@ static inline void update_sched_rq_watermark(struct rq *rq) + rq->watermark = watermark; + cpu = cpu_of(rq); + if (watermark < last_wm) { +- for (i = watermark + 1; i <= last_wm; i++) +- cpumask_andnot(&sched_rq_watermark[i], +- &sched_rq_watermark[i], cpumask_of(cpu)); ++ for (i = last_wm; i > watermark; i--) ++ cpumask_andnot(&sched_rq_watermark[SCHED_BITS - 1 - i], ++ &sched_rq_watermark[SCHED_BITS - 1 - i], ++ cpumask_of(cpu)); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && + IDLE_WM == last_wm) +@@ -201,13 +202,13 @@ static inline void update_sched_rq_watermark(struct rq *rq) + return; + } + /* last_wm < watermark */ +- for (i = last_wm + 1; i <= watermark; i++) +- cpumask_set_cpu(cpu, &sched_rq_watermark[i]); ++ for (i = watermark; i > last_wm; i--) ++ cpumask_set_cpu(cpu, &sched_rq_watermark[SCHED_BITS - 1 - i]); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && IDLE_WM == watermark) { + cpumask_t tmp; + +- cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); ++ cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[0]); + if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) + cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), + &sched_sg_idle_mask); +@@ -1736,9 +1737,9 @@ static inline int select_task_rq(struct task_struct *p) + #ifdef CONFIG_SCHED_SMT + cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || + #endif +- cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || ++ cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[0]) || + cpumask_and(&tmp, &chk_mask, +- &sched_rq_watermark[task_sched_prio(p) + 
1])) ++ &sched_rq_watermark[SCHED_BITS - task_sched_prio(p)])) + return best_mask_cpu(task_cpu(p), &tmp); + + return best_mask_cpu(task_cpu(p), &chk_mask); +@@ -3592,7 +3593,7 @@ static inline void sg_balance_check(struct rq *rq) + */ + if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && + cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && +- cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) { ++ cpumask_andnot(&chk, &chk, &sched_rq_watermark[0])) { + int i, tried = 0; + + for_each_cpu_wrap(i, &chk, cpu) { +@@ -3905,7 +3906,7 @@ void alt_sched_debug(void) + { + printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", + sched_rq_pending_mask.bits[0], +- sched_rq_watermark[IDLE_WM].bits[0], ++ sched_rq_watermark[0].bits[0], + sched_sg_idle_mask.bits[0]); + } + #else +-- +2.37.0 + + +From 6b4a7bbec176bcd7141d6122f1bc09f94a2c47dd Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 28 Jun 2021 12:52:23 +0000 +Subject: [PATCH 179/297] sched/alt: Use atomic operation in + update_sched_rq_watermark() and code clean-up + +--- + kernel/sched/alt_core.c | 25 +++++++++++-------------- + 1 file changed, 11 insertions(+), 14 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index ffe95d0b5856..472d73646b67 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -146,8 +146,6 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); + # define finish_arch_post_lock_switch() do { } while (0) + #endif + +-#define IDLE_WM (IDLE_TASK_SCHED_PRIO) +- + #ifdef CONFIG_SCHED_SMT + static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; + #endif +@@ -190,12 +188,10 @@ static inline void update_sched_rq_watermark(struct rq *rq) + cpu = cpu_of(rq); + if (watermark < last_wm) { + for (i = last_wm; i > watermark; i--) +- cpumask_andnot(&sched_rq_watermark[SCHED_BITS - 1 - i], +- &sched_rq_watermark[SCHED_BITS - 1 - i], +- cpumask_of(cpu)); ++ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && +- IDLE_WM == last_wm) ++ IDLE_TASK_SCHED_PRIO == last_wm) + cpumask_andnot(&sched_sg_idle_mask, + &sched_sg_idle_mask, cpu_smt_mask(cpu)); + #endif +@@ -203,12 +199,13 @@ static inline void update_sched_rq_watermark(struct rq *rq) + } + /* last_wm < watermark */ + for (i = watermark; i > last_wm; i--) +- cpumask_set_cpu(cpu, &sched_rq_watermark[SCHED_BITS - 1 - i]); ++ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); + #ifdef CONFIG_SCHED_SMT +- if (static_branch_likely(&sched_smt_present) && IDLE_WM == watermark) { ++ if (static_branch_likely(&sched_smt_present) && ++ IDLE_TASK_SCHED_PRIO == watermark) { + cpumask_t tmp; + +- cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[0]); ++ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark); + if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) + cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), + &sched_sg_idle_mask); +@@ -1737,9 +1734,9 @@ static inline int select_task_rq(struct task_struct *p) + #ifdef CONFIG_SCHED_SMT + cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || + #endif +- cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[0]) || ++ cpumask_and(&tmp, &chk_mask, sched_rq_watermark) || + cpumask_and(&tmp, &chk_mask, +- &sched_rq_watermark[SCHED_BITS - task_sched_prio(p)])) ++ sched_rq_watermark + SCHED_BITS - task_sched_prio(p))) + return best_mask_cpu(task_cpu(p), &tmp); + + return best_mask_cpu(task_cpu(p), &chk_mask); +@@ -3593,7 +3590,7 @@ static inline void 
sg_balance_check(struct rq *rq) + */ + if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && + cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && +- cpumask_andnot(&chk, &chk, &sched_rq_watermark[0])) { ++ cpumask_andnot(&chk, &chk, sched_rq_watermark)) { + int i, tried = 0; + + for_each_cpu_wrap(i, &chk, cpu) { +@@ -6773,7 +6770,7 @@ void __init sched_init(void) + + #ifdef CONFIG_SMP + for (i = 0; i < SCHED_BITS; i++) +- cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); ++ cpumask_copy(sched_rq_watermark + i, cpu_present_mask); + #endif + + #ifdef CONFIG_CGROUP_SCHED +@@ -6787,7 +6784,7 @@ void __init sched_init(void) + rq = cpu_rq(i); + + sched_queue_init(&rq->queue); +- rq->watermark = IDLE_WM; ++ rq->watermark = IDLE_TASK_SCHED_PRIO; + rq->skip = NULL; + + raw_spin_lock_init(&rq->lock); +-- +2.37.0 + + +From 7e147334ae87c0f527a38a2a716990715ba92688 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 7 Jul 2021 10:38:54 +0000 +Subject: [PATCH 180/297] sched/alt: inline some BMQ/PDS interfaces + +--- + kernel/sched/bmq.h | 6 +++--- + kernel/sched/pds.h | 12 +++++------- + 2 files changed, 8 insertions(+), 10 deletions(-) + +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index 7635c00dde7f..be3ee4a553ca 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -89,20 +89,20 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) + p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; + } + +-static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) ++static inline void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) + { + p->boost_prio = MAX_PRIORITY_ADJ; + } + + #ifdef CONFIG_SMP +-static void sched_task_ttwu(struct task_struct *p) ++static inline void sched_task_ttwu(struct task_struct *p) + { + if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) + boost_task(p); + } + #endif + +-static void sched_task_deactivate(struct task_struct *p, struct rq *rq) ++static inline void sched_task_deactivate(struct task_struct *p, struct rq *rq) + { + if (rq_switch_time(rq) < boost_threshold(p)) + boost_task(p); +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 06d88e72b543..0f1f0d708b77 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -18,11 +18,9 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { + s64 delta = p->deadline - rq->time_edge + NORMAL_PRIO_NUM - NICE_WIDTH; + +- if (unlikely(delta > NORMAL_PRIO_NUM - 1)) { +- pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu, time_edge %llu\n", +- delta, p->deadline, rq->time_edge); ++ if (WARN_ONCE(delta > NORMAL_PRIO_NUM - 1, ++ "pds: task_sched_prio_normal() delta %lld\n", delta)) + return NORMAL_PRIO_NUM - 1; +- } + + return (delta < 0) ? 
0 : delta; + } +@@ -118,12 +116,12 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) + sched_renew_deadline(p, rq); + } + +-static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) ++static inline void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) + { + time_slice_expired(p, rq); + } + + #ifdef CONFIG_SMP +-static void sched_task_ttwu(struct task_struct *p) {} ++static inline void sched_task_ttwu(struct task_struct *p) {} + #endif +-static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} ++static inline void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} +-- +2.37.0 + + +From a70f521116f0b9875d0d3eda5681d9ab42742a30 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 10 Jul 2021 15:28:11 +0000 +Subject: [PATCH 181/297] sched/alt: Remove over design in best_mask_cpu() + +--- + kernel/sched/alt_core.c | 30 +++++++-------------- + kernel/sched/alt_sched.h | 58 ++++------------------------------------ + 2 files changed, 15 insertions(+), 73 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 472d73646b67..38f2ef489b37 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -207,8 +207,8 @@ static inline void update_sched_rq_watermark(struct rq *rq) + + cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark); + if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) +- cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), +- &sched_sg_idle_mask); ++ cpumask_or(&sched_sg_idle_mask, ++ &sched_sg_idle_mask, cpu_smt_mask(cpu)); + } + #endif + } +@@ -3528,8 +3528,7 @@ static inline int active_load_balance_cpu_stop(void *data) + cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask) && + !is_migration_disabled(p)) { + int cpu = cpu_of(rq); +- int dcpu = __best_mask_cpu(cpu, &tmp, +- per_cpu(sched_cpu_llc_mask, cpu)); ++ int dcpu = __best_mask_cpu(&tmp, per_cpu(sched_cpu_llc_mask, cpu)); + rq = move_queued_task(rq, p, dcpu); + } + +@@ -3573,34 +3572,25 @@ static inline int sg_balance_trigger(const int cpu) + static inline void sg_balance_check(struct rq *rq) + { + cpumask_t chk; +- int cpu; +- +- /* exit when no sg in idle */ +- if (cpumask_empty(&sched_sg_idle_mask)) +- return; ++ int cpu = cpu_of(rq); + + /* exit when cpu is offline */ + if (unlikely(!rq->online)) + return; + +- cpu = cpu_of(rq); + /* + * Only cpu in slibing idle group will do the checking and then + * find potential cpus which can migrate the current running task + */ + if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && +- cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && +- cpumask_andnot(&chk, &chk, sched_rq_watermark)) { +- int i, tried = 0; ++ cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) && ++ cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) { ++ int i; + + for_each_cpu_wrap(i, &chk, cpu) { +- if (cpumask_subset(cpu_smt_mask(i), &chk)) { +- if (sg_balance_trigger(i)) +- return; +- if (tried) +- return; +- tried++; +- } ++ if (cpumask_subset(cpu_smt_mask(i), &chk) && ++ sg_balance_trigger(i)) ++ return; + } + } + } +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index f9f79422bf0e..7a48809550bf 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -302,68 +302,20 @@ enum { + DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); + DECLARE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); + +-static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, +- const cpumask_t *mask) ++static inline int ++__best_mask_cpu(const 
cpumask_t *cpumask, const cpumask_t *mask) + { +-#if NR_CPUS <= 64 +- unsigned long t; ++ int cpu; + +- while ((t = cpumask->bits[0] & mask->bits[0]) == 0UL) +- mask++; +- +- return __ffs(t); +-#else + while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) + mask++; ++ + return cpu; +-#endif + } + + static inline int best_mask_cpu(int cpu, const cpumask_t *mask) + { +-#if NR_CPUS <= 64 +- unsigned long llc_match; +- cpumask_t *chk = per_cpu(sched_cpu_llc_mask, cpu); +- +- if ((llc_match = mask->bits[0] & chk->bits[0])) { +- unsigned long match; +- +- chk = per_cpu(sched_cpu_topo_masks, cpu); +- if (mask->bits[0] & chk->bits[0]) +- return cpu; +- +-#ifdef CONFIG_SCHED_SMT +- chk++; +- if ((match = mask->bits[0] & chk->bits[0])) +- return __ffs(match); +-#endif +- +- return __ffs(llc_match); +- } +- +- return __best_mask_cpu(cpu, mask, chk + 1); +-#else +- cpumask_t llc_match; +- cpumask_t *chk = per_cpu(sched_cpu_llc_mask, cpu); +- +- if (cpumask_and(&llc_match, mask, chk)) { +- cpumask_t tmp; +- +- chk = per_cpu(sched_cpu_topo_masks, cpu); +- if (cpumask_test_cpu(cpu, mask)) +- return cpu; +- +-#ifdef CONFIG_SCHED_SMT +- chk++; +- if (cpumask_and(&tmp, mask, chk)) +- return cpumask_any(&tmp); +-#endif +- +- return cpumask_any(&llc_match); +- } +- +- return __best_mask_cpu(cpu, mask, chk + 1); +-#endif ++ return __best_mask_cpu(mask, per_cpu(sched_cpu_topo_masks, cpu)); + } + + extern void flush_smp_call_function_from_idle(void); +-- +2.37.0 + + +From 3ab1f5f8743219d142b6ed816d2b4472b6beee23 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 10 Jul 2021 20:52:27 +0000 +Subject: [PATCH 182/297] sched/alt: Remove sched_cpu_affinity_masks + +--- + kernel/sched/alt_core.c | 44 ++++++++++++++++------------------------- + 1 file changed, 17 insertions(+), 27 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 38f2ef489b37..bb5f78a1e256 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -116,11 +116,9 @@ int sched_yield_type __read_mostly = 1; + #ifdef CONFIG_SMP + static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; + +-DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_affinity_masks); +-DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); +- + DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); + DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); ++DEFINE_PER_CPU(cpumask_t *, sched_cpu_topo_end_mask); + + #ifdef CONFIG_SCHED_SMT + DEFINE_STATIC_KEY_FALSE(sched_smt_present); +@@ -891,8 +889,8 @@ int get_nohz_timer_target(void) + default_cpu = cpu; + } + +- for (mask = per_cpu(sched_cpu_affinity_masks, cpu) + 1; +- mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) ++ for (mask = per_cpu(sched_cpu_topo_masks, cpu) + 1; ++ mask < per_cpu(sched_cpu_topo_end_mask, cpu); mask++) + for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) + if (!idle_cpu(i)) + return i; +@@ -3932,7 +3930,7 @@ migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) + + static inline int take_other_rq_tasks(struct rq *rq, int cpu) + { +- struct cpumask *affinity_mask, *end_mask; ++ struct cpumask *topo_mask, *end_mask; + + if (unlikely(!rq->online)) + return 0; +@@ -3940,11 +3938,11 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) + if (cpumask_empty(&sched_rq_pending_mask)) + return 0; + +- affinity_mask = per_cpu(sched_cpu_affinity_masks, cpu) + 1; +- end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); ++ topo_mask = per_cpu(sched_cpu_topo_masks, cpu) 
+ 1; ++ end_mask = per_cpu(sched_cpu_topo_end_mask, cpu); + do { + int i; +- for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { ++ for_each_cpu_and(i, &sched_rq_pending_mask, topo_mask) { + int nr_migrated; + struct rq *src_rq; + +@@ -3975,7 +3973,7 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) + spin_release(&src_rq->lock.dep_map, _RET_IP_); + do_raw_spin_unlock(&src_rq->lock); + } +- } while (++affinity_mask < end_mask); ++ } while (++topo_mask < end_mask); + + return 0; + } +@@ -6637,14 +6635,6 @@ static void sched_init_topology_cpumask_early(void) + cpumask_t *tmp; + + for_each_possible_cpu(cpu) { +- /* init affinity masks */ +- tmp = per_cpu(sched_cpu_affinity_masks, cpu); +- +- cpumask_copy(tmp, cpumask_of(cpu)); +- tmp++; +- cpumask_copy(tmp, cpu_possible_mask); +- cpumask_clear_cpu(cpu, tmp); +- per_cpu(sched_cpu_affinity_end_mask, cpu) = ++tmp; + /* init topo masks */ + tmp = per_cpu(sched_cpu_topo_masks, cpu); + +@@ -6652,32 +6642,32 @@ static void sched_init_topology_cpumask_early(void) + tmp++; + cpumask_copy(tmp, cpu_possible_mask); + per_cpu(sched_cpu_llc_mask, cpu) = tmp; ++ per_cpu(sched_cpu_topo_end_mask, cpu) = ++tmp; + /*per_cpu(sd_llc_id, cpu) = cpu;*/ + } + } + +-#define TOPOLOGY_CPUMASK(name, mask, last) \ +- if (cpumask_and(chk, chk, mask)) { \ ++#define TOPOLOGY_CPUMASK(name, mask, last)\ ++ if (cpumask_and(topo, topo, mask)) { \ + cpumask_copy(topo, mask); \ +- printk(KERN_INFO "sched: cpu#%02d affinity: 0x%08lx topo: 0x%08lx - "#name,\ +- cpu, (chk++)->bits[0], (topo++)->bits[0]); \ ++ printk(KERN_INFO "sched: cpu#%02d topo: 0x%08lx - "#name, \ ++ cpu, (topo++)->bits[0]); \ + } \ + if (!last) \ +- cpumask_complement(chk, mask) ++ cpumask_complement(topo, mask) + + static void sched_init_topology_cpumask(void) + { + int cpu; +- cpumask_t *chk, *topo; ++ cpumask_t *topo; + + for_each_online_cpu(cpu) { + /* take chance to reset time slice for idle tasks */ + cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; + +- chk = per_cpu(sched_cpu_affinity_masks, cpu) + 1; + topo = per_cpu(sched_cpu_topo_masks, cpu) + 1; + +- cpumask_complement(chk, cpumask_of(cpu)); ++ cpumask_complement(topo, cpumask_of(cpu)); + #ifdef CONFIG_SCHED_SMT + TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); + #endif +@@ -6689,7 +6679,7 @@ static void sched_init_topology_cpumask(void) + + TOPOLOGY_CPUMASK(others, cpu_online_mask, true); + +- per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; ++ per_cpu(sched_cpu_topo_end_mask, cpu) = topo; + printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", + cpu, per_cpu(sd_llc_id, cpu), + (int) (per_cpu(sched_cpu_llc_mask, cpu) - +-- +2.37.0 + + +From 10baf332ef6005de5c1303ce61a994cd6db734b5 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 2 Aug 2021 13:56:40 +0800 +Subject: [PATCH 183/297] sched/alt: kernel document update for sched_timeslice + +--- + Documentation/admin-guide/kernel-parameters.txt | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index d243405aa3bf..d92bf8eb93b9 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5274,9 +5274,9 @@ + See drivers/net/irda/sa1100_ir.c. + + sched_timeslice= +- [KNL] Time slice in us for BMQ/PDS scheduler. +- Format: (must be >= 1000) +- Default: 4000 ++ [KNL] Time slice in ms for Project C BMQ/PDS scheduler. 
++ Format: integer 2, 4 ++ Default: 4 + See Documentation/scheduler/sched-BMQ.txt + + sched_verbose [KNL] Enables verbose scheduler debug messages. +-- +2.37.0 + + +From c1010d7f83034c76337bf83fcb032913e169a380 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 29 Jul 2021 19:11:07 +0000 +Subject: [PATCH 184/297] Project-C v5.13-r2 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index bb5f78a1e256..e296d56e85f0 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.13-r1" ++#define ALT_SCHED_VERSION "v5.13-r2" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From f1ba538d3943cdf2b4dbd6888798c06bcc9c9f3f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 1 Aug 2021 09:17:30 +0000 +Subject: [PATCH 185/297] sched/alt: Optimize sched_exec() + +Improve exec performance under heavy load. +Kernel compilation comparation +>>>>>16 job(s) +1m21.148s(baseline) --> 1m19.474s (- ~2.06%) +>>>>>24 job(s) +1m22.362s(baseline) --> 1m19.771s (- ~3.15%) +--- + kernel/sched/alt_core.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index e296d56e85f0..b5e91c874cc1 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3321,15 +3321,9 @@ void sched_exec(void) + struct task_struct *p = current; + unsigned long flags; + int dest_cpu; +- struct rq *rq; + + raw_spin_lock_irqsave(&p->pi_lock, flags); +- rq = this_rq(); +- +- if (rq != task_rq(p) || rq->nr_running < 2) +- goto unlock; +- +- dest_cpu = select_task_rq(p); ++ dest_cpu = cpumask_any(p->cpus_ptr); + if (dest_cpu == smp_processor_id()) + goto unlock; + +-- +2.37.0 + + +From b613b42d52b674261a5c7bdf8e0e6dc962108b24 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 22 Aug 2021 17:06:15 +0000 +Subject: [PATCH 186/297] Project-C v5.13-r3 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b5e91c874cc1..b10012b67435 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.13-r2" ++#define ALT_SCHED_VERSION "v5.13-r3" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From f555d191414d5e38b86abc045318539ab992cc80 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 10 Aug 2021 15:29:01 +0000 +Subject: [PATCH 187/297] sched/alt: [Sync] 2b8ca1a907d5 sched/core: Remove the + pointless BUG_ON(!task) from wake_up_q() + +--- + kernel/sched/alt_core.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b10012b67435..9417f9b6e88d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -807,7 +807,6 @@ void wake_up_q(struct wake_q_head *head) + struct task_struct *task; + + task = container_of(node, struct task_struct, wake_q); +- BUG_ON(!task); + /* task can safely be re-inserted now: */ + node = node->next; + task->wake_q.next = NULL; +-- +2.37.0 + + +From 
59a86828d9cefa08656255569bd164cdd724d1e2 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 10 Aug 2021 16:45:51 +0000 +Subject: [PATCH 188/297] sched/alt: [Sync] 4e29fb709885 sched: Rename + sched_info_{queued,dequeued} + +--- + kernel/sched/alt_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 9417f9b6e88d..c9c5d609feb6 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -588,7 +588,7 @@ static inline void sched_update_tick_dependency(struct rq *rq) { } + */ + #define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ + psi_dequeue(p, flags & DEQUEUE_SLEEP); \ +- sched_info_dequeued(rq, p); \ ++ sched_info_dequeue(rq, p); \ + \ + list_del(&p->sq_node); \ + if (list_empty(&rq->queue.heads[p->sq_idx])) { \ +@@ -598,7 +598,7 @@ static inline void sched_update_tick_dependency(struct rq *rq) { } + } + + #define __SCHED_ENQUEUE_TASK(p, rq, flags) \ +- sched_info_queued(rq, p); \ ++ sched_info_enqueue(rq, p); \ + psi_enqueue(p, flags); \ + \ + p->sq_idx = task_sched_prio_idx(p, rq); \ +-- +2.37.0 + + +From 2332c00ed6782e048b3576c589171bf667e93687 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 11 Aug 2021 14:11:13 +0000 +Subject: [PATCH 189/297] sched/alt: [Sync] f1a0a376ca0c sched/core: Initialize + the idle task with preemption disabled + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c9c5d609feb6..299a80c5df45 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6135,7 +6135,7 @@ void dump_cpu_task(int cpu) + * NOTE: this function does not set the idle thread's NEED_RESCHED + * flag, to make booting more robust. + */ +-void init_idle(struct task_struct *idle, int cpu) ++void __init init_idle(struct task_struct *idle, int cpu) + { + struct rq *rq = cpu_rq(cpu); + unsigned long flags; +-- +2.37.0 + + +From 28206256499d46b20d2445c670d24be11125b19b Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 11 Aug 2021 14:13:54 +0000 +Subject: [PATCH 190/297] sched/alt: [Sync] 01aee8fd7fb2 sched: Make + nr_running() return 32-bit value + +--- + kernel/sched/alt_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 299a80c5df45..9f10b21c0b4d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3216,9 +3216,9 @@ context_switch(struct rq *rq, struct task_struct *prev, + * externally visible scheduler statistics: current number of runnable + * threads, total number of context switches performed since bootup. + */ +-unsigned long nr_running(void) ++unsigned int nr_running(void) + { +- unsigned long i, sum = 0; ++ unsigned int i, sum = 0; + + for_each_online_cpu(i) + sum += cpu_rq(i)->nr_running; +-- +2.37.0 + + +From 31d00404a4796c9badeb779cae9d4105cef33672 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 11 Aug 2021 14:15:11 +0000 +Subject: [PATCH 191/297] sched/alt: [Sybc] 9745516841a5 sched: Make + nr_iowait() return 32-bit value + +--- + kernel/sched/alt_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 9f10b21c0b4d..2f030370f31a 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3298,9 +3298,9 @@ unsigned long nr_iowait_cpu(int cpu) + * Task CPU affinities can make all that even more 'interesting'. 
+ */ + +-unsigned long nr_iowait(void) ++unsigned int nr_iowait(void) + { +- unsigned long i, sum = 0; ++ unsigned int i, sum = 0; + + for_each_possible_cpu(i) + sum += nr_iowait_cpu(i); +-- +2.37.0 + + +From 097316137fdb3237622875f349ef79e19e8b78eb Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 11 Aug 2021 14:16:22 +0000 +Subject: [PATCH 192/297] sched/alt: [Sync] 8fc2858e572c sched: Make + nr_iowait_cpu() return 32-bit value + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 2f030370f31a..5536baf21677 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3263,7 +3263,7 @@ unsigned long long nr_context_switches(void) + * it does become runnable. + */ + +-unsigned long nr_iowait_cpu(int cpu) ++unsigned int nr_iowait_cpu(int cpu) + { + return atomic_read(&cpu_rq(cpu)->nr_iowait); + } +-- +2.37.0 + + +From 66de136c366461e6b3c8c1714f5b62baf8dfd781 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 11 Aug 2021 14:24:19 +0000 +Subject: [PATCH 193/297] sched/alt: [Sync] 00b89fe0197f sched: Make the idle + task quack like a per-CPU kthread + +--- + kernel/sched/alt_core.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 5536baf21677..7e12ba94220c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6142,13 +6142,27 @@ void __init init_idle(struct task_struct *idle, int cpu) + + __sched_fork(0, idle); + ++ /* ++ * The idle task doesn't need the kthread struct to function, but it ++ * is dressed up as a per-CPU kthread and thus needs to play the part ++ * if we want to avoid special-casing it in code that deals with per-CPU ++ * kthreads. ++ */ ++ set_kthread_struct(idle); ++ + raw_spin_lock_irqsave(&idle->pi_lock, flags); + raw_spin_lock(&rq->lock); + update_rq_clock(rq); + + idle->last_ran = rq->clock_task; + idle->state = TASK_RUNNING; +- idle->flags |= PF_IDLE; ++ /* ++ * PF_KTHREAD should already be set at this point; regardless, make it ++ * look like a proper per-CPU kthread. ++ */ ++ idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; ++ kthread_set_per_cpu(idle, cpu); ++ + sched_queue_init_idle(&rq->queue, idle); + + scs_task_reset(idle); +@@ -6287,12 +6301,8 @@ static void balance_push(struct rq *rq) + /* + * Both the cpu-hotplug and stop task are in this case and are + * required to complete the hotplug process. +- * +- * XXX: the idle task does not match kthread_is_per_cpu() due to +- * histerical raisins. 
+ */ +- if (rq->idle == push_task || +- kthread_is_per_cpu(push_task) || ++ if (kthread_is_per_cpu(push_task) || + is_migration_disabled(push_task)) { + + /* +-- +2.37.0 + + +From 45c01cf84560529ad4cb0015df3cf1abb14cab42 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 11 Aug 2021 14:59:06 +0000 +Subject: [PATCH 194/297] sched/alt: [Sync] 15faafc6b449 sched,init: Fix + DEBUG_PREEMPT vs early boot + +--- + kernel/sched/alt_core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 7e12ba94220c..aae0c674519a 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6696,6 +6696,7 @@ void __init sched_init_smp(void) + /* Move init over to a non-isolated CPU */ + if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) + BUG(); ++ current->flags &= ~PF_NO_SETAFFINITY; + + sched_init_topology_cpumask(); + +-- +2.37.0 + + +From d83e17ccfae26716b9d867edd40bd9f5d165c356 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 11 Aug 2021 15:11:11 +0000 +Subject: [PATCH 195/297] sched/alt: [Sync] 1faa491a49d5 sched/debug: Remove + obsolete init_schedstats() + +--- + kernel/sched/alt_core.c | 19 ++----------------- + 1 file changed, 2 insertions(+), 17 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index aae0c674519a..09639e0e15b8 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2636,7 +2636,6 @@ void sched_post_fork(struct task_struct *p) {} + #ifdef CONFIG_SCHEDSTATS + + DEFINE_STATIC_KEY_FALSE(sched_schedstats); +-static bool __initdata __sched_schedstats = false; + + static void set_schedstats(bool enabled) + { +@@ -2660,16 +2659,11 @@ static int __init setup_schedstats(char *str) + if (!str) + goto out; + +- /* +- * This code is called before jump labels have been set up, so we can't +- * change the static branch directly just yet. Instead set a temporary +- * variable so init_schedstats() can do it later. 
+- */ + if (!strcmp(str, "enable")) { +- __sched_schedstats = true; ++ set_schedstats(true); + ret = 1; + } else if (!strcmp(str, "disable")) { +- __sched_schedstats = false; ++ set_schedstats(false); + ret = 1; + } + out: +@@ -2680,11 +2674,6 @@ static int __init setup_schedstats(char *str) + } + __setup("schedstats=", setup_schedstats); + +-static void __init init_schedstats(void) +-{ +- set_schedstats(__sched_schedstats); +-} +- + #ifdef CONFIG_PROC_SYSCTL + int sysctl_schedstats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +@@ -2706,8 +2695,6 @@ int sysctl_schedstats(struct ctl_table *table, int write, + return err; + } + #endif /* CONFIG_PROC_SYSCTL */ +-#else /* !CONFIG_SCHEDSTATS */ +-static inline void init_schedstats(void) {} + #endif /* CONFIG_SCHEDSTATS */ + + /* +@@ -6823,8 +6810,6 @@ void __init sched_init(void) + sched_init_topology_cpumask_early(); + #endif /* SMP */ + +- init_schedstats(); +- + psi_init(); + } + +-- +2.37.0 + + +From d93abecc1add5ac16b9a2765c8673352f9facf66 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 11 Aug 2021 15:29:45 +0000 +Subject: [PATCH 196/297] sched/alt: [Sync] b03fbd4ff24c sched: Introduce + task_is_running() + +--- + kernel/sched/alt_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 09639e0e15b8..7c9f81a86420 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4214,7 +4214,7 @@ static inline void sched_submit_work(struct task_struct *tsk) + { + unsigned int task_flags; + +- if (!tsk->state) ++ if (task_is_running(tsk)) + return; + + task_flags = tsk->flags; +@@ -6032,7 +6032,7 @@ void sched_show_task(struct task_struct *p) + + pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); + +- if (p->state == TASK_RUNNING) ++ if (task_is_running(p)) + pr_cont(" running task "); + #ifdef CONFIG_DEBUG_STACK_USAGE + free = stack_not_used(p); +-- +2.37.0 + + +From ac5a483cb08e12aed6b0c476026f6bb3bc68f24f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 11 Aug 2021 15:33:03 +0000 +Subject: [PATCH 197/297] sched/alt: [Sync] d6c23bb3a2ad sched: Add + get_current_state() + +--- + kernel/sched/alt_core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 7c9f81a86420..6c2e8c7d781e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6823,15 +6823,15 @@ static inline int preempt_count_equals(int preempt_offset) + + void __might_sleep(const char *file, int line, int preempt_offset) + { ++ unsigned int state = get_current_state(); + /* + * Blocking primitives will set (and therefore destroy) current->state, + * since we will exit with TASK_RUNNING make sure we enter with it, + * otherwise we will destroy state. 
+ */ +- WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, ++ WARN_ONCE(state != TASK_RUNNING && current->task_state_change, + "do not call blocking ops when !TASK_RUNNING; " +- "state=%lx set at [<%p>] %pS\n", +- current->state, ++ "state=%x set at [<%p>] %pS\n", state, + (void *)current->task_state_change, + (void *)current->task_state_change); + +-- +2.37.0 + + +From 60c9304c7fdbb00d8e83783bd4268a732b09f993 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 11 Aug 2021 16:02:47 +0000 +Subject: [PATCH 198/297] sched/alt: [Sync] 2f064a59a11f sched: Change + task_struct::state + +--- + kernel/sched/alt_core.c | 52 ++++++++++++++++++++++------------------- + 1 file changed, 28 insertions(+), 24 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 6c2e8c7d781e..4909bf54bd4c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1207,12 +1207,14 @@ static inline bool is_migration_disabled(struct task_struct *p) + void set_task_cpu(struct task_struct *p, unsigned int new_cpu) + { + #ifdef CONFIG_SCHED_DEBUG ++ unsigned int state = READ_ONCE(p->__state); ++ + /* + * We should never call set_task_cpu() on a blocked task, + * ttwu() will sort out the placement. + */ +- WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && +- !p->on_rq); ++ WARN_ON_ONCE(state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq); ++ + #ifdef CONFIG_LOCKDEP + /* + * The caller should hold either p->pi_lock or rq->lock, when changing +@@ -1512,7 +1514,7 @@ inline int task_curr(const struct task_struct *p) + * smp_call_function() if an IPI is sent by the same process we are + * waiting to become inactive. + */ +-unsigned long wait_task_inactive(struct task_struct *p, long match_state) ++unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) + { + unsigned long flags; + bool running, on_rq; +@@ -1535,7 +1537,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) + * running somewhere else! + */ + while (task_running(p) && p == rq->curr) { +- if (match_state && unlikely(p->state != match_state)) ++ if (match_state && unlikely(READ_ONCE(p->__state) != match_state)) + return 0; + cpu_relax(); + } +@@ -1550,7 +1552,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) + running = task_running(p); + on_rq = p->on_rq; + ncsw = 0; +- if (!match_state || p->state == match_state) ++ if (!match_state || READ_ONCE(p->__state) == match_state) + ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ + task_access_unlock_irqrestore(p, lock, &flags); + +@@ -1853,7 +1855,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + rq->nr_pinned--; + } + +- if (task_running(p) || p->state == TASK_WAKING) { ++ if (task_running(p) || READ_ONCE(p->__state) == TASK_WAKING) { + struct migration_arg arg = { p, dest_cpu }; + + /* Need help from migration thread: drop lock and wait. */ +@@ -1937,7 +1939,7 @@ static inline void + ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) + { + check_preempt_curr(rq); +- p->state = TASK_RUNNING; ++ WRITE_ONCE(p->__state, TASK_RUNNING); + trace_sched_wakeup(p); + } + +@@ -2302,12 +2304,12 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * - we're serialized against set_special_state() by virtue of + * it disabling IRQs (this allows not taking ->pi_lock). 
+ */ +- if (!(p->state & state)) ++ if (!(READ_ONCE(p->__state) & state)) + goto out; + + success = 1; + trace_sched_waking(p); +- p->state = TASK_RUNNING; ++ WRITE_ONCE(p->__state, TASK_RUNNING); + trace_sched_wakeup(p); + goto out; + } +@@ -2320,7 +2322,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); +- if (!(p->state & state)) ++ if (!(READ_ONCE(p->__state) & state)) + goto unlock; + + trace_sched_waking(p); +@@ -2386,7 +2388,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * TASK_WAKING such that we can unlock p->pi_lock before doing the + * enqueue, such as ttwu_queue_wakelist(). + */ +- p->state = TASK_WAKING; ++ WRITE_ONCE(p->__state, TASK_WAKING); + + /* + * If the owning (remote) CPU is still in the middle of schedule() with +@@ -2482,7 +2484,7 @@ bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct t + ret = func(p, arg); + __task_rq_unlock(rq, &rf); + } else { +- switch (p->state) { ++ switch (READ_ONCE(p->__state)) { + case TASK_RUNNING: + case TASK_WAKING: + break; +@@ -2558,7 +2560,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + * nobody will actually run it, and a signal or other external + * event cannot wake it up and insert it on the runqueue either. + */ +- p->state = TASK_NEW; ++ p->__state = TASK_NEW; + + /* + * Make sure we do not leak PI boosting priority to the child. +@@ -2710,7 +2712,7 @@ void wake_up_new_task(struct task_struct *p) + struct rq *rq; + + raw_spin_lock_irqsave(&p->pi_lock, flags); +- p->state = TASK_RUNNING; ++ WRITE_ONCE(p->__state, TASK_RUNNING); + rq = cpu_rq(select_task_rq(p)); + #ifdef CONFIG_SMP + rseq_migrate(p); +@@ -3063,7 +3065,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) + * running on another CPU and we could rave with its RUNNING -> DEAD + * transition, resulting in a double drop. + */ +- prev_state = prev->state; ++ prev_state = READ_ONCE(prev->__state); + vtime_task_switch(prev); + perf_event_task_sched_in(prev, current); + finish_task(prev); +@@ -3841,7 +3843,7 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) + #endif + + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP +- if (!preempt && prev->state && prev->non_block_count) { ++ if (!preempt && READ_ONCE(prev->__state) && prev->non_block_count) { + printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", + prev->comm, prev->pid, prev->non_block_count); + dump_stack(); +@@ -4108,10 +4110,10 @@ static void __sched notrace __schedule(bool preempt) + * - we form a control dependency vs deactivate_task() below. + * - ptrace_{,un}freeze_traced() can change ->state underneath us. + */ +- prev_state = prev->state; +- if (!preempt && prev_state && prev_state == prev->state) { ++ prev_state = READ_ONCE(prev->__state); ++ if (!preempt && prev_state) { + if (signal_pending_state(prev_state, prev)) { +- prev->state = TASK_RUNNING; ++ WRITE_ONCE(prev->__state, TASK_RUNNING); + } else { + prev->sched_contributes_to_load = + (prev_state & TASK_UNINTERRUPTIBLE) && +@@ -4289,7 +4291,7 @@ void __sched schedule_idle(void) + * current task can be in any other state. Note, idle is always in the + * TASK_RUNNING state. 
+ */ +- WARN_ON_ONCE(current->state); ++ WARN_ON_ONCE(current->__state); + do { + __schedule(false); + } while (need_resched()); +@@ -6056,26 +6058,28 @@ EXPORT_SYMBOL_GPL(sched_show_task); + static inline bool + state_filter_match(unsigned long state_filter, struct task_struct *p) + { ++ unsigned int state = READ_ONCE(p->__state); ++ + /* no filter, everything matches */ + if (!state_filter) + return true; + + /* filter, but doesn't match */ +- if (!(p->state & state_filter)) ++ if (!(state & state_filter)) + return false; + + /* + * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows + * TASK_KILLABLE). + */ +- if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) ++ if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE) + return false; + + return true; + } + + +-void show_state_filter(unsigned long state_filter) ++void show_state_filter(unsigned int state_filter) + { + struct task_struct *g, *p; + +@@ -6142,7 +6146,7 @@ void __init init_idle(struct task_struct *idle, int cpu) + update_rq_clock(rq); + + idle->last_ran = rq->clock_task; +- idle->state = TASK_RUNNING; ++ idle->__state = TASK_RUNNING; + /* + * PF_KTHREAD should already be set at this point; regardless, make it + * look like a proper per-CPU kthread. +-- +2.37.0 + + +From 6e8b5a2ee7417d5faccaac3dc998472025232c6a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 12 Aug 2021 10:46:39 +0000 +Subject: [PATCH 199/297] sched/alt: [Sync] a1dfb6311c77 tick/nohz: Kick only + _queued_ task whose tick dependency is updated + +--- + kernel/sched/alt_core.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4909bf54bd4c..706927956630 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -582,6 +582,11 @@ static inline void sched_update_tick_dependency(struct rq *rq) + static inline void sched_update_tick_dependency(struct rq *rq) { } + #endif + ++bool sched_task_on_rq(struct task_struct *p) ++{ ++ return task_on_rq_queued(p); ++} ++ + /* + * Add/Remove/Requeue task to/from the runqueue routines + * Context: rq->lock +-- +2.37.0 + + +From 5c840ab4be958f4a7a166bfb81e94a59dbd2fd70 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 12 Aug 2021 10:56:29 +0000 +Subject: [PATCH 200/297] sched/alt: [Sync] 0fdcccfafcff tick/nohz: Call + tick_nohz_task_switch() with interrupts disabled + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 706927956630..07cc293d12ae 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3074,6 +3074,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) + vtime_task_switch(prev); + perf_event_task_sched_in(prev, current); + finish_task(prev); ++ tick_nohz_task_switch(); + finish_lock_switch(rq); + finish_arch_post_lock_switch(); + kcov_finish_switch(current); +@@ -3116,7 +3117,6 @@ static struct rq *finish_task_switch(struct task_struct *prev) + put_task_struct_rcu_user(prev); + } + +- tick_nohz_task_switch(); + return rq; + } + +-- +2.37.0 + + +From 7eb71005ba22b669ad18eca3dfc7b3db009ed871 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 12 Aug 2021 11:05:51 +0000 +Subject: [PATCH 201/297] sched/alt: [Sync] 1eb5dde674f5 cpufreq: CPPC: Add + support for frequency invariance + +--- + kernel/sched/alt_core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 07cc293d12ae..8afacd3736f4 
100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5151,6 +5151,7 @@ int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) + { + return __sched_setscheduler(p, attr, false, true); + } ++EXPORT_SYMBOL_GPL(sched_setattr_nocheck); + + /** + * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. +-- +2.37.0 + + +From 2adc42b2009354362a36069f2e1a225f0635bbfc Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 27 Aug 2021 12:25:47 +0000 +Subject: [PATCH 202/297] sched/alt: [Sync] f558c2b834ec sched/rt: Fix double + enqueue caused by rt_effective_prio + +--- + kernel/sched/alt_core.c | 46 ++++++++++++++++++++--------------------- + 1 file changed, 22 insertions(+), 24 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 8afacd3736f4..68e555999c46 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1116,6 +1116,12 @@ static inline void hrtick_rq_init(struct rq *rq) + } + #endif /* CONFIG_SCHED_HRTICK */ + ++static inline int __normal_prio(int policy, int rt_prio, int nice) ++{ ++ return rt_policy(policy) ? (MAX_RT_PRIO - 1 - rt_prio) : ++ NICE_TO_PRIO(nice) + MAX_PRIORITY_ADJ; ++} ++ + /* + * Calculate the expected normal priority: i.e. priority + * without taking RT-inheritance into account. Might be +@@ -1125,8 +1131,7 @@ static inline void hrtick_rq_init(struct rq *rq) + */ + static inline int normal_prio(struct task_struct *p) + { +- return task_has_rt_policy(p) ? (MAX_RT_PRIO - 1 - p->rt_priority) : +- p->static_prio + MAX_PRIORITY_ADJ; ++ return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); + } + + /* +@@ -2583,7 +2588,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + } else if (PRIO_TO_NICE(p->static_prio) < 0) + p->static_prio = NICE_TO_PRIO(0); + +- p->prio = p->normal_prio = normal_prio(p); ++ p->prio = p->normal_prio = p->static_prio; + + /* + * We don't need the reset flag anymore after the fork. It has +@@ -4604,6 +4609,11 @@ static inline void check_task_changed(struct task_struct *p, struct rq *rq) + } + } + ++static void __setscheduler_prio(struct task_struct *p, int prio) ++{ ++ p->prio = prio; ++} ++ + #ifdef CONFIG_RT_MUTEXES + + static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) +@@ -4685,7 +4695,8 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) + } + + trace_sched_pi_setprio(p, pi_task); +- p->prio = prio; ++ ++ __setscheduler_prio(p, prio); + + check_task_changed(p, rq); + out_unlock: +@@ -4884,21 +4895,6 @@ static void __setscheduler_params(struct task_struct *p, + p->normal_prio = normal_prio(p); + } + +-/* Actually do priority change: must hold rq lock. */ +-static void __setscheduler(struct rq *rq, struct task_struct *p, +- const struct sched_attr *attr, bool keep_boost) +-{ +- __setscheduler_params(p, attr); +- +- /* +- * Keep a potential priority boosting if called from +- * sched_setscheduler(). 
+- */ +- p->prio = normal_prio(p); +- if (keep_boost) +- p->prio = rt_effective_prio(p, p->prio); +-} +- + /* + * check the target process has a UID that matches the current process's + */ +@@ -4925,9 +4921,8 @@ static int __sched_setscheduler(struct task_struct *p, + .sched_nice = 0, + .sched_priority = 99, + }; +- int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; +- int retval, oldpolicy = -1; +- int policy = attr->sched_policy; ++ int oldpolicy = -1, policy = attr->sched_policy; ++ int retval, newprio; + struct callback_head *head; + unsigned long flags; + struct rq *rq; +@@ -4943,7 +4938,6 @@ static int __sched_setscheduler(struct task_struct *p, + if (unlikely(SCHED_DEADLINE == policy)) { + attr = &dl_squash_attr; + policy = attr->sched_policy; +- newprio = MAX_RT_PRIO - 1 - attr->sched_priority; + } + recheck: + /* Double check policy once rq lock held */ +@@ -5061,6 +5055,7 @@ static int __sched_setscheduler(struct task_struct *p, + + p->sched_reset_on_fork = reset_on_fork; + ++ newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); + if (pi) { + /* + * Take priority boosted tasks into account. If the new +@@ -5076,7 +5071,10 @@ static int __sched_setscheduler(struct task_struct *p, + } + } + +- __setscheduler(rq, p, attr, pi); ++ if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { ++ __setscheduler_params(p, attr); ++ __setscheduler_prio(p, newprio); ++ } + + check_task_changed(p, rq); + +-- +2.37.0 + + +From 70a924d4be3dbae051725599096d2dbd2ef88730 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 12 Aug 2021 15:02:03 +0000 +Subject: [PATCH 203/297] Project-C v5.14-r0 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 68e555999c46..5df1157a597c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.13-r3" ++#define ALT_SCHED_VERSION "v5.14-r0" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From c61bbc99782e5af2e8e4ade9ef2f784393ea46f3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 2 Sep 2021 16:19:32 +0000 +Subject: [PATCH 204/297] sched/alt: Fix for v5.14-prjc-r0 fails on x86 UP + build + +Mainline add some sched-core related api which cause below fails on x86 +UP build, #30. 
+--- + kernel/sched/alt_core.c | 24 ++++++++++++++++++++++++ + kernel/sched/alt_sched.h | 30 ++++++++++++++++++++++++++++++ + 2 files changed, 54 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 5df1157a597c..bdff1c65e2e9 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -492,6 +492,30 @@ rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) + raw_spin_unlock_irqrestore(&rq->lock, rf->flags); + } + ++void raw_spin_rq_lock_nested(struct rq *rq, int subclass) ++{ ++ raw_spinlock_t *lock; ++ ++ /* Matches synchronize_rcu() in __sched_core_enable() */ ++ preempt_disable(); ++ ++ for (;;) { ++ lock = __rq_lockp(rq); ++ raw_spin_lock_nested(lock, subclass); ++ if (likely(lock == __rq_lockp(rq))) { ++ /* preempt_count *MUST* be > 1 */ ++ preempt_enable_no_resched(); ++ return; ++ } ++ raw_spin_unlock(lock); ++ } ++} ++ ++void raw_spin_rq_unlock(struct rq *rq) ++{ ++ raw_spin_unlock(rq_lockp(rq)); ++} ++ + /* + * RQ-clock updating methods: + */ +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 7a48809550bf..f03af9ab9123 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -443,6 +443,36 @@ this_rq_lock_irq(struct rq_flags *rf) + return rq; + } + ++extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); ++extern void raw_spin_rq_unlock(struct rq *rq); ++ ++static inline raw_spinlock_t *__rq_lockp(struct rq *rq) ++{ ++ return &rq->lock; ++} ++ ++static inline raw_spinlock_t *rq_lockp(struct rq *rq) ++{ ++ return __rq_lockp(rq); ++} ++ ++static inline void raw_spin_rq_lock(struct rq *rq) ++{ ++ raw_spin_rq_lock_nested(rq, 0); ++} ++ ++static inline void raw_spin_rq_lock_irq(struct rq *rq) ++{ ++ local_irq_disable(); ++ raw_spin_rq_lock(rq); ++} ++ ++static inline void raw_spin_rq_unlock_irq(struct rq *rq) ++{ ++ raw_spin_rq_unlock(rq); ++ local_irq_enable(); ++} ++ + static inline int task_current(struct rq *rq, struct task_struct *p) + { + return rq->curr == p; +-- +2.37.0 + + +From 78e1ff83298be9c593370f4646ca12d30a49da36 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 3 Sep 2021 08:07:43 +0000 +Subject: [PATCH 205/297] sched/alt: Disable SCHED_CORE on Project C + +SCHED_CORE is introduced in 5.14, which is not supported in Project C. +--- + kernel/Kconfig.preempt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt +index c2f1fd95a821..41654679b1b2 100644 +--- a/kernel/Kconfig.preempt ++++ b/kernel/Kconfig.preempt +@@ -117,7 +117,7 @@ config PREEMPT_DYNAMIC + + config SCHED_CORE + bool "Core Scheduling for SMT" +- depends on SCHED_SMT ++ depends on SCHED_SMT && !SCHED_ALT + help + This option permits Core Scheduling, a means of coordinated task + selection across SMT siblings. When enabled -- see +-- +2.37.0 + + +From 23df85da949fe03fcbea1ee7d31f6d626d6c8ed0 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 4 Sep 2021 08:59:59 +0000 +Subject: [PATCH 206/297] sched/alt: Optimize parameter to __normal_prio(). 
+ +--- + kernel/sched/alt_core.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index bdff1c65e2e9..688a6696749c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1140,10 +1140,10 @@ static inline void hrtick_rq_init(struct rq *rq) + } + #endif /* CONFIG_SCHED_HRTICK */ + +-static inline int __normal_prio(int policy, int rt_prio, int nice) ++static inline int __normal_prio(int policy, int rt_prio, int static_prio) + { + return rt_policy(policy) ? (MAX_RT_PRIO - 1 - rt_prio) : +- NICE_TO_PRIO(nice) + MAX_PRIORITY_ADJ; ++ static_prio + MAX_PRIORITY_ADJ; + } + + /* +@@ -1155,7 +1155,7 @@ static inline int __normal_prio(int policy, int rt_prio, int nice) + */ + static inline int normal_prio(struct task_struct *p) + { +- return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); ++ return __normal_prio(p->policy, p->rt_priority, p->static_prio); + } + + /* +@@ -5079,7 +5079,7 @@ static int __sched_setscheduler(struct task_struct *p, + + p->sched_reset_on_fork = reset_on_fork; + +- newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); ++ newprio = __normal_prio(policy, attr->sched_priority, NICE_TO_PRIO(attr->sched_nice)); + if (pi) { + /* + * Take priority boosted tasks into account. If the new +-- +2.37.0 + + +From 827dd84ce146e07acd45f7f8539de6454aa50332 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 4 Sep 2021 09:12:38 +0000 +Subject: [PATCH 207/297] Project-C v5.14-r1 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 688a6696749c..900889c838ea 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.14-r0" ++#define ALT_SCHED_VERSION "v5.14-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 9324fd52c57652d38d1e63188a718cdc1473cde8 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 6 Sep 2021 16:13:18 +0000 +Subject: [PATCH 208/297] sched/alt: Sync-up mainline change for pi in + __sched_setscheduler() + +--- + kernel/sched/alt_core.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 900889c838ea..ea551c1fca45 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5088,11 +5088,7 @@ static int __sched_setscheduler(struct task_struct *p, + * the runqueue. This will be done when the task deboost + * itself. 
+ */ +- if (rt_effective_prio(p, newprio) == p->prio) { +- __setscheduler_params(p, attr); +- retval = 0; +- goto unlock; +- } ++ newprio = rt_effective_prio(p, newprio); + } + + if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { +-- +2.37.0 + + +From c259d1cbb016ca585d976c254c70a4125d58b86c Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 19 Sep 2021 07:46:44 +0000 +Subject: [PATCH 209/297] sched/alt: [Sync] 5b7992c06c54 sched: Prevent + balance_push() on remote runqueues + +--- + kernel/sched/alt_core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index ea551c1fca45..2cfc83c87d71 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6300,7 +6300,6 @@ static void balance_push(struct rq *rq) + struct task_struct *push_task = rq->curr; + + lockdep_assert_held(&rq->lock); +- SCHED_WARN_ON(rq->cpu != smp_processor_id()); + + /* + * Ensure the thing is persistent until balance_push_set(.on = false); +@@ -6308,9 +6307,10 @@ static void balance_push(struct rq *rq) + rq->balance_callback = &balance_push_callback; + + /* +- * Only active while going offline. ++ * Only active while going offline and when invoked on the outgoing ++ * CPU. + */ +- if (!cpu_dying(rq->cpu)) ++ if (!cpu_dying(rq->cpu) || rq != this_rq()) + return; + + /* +-- +2.37.0 + + +From a307aae151702d3c39c73eb3ebcb85b432a0bb1f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 19 Sep 2021 16:25:34 +0000 +Subject: [PATCH 210/297] sched/alt: Move general load accounting to RQ. + +This commit move the general load accounting from cpufreq_schedutil to +RQ in core file. + +Also implement sched_cpu_util() using the general load accounting, which +fix compilation error of missing sched_cpu_util(). 
+--- + kernel/sched/alt_core.c | 95 ++++++++++++++++++++++++++++++++ + kernel/sched/alt_sched.h | 42 +++----------- + kernel/sched/cpufreq_schedutil.c | 68 ++--------------------- + 3 files changed, 107 insertions(+), 98 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 2cfc83c87d71..ee6fc0307135 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -584,6 +584,101 @@ static inline void update_rq_clock(struct rq *rq) + update_rq_clock_task(rq, delta); + } + ++/* ++ * RQ Load update routine ++ */ ++#define RQ_LOAD_HISTORY_BITS (sizeof(s32) * 8ULL) ++#define RQ_UTIL_SHIFT (8) ++#define RQ_LOAD_HISTORY_TO_UTIL(l) (((l) >> (RQ_LOAD_HISTORY_BITS - 1 - RQ_UTIL_SHIFT)) & 0xff) ++ ++#define LOAD_BLOCK(t) ((t) >> 17) ++#define LOAD_HALF_BLOCK(t) ((t) >> 16) ++#define BLOCK_MASK(t) ((t) & ((0x01 << 18) - 1)) ++#define LOAD_BLOCK_BIT(b) (1UL << (RQ_LOAD_HISTORY_BITS - 1 - (b))) ++#define CURRENT_LOAD_BIT LOAD_BLOCK_BIT(0) ++ ++static inline void rq_load_update(struct rq *rq) ++{ ++ u64 time = rq->clock; ++ u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(rq->load_stamp), ++ RQ_LOAD_HISTORY_BITS - 1); ++ u64 prev = !!(rq->load_history & CURRENT_LOAD_BIT); ++ u64 curr = !!cpu_rq(rq->cpu)->nr_running; ++ ++ if (delta) { ++ rq->load_history = rq->load_history >> delta; ++ ++ if (delta < RQ_UTIL_SHIFT) { ++ rq->load_block += (~BLOCK_MASK(rq->load_stamp)) * prev; ++ if (!!LOAD_HALF_BLOCK(rq->load_block) ^ curr) ++ rq->load_history ^= LOAD_BLOCK_BIT(delta); ++ } ++ ++ rq->load_block = BLOCK_MASK(time) * prev; ++ } else { ++ rq->load_block += (time - rq->load_stamp) * prev; ++ } ++ if (prev ^ curr) ++ rq->load_history ^= CURRENT_LOAD_BIT; ++ rq->load_stamp = time; ++} ++ ++unsigned long rq_load_util(struct rq *rq, unsigned long max) ++{ ++ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (max >> RQ_UTIL_SHIFT); ++} ++ ++#ifdef CONFIG_SMP ++unsigned long sched_cpu_util(int cpu, unsigned long max) ++{ ++ return rq_load_util(cpu_rq(cpu), max); ++} ++#endif /* CONFIG_SMP */ ++ ++#ifdef CONFIG_CPU_FREQ ++/** ++ * cpufreq_update_util - Take a note about CPU utilization changes. ++ * @rq: Runqueue to carry out the update for. ++ * @flags: Update reason flags. ++ * ++ * This function is called by the scheduler on the CPU whose utilization is ++ * being updated. ++ * ++ * It can only be called from RCU-sched read-side critical sections. ++ * ++ * The way cpufreq is currently arranged requires it to evaluate the CPU ++ * performance state (frequency/voltage) on a regular basis to prevent it from ++ * being stuck in a completely inadequate performance level for too long. ++ * That is not guaranteed to happen if the updates are only triggered from CFS ++ * and DL, though, because they may not be coming in if only RT tasks are ++ * active all the time (or there are RT tasks only). ++ * ++ * As a workaround for that issue, this function is called periodically by the ++ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, ++ * but that really is a band-aid. Going forward it should be replaced with ++ * solutions targeted more specifically at RT tasks. 
++ */ ++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) ++{ ++ struct update_util_data *data; ++ ++#ifdef CONFIG_SMP ++ rq_load_update(rq); ++#endif ++ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, ++ cpu_of(rq))); ++ if (data) ++ data->func(data, rq_clock(rq), flags); ++} ++#else ++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) ++{ ++#ifdef CONFIG_SMP ++ rq_load_update(rq); ++#endif ++} ++#endif /* CONFIG_CPU_FREQ */ ++ + #ifdef CONFIG_NO_HZ_FULL + /* + * Tick may be needed by tasks in the runqueue depending on their policy and +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index f03af9ab9123..289058a09bd5 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -197,6 +197,7 @@ struct rq { + struct rcuwait hotplug_wait; + #endif + unsigned int nr_pinned; ++ + #endif /* CONFIG_SMP */ + #ifdef CONFIG_IRQ_TIME_ACCOUNTING + u64 prev_irq_time; +@@ -208,6 +209,11 @@ struct rq { + u64 prev_steal_time_rq; + #endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ + ++ /* For genenal cpu load util */ ++ s32 load_history; ++ u64 load_block; ++ u64 load_stamp; ++ + /* calc_load related fields */ + unsigned long calc_load_update; + long calc_load_active; +@@ -260,6 +266,8 @@ struct rq { + #endif /* CONFIG_NO_HZ_COMMON */ + }; + ++extern unsigned long rq_load_util(struct rq *rq, unsigned long max); ++ + extern unsigned long calc_load_update; + extern atomic_long_t calc_load_tasks; + +@@ -572,40 +580,6 @@ static inline u64 irq_time_read(int cpu) + + #ifdef CONFIG_CPU_FREQ + DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); +- +-/** +- * cpufreq_update_util - Take a note about CPU utilization changes. +- * @rq: Runqueue to carry out the update for. +- * @flags: Update reason flags. +- * +- * This function is called by the scheduler on the CPU whose utilization is +- * being updated. +- * +- * It can only be called from RCU-sched read-side critical sections. +- * +- * The way cpufreq is currently arranged requires it to evaluate the CPU +- * performance state (frequency/voltage) on a regular basis to prevent it from +- * being stuck in a completely inadequate performance level for too long. +- * That is not guaranteed to happen if the updates are only triggered from CFS +- * and DL, though, because they may not be coming in if only RT tasks are +- * active all the time (or there are RT tasks only). +- * +- * As a workaround for that issue, this function is called periodically by the +- * RT sched class to trigger extra cpufreq updates to prevent it from stalling, +- * but that really is a band-aid. Going forward it should be replaced with +- * solutions targeted more specifically at RT tasks. 
+- */ +-static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) +-{ +- struct update_util_data *data; +- +- data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, +- cpu_of(rq))); +- if (data) +- data->func(data, rq_clock(rq), flags); +-} +-#else +-static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} + #endif /* CONFIG_CPU_FREQ */ + + #ifdef CONFIG_NO_HZ_FULL +diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c +index ab803029b7fd..b2590f961139 100644 +--- a/kernel/sched/cpufreq_schedutil.c ++++ b/kernel/sched/cpufreq_schedutil.c +@@ -50,13 +50,6 @@ struct sugov_cpu { + unsigned long bw_dl; + unsigned long max; + +-#ifdef CONFIG_SCHED_ALT +- /* For genenal cpu load util */ +- s32 load_history; +- u64 load_block; +- u64 load_stamp; +-#endif +- + /* The field below is for single-CPU policies only: */ + #ifdef CONFIG_NO_HZ_COMMON + unsigned long saved_idle_calls; +@@ -161,66 +154,21 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, + return cpufreq_driver_resolve_freq(policy, freq); + } + +-#ifndef CONFIG_SCHED_ALT + static void sugov_get_util(struct sugov_cpu *sg_cpu) + { + struct rq *rq = cpu_rq(sg_cpu->cpu); + unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); + + sg_cpu->max = max; ++#ifndef CONFIG_SCHED_ALT + sg_cpu->bw_dl = cpu_bw_dl(rq); + sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max, + FREQUENCY_UTIL, NULL); +-} +- +-#else /* CONFIG_SCHED_ALT */ +- +-#define SG_CPU_LOAD_HISTORY_BITS (sizeof(s32) * 8ULL) +-#define SG_CPU_UTIL_SHIFT (8) +-#define SG_CPU_LOAD_HISTORY_SHIFT (SG_CPU_LOAD_HISTORY_BITS - 1 - SG_CPU_UTIL_SHIFT) +-#define SG_CPU_LOAD_HISTORY_TO_UTIL(l) (((l) >> SG_CPU_LOAD_HISTORY_SHIFT) & 0xff) +- +-#define LOAD_BLOCK(t) ((t) >> 17) +-#define LOAD_HALF_BLOCK(t) ((t) >> 16) +-#define BLOCK_MASK(t) ((t) & ((0x01 << 18) - 1)) +-#define LOAD_BLOCK_BIT(b) (1UL << (SG_CPU_LOAD_HISTORY_BITS - 1 - (b))) +-#define CURRENT_LOAD_BIT LOAD_BLOCK_BIT(0) +- +-static void sugov_get_util(struct sugov_cpu *sg_cpu) +-{ +- unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); +- +- sg_cpu->max = max; ++#else + sg_cpu->bw_dl = 0; +- sg_cpu->util = SG_CPU_LOAD_HISTORY_TO_UTIL(sg_cpu->load_history) * +- (max >> SG_CPU_UTIL_SHIFT); +-} +- +-static inline void sugov_cpu_load_update(struct sugov_cpu *sg_cpu, u64 time) +-{ +- u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(sg_cpu->load_stamp), +- SG_CPU_LOAD_HISTORY_BITS - 1); +- u64 prev = !!(sg_cpu->load_history & CURRENT_LOAD_BIT); +- u64 curr = !!cpu_rq(sg_cpu->cpu)->nr_running; +- +- if (delta) { +- sg_cpu->load_history = sg_cpu->load_history >> delta; +- +- if (delta <= SG_CPU_UTIL_SHIFT) { +- sg_cpu->load_block += (~BLOCK_MASK(sg_cpu->load_stamp)) * prev; +- if (!!LOAD_HALF_BLOCK(sg_cpu->load_block) ^ curr) +- sg_cpu->load_history ^= LOAD_BLOCK_BIT(delta); +- } +- +- sg_cpu->load_block = BLOCK_MASK(time) * prev; +- } else { +- sg_cpu->load_block += (time - sg_cpu->load_stamp) * prev; +- } +- if (prev ^ curr) +- sg_cpu->load_history ^= CURRENT_LOAD_BIT; +- sg_cpu->load_stamp = time; +-} ++ sg_cpu->util = rq_load_util(rq, max); + #endif /* CONFIG_SCHED_ALT */ ++} + + /** + * sugov_iowait_reset() - Reset the IO boost status of a CPU. 
+@@ -372,10 +320,6 @@ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) + static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, + u64 time, unsigned int flags) + { +-#ifdef CONFIG_SCHED_ALT +- sugov_cpu_load_update(sg_cpu, time); +-#endif /* CONFIG_SCHED_ALT */ +- + sugov_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + +@@ -502,10 +446,6 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) + + raw_spin_lock(&sg_policy->update_lock); + +-#ifdef CONFIG_SCHED_ALT +- sugov_cpu_load_update(sg_cpu, time); +-#endif /* CONFIG_SCHED_ALT */ +- + sugov_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + +-- +2.37.0 + + +From 1ef06827dfe7e2db1bdda65d1c8817f9d05035bd Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 19 Sep 2021 16:38:23 +0000 +Subject: [PATCH 211/297] Project-C v5.14-r2 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index ee6fc0307135..7c998e606114 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.14-r1" ++#define ALT_SCHED_VERSION "v5.14-r2" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From f2bb457d75d11588616a4932537b7d955e88f0a9 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 21 Sep 2021 09:04:32 +0000 +Subject: [PATCH 212/297] sched/alt: Add sanity check at + migrate_pending_tasks() + +sched_task_sanity_check() is missing when task moving from other rq(s), +this should fix #36 +--- + kernel/sched/alt_core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 7c998e606114..3d334abeadce 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4030,6 +4030,7 @@ migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) + if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { + __SCHED_DEQUEUE_TASK(p, rq, 0, ); + set_task_cpu(p, dest_cpu); ++ sched_task_sanity_check(p, dest_rq); + __SCHED_ENQUEUE_TASK(p, dest_rq, 0); + nr_migrated++; + } +-- +2.37.0 + + +From cd61f958e2ec980c724cb98111c2be1dbc9b4c26 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 21 Sep 2021 11:14:18 +0000 +Subject: [PATCH 213/297] sched/alt: Fix missing cpuacct.usage. + +Call to cgroup_account_cputime() is missing in Project C scheduler, +which cause cpuacct.usage reports zero. + +This should fixed #40 +--- + kernel/sched/alt_core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3d334abeadce..c1643fa8243c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3468,6 +3468,7 @@ static inline void update_curr(struct rq *rq, struct task_struct *p) + s64 ns = rq->clock_task - p->last_ran; + + p->sched_time += ns; ++ cgroup_account_cputime(p, ns); + account_group_exec_runtime(p, ns); + + p->time_slice -= ns; +-- +2.37.0 + + +From bad1f2d661f25c57d57303026b29fd2459475bcc Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 23 Sep 2021 14:25:30 +0000 +Subject: [PATCH 214/297] sched/alt: Fix compilation issue with + rebuild_sched_domains_energy() + +Energy model is not supported in Project C, this fix the undefined +reference to `rebuild_sched_domains_energy'. 
+--- + include/linux/sched/topology.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h +index 56cffe42abbc..e020fc572b22 100644 +--- a/include/linux/sched/topology.h ++++ b/include/linux/sched/topology.h +@@ -233,7 +233,8 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) + + #endif /* !CONFIG_SMP */ + +-#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) ++#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) && \ ++ !defined(CONFIG_SCHED_ALT) + extern void rebuild_sched_domains_energy(void); + #else + static inline void rebuild_sched_domains_energy(void) +-- +2.37.0 + + +From 20ab96e4d076e8e51e7d34e53e09eabb132fa2a3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 23 Sep 2021 14:53:14 +0000 +Subject: [PATCH 215/297] Project-C v5.14-r3 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c1643fa8243c..56aed2b1e42c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.14-r2" ++#define ALT_SCHED_VERSION "v5.14-r3" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 043b0f475a9c347ab2df59d85ad56209e1319c3d Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 7 Oct 2021 20:15:28 +0000 +Subject: [PATCH 216/297] sched/alt: Fix UP compilation issue. + +Refine the code in rq_load_update() and fix UP compilation issue. +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 56aed2b1e42c..80faf3000851 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -603,7 +603,7 @@ static inline void rq_load_update(struct rq *rq) + u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(rq->load_stamp), + RQ_LOAD_HISTORY_BITS - 1); + u64 prev = !!(rq->load_history & CURRENT_LOAD_BIT); +- u64 curr = !!cpu_rq(rq->cpu)->nr_running; ++ u64 curr = !!rq->nr_running; + + if (delta) { + rq->load_history = rq->load_history >> delta; +-- +2.37.0 + + +From 4d5cf13eb754af0352df0c680553aec60dd31dc2 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 8 Oct 2021 13:48:36 +0000 +Subject: [PATCH 217/297] Project-C v5.14-r4 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 80faf3000851..e1a7c163c2d5 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.14-r3" ++#define ALT_SCHED_VERSION "v5.14-r4" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 4b9654988d24888e1db9f206b8340b66a4cc32a7 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 15 Oct 2021 13:50:38 +0000 +Subject: [PATCH 218/297] sched/alt: [Sync] 508958259bb3 rcu: Explain why + rcu_all_qs() is a stub in preemptible TREE RCU + +--- + kernel/sched/alt_core.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c 
+index e1a7c163c2d5..3f74f912d534 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5848,6 +5848,17 @@ int __sched __cond_resched(void) + preempt_schedule_common(); + return 1; + } ++ /* ++ * In preemptible kernels, ->rcu_read_lock_nesting tells the tick ++ * whether the current CPU is in an RCU read-side critical section, ++ * so the tick can report quiescent states even for CPUs looping ++ * in kernel context. In contrast, in non-preemptible kernels, ++ * RCU readers leave no in-memory hints, which means that CPU-bound ++ * processes executing in kernel context might never report an ++ * RCU quiescent state. Therefore, the following code causes ++ * cond_resched() to report a quiescent state, but only when RCU ++ * is in urgent need of one. ++ */ + #ifndef CONFIG_PREEMPT_RCU + rcu_all_qs(); + #endif +-- +2.37.0 + + +From e76f8a809e9c9f3f2816471949123c7301baf2fc Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 15 Oct 2021 14:00:12 +0000 +Subject: [PATCH 219/297] sched/alt: [Sync] 031e3bd8986f sched: Optimize + housekeeping_cpumask() in for_each_cpu_and() + +--- + kernel/sched/alt_core.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3f74f912d534..50b89c828837 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1005,6 +1005,7 @@ int get_nohz_timer_target(void) + { + int i, cpu = smp_processor_id(), default_cpu = -1; + struct cpumask *mask; ++ const struct cpumask *hk_mask; + + if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { + if (!idle_cpu(cpu)) +@@ -1012,9 +1013,11 @@ int get_nohz_timer_target(void) + default_cpu = cpu; + } + ++ hk_mask = housekeeping_cpumask(HK_FLAG_TIMER); ++ + for (mask = per_cpu(sched_cpu_topo_masks, cpu) + 1; + mask < per_cpu(sched_cpu_topo_end_mask, cpu); mask++) +- for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) ++ for_each_cpu_and(i, mask, hk_mask) + if (!idle_cpu(i)) + return i; + +-- +2.37.0 + + +From 11bb1d45a50840ca29158114418156a13a3daa05 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 15 Oct 2021 14:10:17 +0000 +Subject: [PATCH 220/297] sched/alt: [Sync] 7ad721bf1071 sched: Don't report + SCHED_FLAG_SUGOV in sched_getattr() + +--- + kernel/sched/alt_core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 50b89c828837..25184a6ddef3 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5617,6 +5617,7 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, + kattr.sched_priority = p->rt_priority; + else + kattr.sched_nice = task_nice(p); ++ kattr.sched_flags &= SCHED_FLAG_ALL; + + #ifdef CONFIG_UCLAMP_TASK + kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; +-- +2.37.0 + + +From 4aeb4a2d94d1203145c3da2279ef1ed52d9e04c3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 15 Oct 2021 14:39:30 +0000 +Subject: [PATCH 221/297] sched/alt: [Sync] 9ae606bc74dd sched: Introduce + task_cpu_possible_mask() to limit fallback rq selection + +--- + kernel/sched/alt_core.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 25184a6ddef3..2ae47a36458c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1478,7 +1478,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) + + /* Non kernel threads are not allowed during either online or offline. 
*/ + if (!(p->flags & PF_KTHREAD)) +- return cpu_active(cpu); ++ return cpu_active(cpu) && task_cpu_possible(cpu, p); + + /* KTHREAD_IS_PER_CPU is always allowed. */ + if (kthread_is_per_cpu(p)) +@@ -1797,9 +1797,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) + + /* Look for allowed, online CPU in same node. */ + for_each_cpu(dest_cpu, nodemask) { +- if (!cpu_active(dest_cpu)) +- continue; +- if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) ++ if (is_cpu_allowed(p, dest_cpu)) + return dest_cpu; + } + } +@@ -1828,7 +1826,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) + * + * More yuck to audit. + */ +- do_set_cpus_allowed(p, cpu_possible_mask); ++ do_set_cpus_allowed(p, task_cpu_possible_mask(p)); + state = fail; + break; + +-- +2.37.0 + + +From cae9fb325825c4a607bcf3a225f135f037bcdb13 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 15 Oct 2021 14:42:29 +0000 +Subject: [PATCH 222/297] sched/alt: [Sync] 97c0054dbe2c cpuset: Cleanup + cpuset_cpus_allowed_fallback() use in select_fallback_rq() + +--- + kernel/sched/alt_core.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 2ae47a36458c..2708688b24f9 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1813,8 +1813,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) + /* No more Mr. Nice Guy. */ + switch (state) { + case cpuset: +- if (IS_ENABLED(CONFIG_CPUSETS)) { +- cpuset_cpus_allowed_fallback(p); ++ if (cpuset_cpus_allowed_fallback(p)) { + state = possible; + break; + } +-- +2.37.0 + + +From 931fcd03848348878418583c4e3fbce41a1eb274 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 15 Oct 2021 15:15:41 +0000 +Subject: [PATCH 223/297] sched/alt: [Sync] 234a503e670b sched: Reject CPU + affinity changes based on task_cpu_possible_mask() + +--- + kernel/sched/alt_core.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 2708688b24f9..11c847aa8691 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1927,7 +1927,9 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + const struct cpumask *new_mask, + u32 flags) + { ++ const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); + const struct cpumask *cpu_valid_mask = cpu_active_mask; ++ bool kthread = p->flags & PF_KTHREAD; + int dest_cpu; + unsigned long irq_flags; + struct rq *rq; +@@ -1937,7 +1939,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + raw_spin_lock_irqsave(&p->pi_lock, irq_flags); + rq = __task_access_lock(p, &lock); + +- if (p->flags & PF_KTHREAD || is_migration_disabled(p)) { ++ if (kthread || is_migration_disabled(p)) { + /* + * Kernel threads are allowed on online && !active CPUs, + * however, during cpu-hot-unplug, even these might get pushed +@@ -1951,6 +1953,11 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + cpu_valid_mask = cpu_online_mask; + } + ++ if (!kthread && !cpumask_subset(new_mask, cpu_allowed_mask)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ + /* + * Must re-check here, to close a race against __kthread_bind(), + * sched_setaffinity() is not guaranteed to observe the flag. 
+-- +2.37.0 + + +From 53428a9670df072cde92f8872c10b7a14bc2229b Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 15 Oct 2021 16:31:35 +0000 +Subject: [PATCH 224/297] sched/alt: [Sync] b90ca8badbd1 sched: Introduce + task_struct::user_cpus_ptr to track requested affinity + +--- + kernel/sched/alt_core.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 11c847aa8691..6eeeb10f2eea 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1616,6 +1616,26 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) + __do_set_cpus_allowed(p, new_mask); + } + ++int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, ++ int node) ++{ ++ if (!src->user_cpus_ptr) ++ return 0; ++ ++ dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); ++ if (!dst->user_cpus_ptr) ++ return -ENOMEM; ++ ++ cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); ++ return 0; ++} ++ ++void release_user_cpus_ptr(struct task_struct *p) ++{ ++ kfree(p->user_cpus_ptr); ++ p->user_cpus_ptr = NULL; ++} ++ + #endif + + /** +-- +2.37.0 + + +From d855c8f926b5eaefa57a511cb41955e53dca391b Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 18 Oct 2021 21:10:56 +0000 +Subject: [PATCH 225/297] sched/alt: [Sync] db3b02ae896e sched: Split the guts + of sched_setaffinity() into a helper function + +--- + kernel/sched/alt_core.c | 78 +++++++++++++++++++++++------------------ + 1 file changed, 44 insertions(+), 34 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 6eeeb10f2eea..f2546d629b9d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -5657,9 +5657,47 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, + return retval; + } + +-long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) ++static int ++__sched_setaffinity(struct task_struct *p, const struct cpumask *mask) + { ++ int retval; + cpumask_var_t cpus_allowed, new_mask; ++ ++ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_free_cpus_allowed; ++ } ++ ++ cpuset_cpus_allowed(p, cpus_allowed); ++ cpumask_and(new_mask, mask, cpus_allowed); ++again: ++ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); ++ if (retval) ++ goto out_free_new_mask; ++ ++ cpuset_cpus_allowed(p, cpus_allowed); ++ if (!cpumask_subset(new_mask, cpus_allowed)) { ++ /* ++ * We must have raced with a concurrent cpuset ++ * update. 
Just reset the cpus_allowed to the ++ * cpuset's cpus_allowed ++ */ ++ cpumask_copy(new_mask, cpus_allowed); ++ goto again; ++ } ++ ++out_free_new_mask: ++ free_cpumask_var(new_mask); ++out_free_cpus_allowed: ++ free_cpumask_var(cpus_allowed); ++ return retval; ++} ++ ++long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) ++{ + struct task_struct *p; + int retval; + +@@ -5679,50 +5717,22 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) + retval = -EINVAL; + goto out_put_task; + } +- if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { +- retval = -ENOMEM; +- goto out_put_task; +- } +- if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { +- retval = -ENOMEM; +- goto out_free_cpus_allowed; +- } +- retval = -EPERM; ++ + if (!check_same_owner(p)) { + rcu_read_lock(); + if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { + rcu_read_unlock(); +- goto out_free_new_mask; ++ retval = -EPERM; ++ goto out_put_task; + } + rcu_read_unlock(); + } + + retval = security_task_setscheduler(p); + if (retval) +- goto out_free_new_mask; +- +- cpuset_cpus_allowed(p, cpus_allowed); +- cpumask_and(new_mask, in_mask, cpus_allowed); +- +-again: +- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); ++ goto out_put_task; + +- if (!retval) { +- cpuset_cpus_allowed(p, cpus_allowed); +- if (!cpumask_subset(new_mask, cpus_allowed)) { +- /* +- * We must have raced with a concurrent cpuset +- * update. Just reset the cpus_allowed to the +- * cpuset's cpus_allowed +- */ +- cpumask_copy(new_mask, cpus_allowed); +- goto again; +- } +- } +-out_free_new_mask: +- free_cpumask_var(new_mask); +-out_free_cpus_allowed: +- free_cpumask_var(cpus_allowed); ++ retval = __sched_setaffinity(p, in_mask); + out_put_task: + put_task_struct(p); + return retval; +-- +2.37.0 + + +From 13397b10a79f6055b970cbb41466cd61644b7f2a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 2 Nov 2021 09:55:27 +0000 +Subject: [PATCH 226/297] sched/alt: [Sync] 07ec77a1d4e8 sched: Allow task CPU + affinity to be restricted on asymmetric systems + +--- + kernel/sched/alt_core.c | 262 ++++++++++++++++++++++++++++++++-------- + 1 file changed, 211 insertions(+), 51 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index f2546d629b9d..fe42edce5d55 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1333,6 +1333,7 @@ static inline bool is_migration_disabled(struct task_struct *p) + } + + #define SCA_CHECK 0x01 ++#define SCA_USER 0x08 + + #ifdef CONFIG_SMP + +@@ -1630,10 +1631,18 @@ int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, + return 0; + } + ++static inline struct cpumask *clear_user_cpus_ptr(struct task_struct *p) ++{ ++ struct cpumask *user_mask = NULL; ++ ++ swap(p->user_cpus_ptr, user_mask); ++ ++ return user_mask; ++} ++ + void release_user_cpus_ptr(struct task_struct *p) + { +- kfree(p->user_cpus_ptr); +- p->user_cpus_ptr = NULL; ++ kfree(clear_user_cpus_ptr(p)); + } + + #endif +@@ -1934,31 +1943,58 @@ void sched_set_stop_task(int cpu, struct task_struct *stop) + } + } + +-/* +- * Change a given task's CPU affinity. Migrate the thread to a +- * proper CPU and schedule it away if the CPU it's executing on +- * is removed from the allowed bitmask. +- * +- * NOTE: the caller must have a valid reference to the task, the +- * task must not exit() & deallocate itself prematurely. The +- * call is not atomic; no spinlocks may be held. 
+- */ +-static int __set_cpus_allowed_ptr(struct task_struct *p, +- const struct cpumask *new_mask, +- u32 flags) ++static int affine_move_task(struct rq *rq, struct task_struct *p, int dest_cpu, ++ raw_spinlock_t *lock, unsigned long irq_flags) ++{ ++ /* Can the task run on the task's current CPU? If so, we're done */ ++ if (!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { ++ if (p->migration_disabled) { ++ if (likely(p->cpus_ptr != &p->cpus_mask)) ++ __do_set_cpus_ptr(p, &p->cpus_mask); ++ p->migration_disabled = 0; ++ p->migration_flags |= MDF_FORCE_ENABLED; ++ /* When p is migrate_disabled, rq->lock should be held */ ++ rq->nr_pinned--; ++ } ++ ++ if (task_running(p) || READ_ONCE(p->__state) == TASK_WAKING) { ++ struct migration_arg arg = { p, dest_cpu }; ++ ++ /* Need help from migration thread: drop lock and wait. */ ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); ++ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); ++ return 0; ++ } ++ if (task_on_rq_queued(p)) { ++ /* ++ * OK, since we're going to drop the lock immediately ++ * afterwards anyway. ++ */ ++ update_rq_clock(rq); ++ rq = move_queued_task(rq, p, dest_cpu); ++ lock = &rq->lock; ++ } ++ } ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); ++ return 0; ++} ++ ++static int __set_cpus_allowed_ptr_locked(struct task_struct *p, ++ const struct cpumask *new_mask, ++ u32 flags, ++ struct rq *rq, ++ raw_spinlock_t *lock, ++ unsigned long irq_flags) + { + const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); + const struct cpumask *cpu_valid_mask = cpu_active_mask; + bool kthread = p->flags & PF_KTHREAD; ++ struct cpumask *user_mask = NULL; + int dest_cpu; +- unsigned long irq_flags; +- struct rq *rq; +- raw_spinlock_t *lock; + int ret = 0; + +- raw_spin_lock_irqsave(&p->pi_lock, irq_flags); +- rq = __task_access_lock(p, &lock); +- + if (kthread || is_migration_disabled(p)) { + /* + * Kernel threads are allowed on online && !active CPUs, +@@ -1998,37 +2034,14 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + + __do_set_cpus_allowed(p, new_mask); + +- /* Can the task run on the task's current CPU? If so, we're done */ +- if (cpumask_test_cpu(task_cpu(p), new_mask)) +- goto out; ++ if (flags & SCA_USER) ++ user_mask = clear_user_cpus_ptr(p); + +- if (p->migration_disabled) { +- if (likely(p->cpus_ptr != &p->cpus_mask)) +- __do_set_cpus_ptr(p, &p->cpus_mask); +- p->migration_disabled = 0; +- p->migration_flags |= MDF_FORCE_ENABLED; +- /* When p is migrate_disabled, rq->lock should be held */ +- rq->nr_pinned--; +- } ++ ret = affine_move_task(rq, p, dest_cpu, lock, irq_flags); + +- if (task_running(p) || READ_ONCE(p->__state) == TASK_WAKING) { +- struct migration_arg arg = { p, dest_cpu }; ++ kfree(user_mask); + +- /* Need help from migration thread: drop lock and wait. */ +- __task_access_unlock(p, lock); +- raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); +- stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); +- return 0; +- } +- if (task_on_rq_queued(p)) { +- /* +- * OK, since we're going to drop the lock immediately +- * afterwards anyway. +- */ +- update_rq_clock(rq); +- rq = move_queued_task(rq, p, dest_cpu); +- lock = &rq->lock; +- } ++ return ret; + + out: + __task_access_unlock(p, lock); +@@ -2037,12 +2050,160 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + return ret; + } + ++/* ++ * Change a given task's CPU affinity. 
Migrate the thread to a ++ * proper CPU and schedule it away if the CPU it's executing on ++ * is removed from the allowed bitmask. ++ * ++ * NOTE: the caller must have a valid reference to the task, the ++ * task must not exit() & deallocate itself prematurely. The ++ * call is not atomic; no spinlocks may be held. ++ */ ++static int __set_cpus_allowed_ptr(struct task_struct *p, ++ const struct cpumask *new_mask, u32 flags) ++{ ++ unsigned long irq_flags; ++ struct rq *rq; ++ raw_spinlock_t *lock; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, irq_flags); ++ rq = __task_access_lock(p, &lock); ++ ++ return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, lock, irq_flags); ++} ++ + int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) + { + return __set_cpus_allowed_ptr(p, new_mask, 0); + } + EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + ++/* ++ * Change a given task's CPU affinity to the intersection of its current ++ * affinity mask and @subset_mask, writing the resulting mask to @new_mask ++ * and pointing @p->user_cpus_ptr to a copy of the old mask. ++ * If the resulting mask is empty, leave the affinity unchanged and return ++ * -EINVAL. ++ */ ++static int restrict_cpus_allowed_ptr(struct task_struct *p, ++ struct cpumask *new_mask, ++ const struct cpumask *subset_mask) ++{ ++ struct cpumask *user_mask = NULL; ++ unsigned long irq_flags; ++ raw_spinlock_t *lock; ++ struct rq *rq; ++ int err; ++ ++ if (!p->user_cpus_ptr) { ++ user_mask = kmalloc(cpumask_size(), GFP_KERNEL); ++ if (!user_mask) ++ return -ENOMEM; ++ } ++ ++ raw_spin_lock_irqsave(&p->pi_lock, irq_flags); ++ rq = __task_access_lock(p, &lock); ++ ++ if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) { ++ err = -EINVAL; ++ goto err_unlock; ++ } ++ ++ /* ++ * We're about to butcher the task affinity, so keep track of what ++ * the user asked for in case we're able to restore it later on. ++ */ ++ if (user_mask) { ++ cpumask_copy(user_mask, p->cpus_ptr); ++ p->user_cpus_ptr = user_mask; ++ } ++ ++ /*return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);*/ ++ return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, lock, irq_flags); ++ ++err_unlock: ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); ++ kfree(user_mask); ++ return err; ++} ++ ++/* ++ * Restrict the CPU affinity of task @p so that it is a subset of ++ * task_cpu_possible_mask() and point @p->user_cpu_ptr to a copy of the ++ * old affinity mask. If the resulting mask is empty, we warn and walk ++ * up the cpuset hierarchy until we find a suitable mask. ++ */ ++void force_compatible_cpus_allowed_ptr(struct task_struct *p) ++{ ++ cpumask_var_t new_mask; ++ const struct cpumask *override_mask = task_cpu_possible_mask(p); ++ ++ alloc_cpumask_var(&new_mask, GFP_KERNEL); ++ ++ /* ++ * __migrate_task() can fail silently in the face of concurrent ++ * offlining of the chosen destination CPU, so take the hotplug ++ * lock to ensure that the migration succeeds. ++ */ ++ cpus_read_lock(); ++ if (!cpumask_available(new_mask)) ++ goto out_set_mask; ++ ++ if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask)) ++ goto out_free_mask; ++ ++ /* ++ * We failed to find a valid subset of the affinity mask for the ++ * task, so override it based on its cpuset hierarchy. 
++ */ ++ cpuset_cpus_allowed(p, new_mask); ++ override_mask = new_mask; ++ ++out_set_mask: ++ if (printk_ratelimit()) { ++ printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", ++ task_pid_nr(p), p->comm, ++ cpumask_pr_args(override_mask)); ++ } ++ ++ WARN_ON(set_cpus_allowed_ptr(p, override_mask)); ++out_free_mask: ++ cpus_read_unlock(); ++ free_cpumask_var(new_mask); ++} ++ ++static int ++__sched_setaffinity(struct task_struct *p, const struct cpumask *mask); ++ ++/* ++ * Restore the affinity of a task @p which was previously restricted by a ++ * call to force_compatible_cpus_allowed_ptr(). This will clear (and free) ++ * @p->user_cpus_ptr. ++ * ++ * It is the caller's responsibility to serialise this with any calls to ++ * force_compatible_cpus_allowed_ptr(@p). ++ */ ++void relax_compatible_cpus_allowed_ptr(struct task_struct *p) ++{ ++ struct cpumask *user_mask = p->user_cpus_ptr; ++ unsigned long flags; ++ ++ /* ++ * Try to restore the old affinity mask. If this fails, then ++ * we free the mask explicitly to avoid it being inherited across ++ * a subsequent fork(). ++ */ ++ if (!user_mask || !__sched_setaffinity(p, user_mask)) ++ return; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ user_mask = clear_user_cpus_ptr(p); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ kfree(user_mask); ++} ++ + #else /* CONFIG_SMP */ + + static inline int select_task_rq(struct task_struct *p) +@@ -2052,8 +2213,7 @@ static inline int select_task_rq(struct task_struct *p) + + static inline int + __set_cpus_allowed_ptr(struct task_struct *p, +- const struct cpumask *new_mask, +- u32 flags) ++ const struct cpumask *new_mask, u32 flags) + { + return set_cpus_allowed_ptr(p, new_mask); + } +@@ -5674,7 +5834,7 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask) + cpuset_cpus_allowed(p, cpus_allowed); + cpumask_and(new_mask, mask, cpus_allowed); + again: +- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); ++ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER); + if (retval) + goto out_free_new_mask; + +-- +2.37.0 + + +From 418955fbb100e0a418836b0410f816fa3b568ac1 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 2 Nov 2021 10:30:05 +0000 +Subject: [PATCH 227/297] sched/alt: [Sync] sched/wakeup + +This commit sync-up the below commits +43295d73adc8 sched/wakeup: Split out the wakeup ->__state check +5f220be21418 sched/wakeup: Prepare for RT sleeping spin/rwlocks +--- + kernel/sched/alt_core.c | 57 ++++++++++++++++++++++++++++++++++++----- + 1 file changed, 51 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index fe42edce5d55..7931fed720cc 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2473,6 +2473,55 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) + raw_spin_unlock(&rq->lock); + } + ++/* ++ * Invoked from try_to_wake_up() to check whether the task can be woken up. ++ * ++ * The caller holds p::pi_lock if p != current or has preemption ++ * disabled when p == current. ++ * ++ * The rules of PREEMPT_RT saved_state: ++ * ++ * The related locking code always holds p::pi_lock when updating ++ * p::saved_state, which means the code is fully serialized in both cases. ++ * ++ * The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other ++ * bits set. This allows to distinguish all wakeup scenarios. 
++ */ ++static __always_inline ++bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) ++{ ++ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) { ++ WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) && ++ state != TASK_RTLOCK_WAIT); ++ } ++ ++ if (READ_ONCE(p->__state) & state) { ++ *success = 1; ++ return true; ++ } ++ ++#ifdef CONFIG_PREEMPT_RT ++ /* ++ * Saved state preserves the task state across blocking on ++ * an RT lock. If the state matches, set p::saved_state to ++ * TASK_RUNNING, but do not wake the task because it waits ++ * for a lock wakeup. Also indicate success because from ++ * the regular waker's point of view this has succeeded. ++ * ++ * After acquiring the lock the task will restore p::__state ++ * from p::saved_state which ensures that the regular ++ * wakeup is not lost. The restore will also set ++ * p::saved_state to TASK_RUNNING so any further tests will ++ * not result in false positives vs. @success ++ */ ++ if (p->saved_state & state) { ++ p->saved_state = TASK_RUNNING; ++ *success = 1; ++ } ++#endif ++ return false; ++} ++ + /* + * Notes on Program-Order guarantees on SMP systems. + * +@@ -2620,10 +2669,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * - we're serialized against set_special_state() by virtue of + * it disabling IRQs (this allows not taking ->pi_lock). + */ +- if (!(READ_ONCE(p->__state) & state)) ++ if (!ttwu_state_match(p, state, &success)) + goto out; + +- success = 1; + trace_sched_waking(p); + WRITE_ONCE(p->__state, TASK_RUNNING); + trace_sched_wakeup(p); +@@ -2638,14 +2686,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); +- if (!(READ_ONCE(p->__state) & state)) ++ if (!ttwu_state_match(p, state, &success)) + goto unlock; + + trace_sched_waking(p); + +- /* We're going to change ->state: */ +- success = 1; +- + /* + * Ensure we load p->on_rq _after_ p->state, otherwise it would + * be possible to, falsely, observe p->on_rq == 0 and get stuck +-- +2.37.0 + + +From 2d8bd043e300268f660a5f839ee925406eb52c56 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 2 Nov 2021 10:54:21 +0000 +Subject: [PATCH 228/297] sched/alt: [Sync] b4bfa3fcfe3b sched/core: Rework the + __schedule() preempt argument + +--- + kernel/sched/alt_core.c | 34 +++++++++++++++++++++++----------- + 1 file changed, 23 insertions(+), 11 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 7931fed720cc..b30f46567470 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4386,6 +4386,18 @@ choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) + return next; + } + ++/* ++ * Constants for the sched_mode argument of __schedule(). ++ * ++ * The mode argument allows RT enabled kernels to differentiate a ++ * preemption from blocking on an 'sleeping' spin/rwlock. Note that ++ * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to ++ * optimize the AND operation out and just check for zero. ++ */ ++#define SM_NONE 0x0 ++#define SM_PREEMPT 0x1 ++#define SM_MASK_PREEMPT (~0U) ++ + /* + * schedule() is the main scheduler function. + * +@@ -4425,7 +4437,7 @@ choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) + * + * WARNING: must be called with preemption disabled! 
+ */ +-static void __sched notrace __schedule(bool preempt) ++static void __sched notrace __schedule(unsigned int sched_mode) + { + struct task_struct *prev, *next; + unsigned long *switch_count; +@@ -4437,13 +4449,13 @@ static void __sched notrace __schedule(bool preempt) + rq = cpu_rq(cpu); + prev = rq->curr; + +- schedule_debug(prev, preempt); ++ schedule_debug(prev, !!sched_mode); + + /* by passing sched_feat(HRTICK) checking which Alt schedule FW doesn't support */ + hrtick_clear(rq); + + local_irq_disable(); +- rcu_note_context_switch(preempt); ++ rcu_note_context_switch(!!sched_mode); + + /* + * Make sure that signal_pending_state()->signal_pending() below +@@ -4474,7 +4486,7 @@ static void __sched notrace __schedule(bool preempt) + * - ptrace_{,un}freeze_traced() can change ->state underneath us. + */ + prev_state = READ_ONCE(prev->__state); +- if (!preempt && prev_state) { ++ if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { + if (signal_pending_state(prev_state, prev)) { + WRITE_ONCE(prev->__state, TASK_RUNNING); + } else { +@@ -4545,7 +4557,7 @@ static void __sched notrace __schedule(bool preempt) + + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + +- trace_sched_switch(preempt, prev, next); ++ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next); + + /* Also unlocks the rq: */ + rq = context_switch(rq, prev, next); +@@ -4567,7 +4579,7 @@ void __noreturn do_task_dead(void) + /* Tell freezer to ignore us: */ + current->flags |= PF_NOFREEZE; + +- __schedule(false); ++ __schedule(SM_NONE); + BUG(); + + /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ +@@ -4628,7 +4640,7 @@ asmlinkage __visible void __sched schedule(void) + sched_submit_work(tsk); + do { + preempt_disable(); +- __schedule(false); ++ __schedule(SM_NONE); + sched_preempt_enable_no_resched(); + } while (need_resched()); + sched_update_worker(tsk); +@@ -4656,7 +4668,7 @@ void __sched schedule_idle(void) + */ + WARN_ON_ONCE(current->__state); + do { +- __schedule(false); ++ __schedule(SM_NONE); + } while (need_resched()); + } + +@@ -4709,7 +4721,7 @@ static void __sched notrace preempt_schedule_common(void) + */ + preempt_disable_notrace(); + preempt_latency_start(1); +- __schedule(true); ++ __schedule(SM_PREEMPT); + preempt_latency_stop(1); + preempt_enable_no_resched_notrace(); + +@@ -4788,7 +4800,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) + * an infinite recursion. 
+ */ + prev_ctx = exception_enter(); +- __schedule(true); ++ __schedule(SM_PREEMPT); + exception_exit(prev_ctx); + + preempt_latency_stop(1); +@@ -4937,7 +4949,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) + do { + preempt_disable(); + local_irq_enable(); +- __schedule(true); ++ __schedule(SM_PREEMPT); + local_irq_disable(); + sched_preempt_enable_no_resched(); + } while (need_resched()); +-- +2.37.0 + + +From a52bd2dcfe2ed4d4fa8376a0847db1a1c5bb6217 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 2 Nov 2021 10:59:04 +0000 +Subject: [PATCH 229/297] sched/alt: [Sync] 6991436c2b5d sched/core: Provide a + scheduling point for RT locks + +--- + kernel/sched/alt_core.c | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b30f46567470..f10749755cc8 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4396,7 +4396,13 @@ choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) + */ + #define SM_NONE 0x0 + #define SM_PREEMPT 0x1 +-#define SM_MASK_PREEMPT (~0U) ++#define SM_RTLOCK_WAIT 0x2 ++ ++#ifndef CONFIG_PREEMPT_RT ++# define SM_MASK_PREEMPT (~0U) ++#else ++# define SM_MASK_PREEMPT SM_PREEMPT ++#endif + + /* + * schedule() is the main scheduler function. +@@ -4703,6 +4709,18 @@ void __sched schedule_preempt_disabled(void) + preempt_disable(); + } + ++#ifdef CONFIG_PREEMPT_RT ++void __sched notrace schedule_rtlock(void) ++{ ++ do { ++ preempt_disable(); ++ __schedule(SM_RTLOCK_WAIT); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++} ++NOKPROBE_SYMBOL(schedule_rtlock); ++#endif ++ + static void __sched notrace preempt_schedule_common(void) + { + do { +-- +2.37.0 + + +From 565ecd60ab8d9eb1165e3d05f1898d72579748af Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 2 Nov 2021 11:05:16 +0000 +Subject: [PATCH 230/297] sched/alt: [Sync] 63acd42c0d49 sched/scs: Reset the + shadow stack when idle_task_exit + +--- + kernel/sched/alt_core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index f10749755cc8..44361857be30 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6642,6 +6642,7 @@ void idle_task_exit(void) + finish_arch_post_lock_switch(); + } + ++ scs_task_reset(current); + /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ + } + +-- +2.37.0 + + +From 9382a3b0c9ccb4917982e1f7b0fed7f1f120a9c4 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 2 Nov 2021 14:11:00 +0000 +Subject: [PATCH 231/297] Project-C v5.15-r0 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 44361857be30..9576c57f82da 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.14-r4" ++#define ALT_SCHED_VERSION "v5.15-r0" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 219ac601646e194c11e589e17fa29e278c467dea Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 19 Nov 2021 14:44:34 +0000 +Subject: [PATCH 232/297] sched/alt: [Sync] 3869eecf0504 kernel/sched: Fix + sched_fork() access an invalid sched_task_group + +--- + kernel/sched/alt_core.c | 29 +++++++++++++++-------------- + 1 file 
changed, 15 insertions(+), 14 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 9576c57f82da..b8d5b1db9fac 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2912,9 +2912,6 @@ static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p + */ + int sched_fork(unsigned long clone_flags, struct task_struct *p) + { +- unsigned long flags; +- struct rq *rq; +- + __sched_fork(clone_flags, p); + /* + * We mark the process as NEW here. This guarantees that +@@ -2948,6 +2945,20 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + p->sched_reset_on_fork = 0; + } + ++#ifdef CONFIG_SCHED_INFO ++ if (unlikely(sched_info_on())) ++ memset(&p->sched_info, 0, sizeof(p->sched_info)); ++#endif ++ init_task_preempt_count(p); ++ ++ return 0; ++} ++ ++void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ + /* + * The child is not yet in the pid-hash so no cgroup attach races, + * and the cgroup is pinned to this child due to cgroup_fork() +@@ -2982,20 +2993,10 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + * We're setting the CPU for the first time, we don't migrate, + * so use __set_task_cpu(). + */ +- __set_task_cpu(p, cpu_of(rq)); ++ __set_task_cpu(p, smp_processor_id()); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); +- +-#ifdef CONFIG_SCHED_INFO +- if (unlikely(sched_info_on())) +- memset(&p->sched_info, 0, sizeof(p->sched_info)); +-#endif +- init_task_preempt_count(p); +- +- return 0; + } + +-void sched_post_fork(struct task_struct *p) {} +- + #ifdef CONFIG_SCHEDSTATS + + DEFINE_STATIC_KEY_FALSE(sched_schedstats); +-- +2.37.0 + + +From 0f40f9b6ce1e2e8d06f863f750b95a179f8f39d5 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 19 Nov 2021 15:15:12 +0000 +Subject: [PATCH 233/297] Project-C v5.15-r1 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b8d5b1db9fac..8b0ddbdd24e4 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.15-r0" ++#define ALT_SCHED_VERSION "v5.15-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 51eb72aad2884ab9241bf686327c65d04abe90a5 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 31 Jan 2022 23:54:53 +0000 +Subject: [PATCH 234/297] sched/alt: [Sync] 008f75a20e70 block: cleanup the + flush plug helpers + +--- + kernel/sched/alt_core.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 8b0ddbdd24e4..39b6c5b8a147 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4627,7 +4627,7 @@ static inline void sched_submit_work(struct task_struct *tsk) + * make sure to submit it to avoid deadlocks. 
+ */ + if (blk_needs_flush_plug(tsk)) +- blk_schedule_flush_plug(tsk); ++ blk_flush_plug(tsk->plug, true); + } + + static void sched_update_worker(struct task_struct *tsk) +@@ -6274,7 +6274,8 @@ int io_schedule_prepare(void) + int old_iowait = current->in_iowait; + + current->in_iowait = 1; +- blk_schedule_flush_plug(current); ++ if (current->plug) ++ blk_flush_plug(current->plug, true); + + return old_iowait; + } +-- +2.37.0 + + +From 874ce3af55cd45ad1a86e22375c553f46b9c79a9 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 31 Jan 2022 23:57:26 +0000 +Subject: [PATCH 235/297] sched/alt: [Sync] 874f670e6088 sched: Clean up the + might_sleep() underscore zoo + +--- + kernel/sched/alt_core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 39b6c5b8a147..6512c37d4fed 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -7247,11 +7247,11 @@ void __might_sleep(const char *file, int line, int preempt_offset) + (void *)current->task_state_change, + (void *)current->task_state_change); + +- ___might_sleep(file, line, preempt_offset); ++ __might_resched(file, line, preempt_offset); + } + EXPORT_SYMBOL(__might_sleep); + +-void ___might_sleep(const char *file, int line, int preempt_offset) ++void __might_resched(const char *file, int line, int preempt_offset) + { + /* Ratelimiting timestamp: */ + static unsigned long prev_jiffy; +@@ -7296,7 +7296,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset) + dump_stack(); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); + } +-EXPORT_SYMBOL(___might_sleep); ++EXPORT_SYMBOL(__might_resched); + + void __cant_sleep(const char *file, int line, int preempt_offset) + { +-- +2.37.0 + + +From 8fe7d47613b3ec4082df7d18bd2e6c2d56b886a2 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 31 Jan 2022 23:59:31 +0000 +Subject: [PATCH 236/297] sched/alt: [Sync] 42a387566c56 sched: Remove + preempt_offset argument from __might_sleep() + +--- + kernel/sched/alt_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 6512c37d4fed..7f099c407879 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -7233,7 +7233,7 @@ static inline int preempt_count_equals(int preempt_offset) + return (nested == preempt_offset); + } + +-void __might_sleep(const char *file, int line, int preempt_offset) ++void __might_sleep(const char *file, int line) + { + unsigned int state = get_current_state(); + /* +@@ -7247,7 +7247,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) + (void *)current->task_state_change, + (void *)current->task_state_change); + +- __might_resched(file, line, preempt_offset); ++ __might_resched(file, line, 0); + } + EXPORT_SYMBOL(__might_sleep); + +-- +2.37.0 + + +From 91c92a5bd40c7cc9d87ed3802284e410e8555d2e Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 1 Feb 2022 00:03:46 +0000 +Subject: [PATCH 237/297] sched/alt: [Sync] a45ed302b6e6 sched: Cleanup + might_sleep() printks + +--- + kernel/sched/alt_core.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 7f099c407879..cb8f93bca0b7 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -7273,16 +7273,14 @@ void __might_resched(const char *file, int line, int preempt_offset) + /* Save this before calling printk(), since that will clobber it: */ + 
preempt_disable_ip = get_preempt_disable_ip(current); + +- printk(KERN_ERR +- "BUG: sleeping function called from invalid context at %s:%d\n", +- file, line); +- printk(KERN_ERR +- "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", +- in_atomic(), irqs_disabled(), current->non_block_count, +- current->pid, current->comm); ++ pr_err("BUG: sleeping function called from invalid context at %s:%d\n", ++ file, line); ++ pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), current->non_block_count, ++ current->pid, current->comm); + + if (task_stack_end_corrupted(current)) +- printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); ++ pr_emerg("Thread overran stack, or stack corrupted\n"); + + debug_show_held_locks(current); + if (irqs_disabled()) +-- +2.37.0 + + +From 4ac3f931dc243f8fd01ed754d7bbc19ec9f2933c Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 24 Mar 2022 11:05:50 +0000 +Subject: [PATCH 238/297] sched/alt: [Sync] 8d713b699e84 sched: Make + might_sleep() output less confusing + +--- + kernel/sched/alt_core.c | 28 ++++++++++++++++++++++------ + 1 file changed, 22 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index cb8f93bca0b7..35d41ddd866a 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -7251,6 +7251,18 @@ void __might_sleep(const char *file, int line) + } + EXPORT_SYMBOL(__might_sleep); + ++static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) ++{ ++ if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT)) ++ return; ++ ++ if (preempt_count() == preempt_offset) ++ return; ++ ++ pr_err("Preemption disabled at:"); ++ print_ip_sym(KERN_ERR, ip); ++} ++ + void __might_resched(const char *file, int line, int preempt_offset) + { + /* Ratelimiting timestamp: */ +@@ -7278,6 +7290,13 @@ void __might_resched(const char *file, int line, int preempt_offset) + pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), current->non_block_count, + current->pid, current->comm); ++ pr_err("preempt_count: %x, expected: %x\n", preempt_count(), ++ preempt_offset); ++ ++ if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { ++ pr_err("RCU nest depth: %d, expected: 0\n", ++ rcu_preempt_depth()); ++ } + + if (task_stack_end_corrupted(current)) + pr_emerg("Thread overran stack, or stack corrupted\n"); +@@ -7285,12 +7304,9 @@ void __might_resched(const char *file, int line, int preempt_offset) + debug_show_held_locks(current); + if (irqs_disabled()) + print_irqtrace_events(current); +-#ifdef CONFIG_DEBUG_PREEMPT +- if (!preempt_count_equals(preempt_offset)) { +- pr_err("Preemption disabled at:"); +- print_ip_sym(KERN_ERR, preempt_disable_ip); +- } +-#endif ++ ++ print_preempt_disable_ip(preempt_offset, preempt_disable_ip); ++ + dump_stack(); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); + } +-- +2.37.0 + + +From 33c812e321c733db96c0c0867b884b4149383c3b Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 24 Mar 2022 11:14:16 +0000 +Subject: [PATCH 239/297] sched/alt: [Sync] 50e081b96e35 sched: Make RCU nest + depth distinct in __might_resched() + +--- + kernel/sched/alt_core.c | 28 ++++++++++++++++------------ + 1 file changed, 16 insertions(+), 12 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 35d41ddd866a..5015be1987f0 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -7226,12 +7226,6 @@ void __init sched_init(void) + } + + 
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP +-static inline int preempt_count_equals(int preempt_offset) +-{ +- int nested = preempt_count() + rcu_preempt_depth(); +- +- return (nested == preempt_offset); +-} + + void __might_sleep(const char *file, int line) + { +@@ -7263,7 +7257,16 @@ static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) + print_ip_sym(KERN_ERR, ip); + } + +-void __might_resched(const char *file, int line, int preempt_offset) ++static inline bool resched_offsets_ok(unsigned int offsets) ++{ ++ unsigned int nested = preempt_count(); ++ ++ nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT; ++ ++ return nested == offsets; ++} ++ ++void __might_resched(const char *file, int line, unsigned int offsets) + { + /* Ratelimiting timestamp: */ + static unsigned long prev_jiffy; +@@ -7273,7 +7276,7 @@ void __might_resched(const char *file, int line, int preempt_offset) + /* WARN_ON_ONCE() by default, no rate limit required: */ + rcu_sleep_check(); + +- if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && ++ if ((resched_offsets_ok(offsets) && !irqs_disabled() && + !is_idle_task(current) && !current->non_block_count) || + system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || + oops_in_progress) +@@ -7291,11 +7294,11 @@ void __might_resched(const char *file, int line, int preempt_offset) + in_atomic(), irqs_disabled(), current->non_block_count, + current->pid, current->comm); + pr_err("preempt_count: %x, expected: %x\n", preempt_count(), +- preempt_offset); ++ offsets & MIGHT_RESCHED_PREEMPT_MASK); + + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { +- pr_err("RCU nest depth: %d, expected: 0\n", +- rcu_preempt_depth()); ++ pr_err("RCU nest depth: %d, expected: %u\n", ++ rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT); + } + + if (task_stack_end_corrupted(current)) +@@ -7305,7 +7308,8 @@ void __might_resched(const char *file, int line, int preempt_offset) + if (irqs_disabled()) + print_irqtrace_events(current); + +- print_preempt_disable_ip(preempt_offset, preempt_disable_ip); ++ print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK, ++ preempt_disable_ip); + + dump_stack(); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +-- +2.37.0 + + +From 514691cc12c9e113eda837fd1367710e209aadf7 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 24 Mar 2022 11:22:03 +0000 +Subject: [PATCH 240/297] sched/alt: [Sync] c33627e9a114 sched: Switch + wait_task_inactive to HRTIMER_MODE_REL_HARD + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 5015be1987f0..ff4a45708a68 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1747,7 +1747,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state + ktime_t to = NSEC_PER_SEC / HZ; + + set_current_state(TASK_UNINTERRUPTIBLE); +- schedule_hrtimeout(&to, HRTIMER_MODE_REL); ++ schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); + continue; + } + +-- +2.37.0 + + +From 6e0c61073cc9c76d6ac48058e6019cb92cbda964 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Mar 2022 10:43:13 +0000 +Subject: [PATCH 241/297] sched/alt: [Sync] ceeadb83aea2 sched: Make struct + sched_statistics independent of fair sched class + +--- + kernel/sched/alt_core.c | 16 +++++++++++++--- + kernel/sched/alt_sched.h | 1 + + kernel/sched/stats.h | 2 ++ + 3 files changed, 16 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 
ff4a45708a68..f783788e3caa 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -28,7 +28,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -2236,9 +2235,10 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) + rq = this_rq(); + + #ifdef CONFIG_SMP +- if (cpu == rq->cpu) ++ if (cpu == rq->cpu) { + __schedstat_inc(rq->ttwu_local); +- else { ++ __schedstat_inc(p->stats.nr_wakeups_local); ++ } else { + /** Alt schedule FW ToDo: + * How to do ttwu_wake_remote + */ +@@ -2246,6 +2246,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) + #endif /* CONFIG_SMP */ + + __schedstat_inc(rq->ttwu_count); ++ __schedstat_inc(p->stats.nr_wakeups); + } + + /* +@@ -2895,6 +2896,11 @@ static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p + p->stime = 0; + p->sched_time = 0; + ++#ifdef CONFIG_SCHEDSTATS ++ /* Even if schedstat is disabled, there should not be garbage */ ++ memset(&p->stats, 0, sizeof(p->stats)); ++#endif ++ + #ifdef CONFIG_PREEMPT_NOTIFIERS + INIT_HLIST_HEAD(&p->preempt_notifiers); + #endif +@@ -7397,6 +7403,10 @@ void normalize_rt_tasks(void) + if (p->flags & PF_KTHREAD) + continue; + ++ schedstat_set(p->stats.wait_start, 0); ++ schedstat_set(p->stats.sleep_start, 0); ++ schedstat_set(p->stats.block_start, 0); ++ + if (!rt_task(p)) { + /* + * Renice negative nice level userspace +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 289058a09bd5..f2b9e686d6a6 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + #include + #include + #include +diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h +index baa839c1ba96..15238be0581b 100644 +--- a/kernel/sched/stats.h ++++ b/kernel/sched/stats.h +@@ -89,6 +89,7 @@ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delt + + #endif /* CONFIG_SCHEDSTATS */ + ++#ifndef CONFIG_SCHED_ALT + #ifdef CONFIG_FAIR_GROUP_SCHED + struct sched_entity_stats { + struct sched_entity se; +@@ -105,6 +106,7 @@ __schedstats_from_se(struct sched_entity *se) + #endif + return &task_of(se)->stats; + } ++#endif /* CONFIG_SCHED_ALT */ + + #ifdef CONFIG_PSI + /* +-- +2.37.0 + + +From ed37afe16495d81e98faa671095c14c0c3dce01c Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Mar 2022 13:47:50 +0000 +Subject: [PATCH 242/297] sched/alt: [Sync] c597bfddc9e9 sched: Provide Kconfig + support for default dynamic preempt mode + +--- + kernel/sched/alt_core.c | 29 ++++++++++++++++++++++++++--- + 1 file changed, 26 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index f783788e3caa..5fad6f795625 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4876,12 +4876,13 @@ EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); + */ + + enum { +- preempt_dynamic_none = 0, ++ preempt_dynamic_undefined = -1, ++ preempt_dynamic_none, + preempt_dynamic_voluntary, + preempt_dynamic_full, + }; + +-int preempt_dynamic_mode = preempt_dynamic_full; ++int preempt_dynamic_mode = preempt_dynamic_undefined; + + int sched_dynamic_mode(const char *str) + { +@@ -4954,7 +4955,27 @@ static int __init setup_preempt_mode(char *str) + } + __setup("preempt=", setup_preempt_mode); + +-#endif /* CONFIG_PREEMPT_DYNAMIC */ ++static void __init preempt_dynamic_init(void) ++{ ++ if (preempt_dynamic_mode == preempt_dynamic_undefined) { ++ if (IS_ENABLED(CONFIG_PREEMPT_NONE)) { ++ 
sched_dynamic_update(preempt_dynamic_none); ++ } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) { ++ sched_dynamic_update(preempt_dynamic_voluntary); ++ } else { ++ /* Default static call setting, nothing to do */ ++ WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); ++ preempt_dynamic_mode = preempt_dynamic_full; ++ pr_info("Dynamic Preempt: full\n"); ++ } ++ } ++} ++ ++#else /* !CONFIG_PREEMPT_DYNAMIC */ ++ ++static inline void preempt_dynamic_init(void) { } ++ ++#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */ + + /* + * This is the entry point to schedule() from kernel preemption +@@ -7229,6 +7250,8 @@ void __init sched_init(void) + #endif /* SMP */ + + psi_init(); ++ ++ preempt_dynamic_init(); + } + + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP +-- +2.37.0 + + +From e6f6d45ddd16dc961aa6f70cb9d90a6b873a390a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Mar 2022 13:52:06 +0000 +Subject: [PATCH 243/297] sched/alt: [Sync] 8d491de6edc2 sched: Move mmdrop to + RCU on RT + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 5fad6f795625..89cd86225ed3 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3465,7 +3465,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) + */ + if (mm) { + membarrier_mm_sync_core_before_usermode(mm); +- mmdrop(mm); ++ mmdrop_sched(mm); + } + if (unlikely(prev_state == TASK_DEAD)) { + /* +-- +2.37.0 + + +From d3567a80216b2828cd001b3c97ad9d90754c8dec Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Mar 2022 14:06:58 +0000 +Subject: [PATCH 244/297] sched/alt: [Sync] 670721c7bd2a sched: Move kprobes + cleanup out of finish_task_switch() + +--- + kernel/sched/alt_core.c | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 89cd86225ed3..107c5f4b74a7 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3468,12 +3468,6 @@ static struct rq *finish_task_switch(struct task_struct *prev) + mmdrop_sched(mm); + } + if (unlikely(prev_state == TASK_DEAD)) { +- /* +- * Remove function-return probe instances associated with this +- * task and put them back on the free list. +- */ +- kprobe_flush_task(prev); +- + /* Task is done with its stack. */ + put_task_stack(prev); + +-- +2.37.0 + + +From 6eaae287302bde2d2107e4800710df3ebe02206c Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Mar 2022 14:10:30 +0000 +Subject: [PATCH 245/297] sched/alt: [Sync] b945efcdd07d sched: Remove + pointless preemption disable in sched_submit_work() + +--- + kernel/sched/alt_core.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 107c5f4b74a7..3ba686739e4d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4603,20 +4603,14 @@ static inline void sched_submit_work(struct task_struct *tsk) + + task_flags = tsk->flags; + /* +- * If a worker went to sleep, notify and ask workqueue whether +- * it wants to wake up a task to maintain concurrency. +- * As this function is called inside the schedule() context, +- * we disable preemption to avoid it calling schedule() again +- * in the possible wakeup of a kworker and because wq_worker_sleeping() +- * requires it. ++ * If a worker goes to sleep, notify and ask workqueue whether it ++ * wants to wake up a task to maintain concurrency. 
+ */ + if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) { +- preempt_disable(); + if (task_flags & PF_WQ_WORKER) + wq_worker_sleeping(tsk); + else + io_wq_worker_sleeping(tsk); +- preempt_enable_no_resched(); + } + + if (tsk_is_pi_blocked(tsk)) +-- +2.37.0 + + +From 6842ba831f915ba07bd0bc2b84515b2f446fb74f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Mar 2022 21:35:14 +0000 +Subject: [PATCH 246/297] sched/alt: [Sync] f6ac18fafcf6 sched: Improve + try_invoke_on_locked_down_task() + +--- + kernel/sched/alt_core.c | 63 +++++++++++++++++++++++++---------------- + 1 file changed, 39 insertions(+), 24 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3ba686739e4d..fbb46f7c98ce 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2821,41 +2821,56 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * @func: Function to invoke. + * @arg: Argument to function. + * +- * If the specified task can be quickly locked into a definite state +- * (either sleeping or on a given runqueue), arrange to keep it in that +- * state while invoking @func(@arg). This function can use ->on_rq and +- * task_curr() to work out what the state is, if required. Given that +- * @func can be invoked with a runqueue lock held, it had better be quite +- * lightweight. ++ * Fix the task in it's current state by avoiding wakeups and or rq operations ++ * and call @func(@arg) on it. This function can use ->on_rq and task_curr() ++ * to work out what the state is, if required. Given that @func can be invoked ++ * with a runqueue lock held, it had better be quite lightweight. + * + * Returns: +- * @false if the task slipped out from under the locks. +- * @true if the task was locked onto a runqueue or is sleeping. +- * However, @func can override this by returning @false. ++ * Whatever @func returns + */ + bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) + { ++ struct rq *rq = NULL; ++ unsigned int state; + struct rq_flags rf; + bool ret = false; +- struct rq *rq; + + raw_spin_lock_irqsave(&p->pi_lock, rf.flags); +- if (p->on_rq) { ++ ++ state = READ_ONCE(p->__state); ++ ++ /* ++ * Ensure we load p->on_rq after p->__state, otherwise it would be ++ * possible to, falsely, observe p->on_rq == 0. ++ * ++ * See try_to_wake_up() for a longer comment. ++ */ ++ smp_rmb(); ++ ++ /* ++ * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when ++ * the task is blocked. Make sure to check @state since ttwu() can drop ++ * locks at the end, see ttwu_queue_wakelist(). ++ */ ++ if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq) + rq = __task_rq_lock(p, &rf); +- if (task_rq(p) == rq) +- ret = func(p, arg); ++ ++ /* ++ * At this point the task is pinned; either: ++ * - blocked and we're holding off wakeups (pi->lock) ++ * - woken, and we're holding off enqueue (rq->lock) ++ * - queued, and we're holding off schedule (rq->lock) ++ * - running, and we're holding off de-schedule (rq->lock) ++ * ++ * The called function (@func) can use: task_curr(), p->on_rq and ++ * p->__state to differentiate between these states. ++ */ ++ ret = func(p, arg); ++ ++ if (rq) + __task_rq_unlock(rq, &rf); +- } else { +- switch (READ_ONCE(p->__state)) { +- case TASK_RUNNING: +- case TASK_WAKING: +- break; +- default: +- smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). 
+- if (!p->on_rq) +- ret = func(p, arg); +- } +- } ++ + raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); + return ret; + } +-- +2.37.0 + + +From f884a8b53f81a0e837d002bcc30d6417be0c9a1f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 25 Mar 2022 22:07:37 +0000 +Subject: [PATCH 247/297] sched/alt: [Sync] 9b3c4ab3045e sched,rcu: Rework + try_invoke_on_locked_down_task() + +--- + kernel/sched/alt_core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index fbb46f7c98ce..4c02602ea9ab 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2816,7 +2816,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + } + + /** +- * try_invoke_on_locked_down_task - Invoke a function on task in fixed state ++ * task_call_func - Invoke a function on task in fixed state + * @p: Process for which the function is to be invoked, can be @current. + * @func: Function to invoke. + * @arg: Argument to function. +@@ -2829,12 +2829,12 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * Returns: + * Whatever @func returns + */ +-bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) ++int task_call_func(struct task_struct *p, task_call_f func, void *arg) + { + struct rq *rq = NULL; + unsigned int state; + struct rq_flags rf; +- bool ret = false; ++ int ret; + + raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + +-- +2.37.0 + + +From 44d5bff770e0a048a330580b286e49515656d9e5 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 26 Mar 2022 14:57:59 +0000 +Subject: [PATCH 248/297] sched/alt: [Sync] 8850cb663b5c sched: Simplify + wake_up_*idle*() + +--- + kernel/sched/alt_core.c | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4c02602ea9ab..12bad31a663a 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2434,15 +2434,11 @@ void wake_up_if_idle(int cpu) + if (!is_idle_task(rcu_dereference(rq->curr))) + goto out; + +- if (set_nr_if_polling(rq->idle)) { +- trace_sched_wake_idle_without_ipi(cpu); +- } else { +- raw_spin_lock_irqsave(&rq->lock, flags); +- if (is_idle_task(rq->curr)) +- smp_send_reschedule(cpu); +- /* Else CPU is not idle, do nothing here */ +- raw_spin_unlock_irqrestore(&rq->lock, flags); +- } ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ if (is_idle_task(rq->curr)) ++ resched_curr(rq); ++ /* Else CPU is not idle, do nothing here */ ++ raw_spin_unlock_irqrestore(&rq->lock, flags); + + out: + rcu_read_unlock(); +-- +2.37.0 + + +From ca436f41b3429fe65d576e08288971b993be7b2f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 26 Mar 2022 15:37:29 +0000 +Subject: [PATCH 249/297] sched/alt: [Sync] 42a20f86dc19 sched: Add wrapper for + get_wchan() to keep task blocked + +--- + kernel/sched/alt_core.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 12bad31a663a..1b7eae610c8f 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -705,6 +705,25 @@ bool sched_task_on_rq(struct task_struct *p) + return task_on_rq_queued(p); + } + ++unsigned long get_wchan(struct task_struct *p) ++{ ++ unsigned long ip = 0; ++ unsigned int state; ++ ++ if (!p || p == current) ++ return 0; ++ ++ /* Only get wchan if task is blocked and we can keep it that way. 
*/ ++ raw_spin_lock_irq(&p->pi_lock); ++ state = READ_ONCE(p->__state); ++ smp_rmb(); /* see try_to_wake_up() */ ++ if (state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq) ++ ip = __get_wchan(p); ++ raw_spin_unlock_irq(&p->pi_lock); ++ ++ return ip; ++} ++ + /* + * Add/Remove/Requeue task to/from the runqueue routines + * Context: rq->lock +-- +2.37.0 + + +From 1374b3bed52331b57467ce3b08a20fd51e064213 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 26 Mar 2022 15:53:21 +0000 +Subject: [PATCH 250/297] sched/alt: [Sync] 42dc938a590c sched/core: Mitigate + race cpus_share_cache()/update_top_cache_domain() + +--- + kernel/sched/alt_core.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 1b7eae610c8f..2f05197477a7 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2465,6 +2465,9 @@ void wake_up_if_idle(int cpu) + + bool cpus_share_cache(int this_cpu, int that_cpu) + { ++ if (this_cpu == that_cpu) ++ return true; ++ + return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); + } + #else /* !CONFIG_SMP */ +-- +2.37.0 + + +From 546b3c60ac0f5ff3734f9c06099ddc2d422320a4 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 26 Mar 2022 16:31:36 +0000 +Subject: [PATCH 251/297] sched/alt: [Sync] b027789e5e50 sched/fair: Prevent + dead task groups from regaining cfs_rq's + +--- + kernel/sched/alt_core.c | 30 ++++++++++++++++++++++-------- + 1 file changed, 22 insertions(+), 8 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 2f05197477a7..7d696c143d7d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -7522,6 +7522,20 @@ static void sched_free_group(struct task_group *tg) + kmem_cache_free(task_group_cache, tg); + } + ++static void sched_free_group_rcu(struct rcu_head *rhp) ++{ ++ sched_free_group(container_of(rhp, struct task_group, rcu)); ++} ++ ++static void sched_unregister_group(struct task_group *tg) ++{ ++ /* ++ * We have to wait for yet another RCU grace period to expire, as ++ * print_cfs_stats() might run concurrently. 
++ */ ++ call_rcu(&tg->rcu, sched_free_group_rcu); ++} ++ + /* allocate runqueue etc for a new task group */ + struct task_group *sched_create_group(struct task_group *parent) + { +@@ -7539,19 +7553,19 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) + } + + /* rcu callback to free various structures associated with a task group */ +-static void sched_free_group_rcu(struct rcu_head *rhp) ++static void sched_unregister_group_rcu(struct rcu_head *rhp) + { +- /* Now it should be safe to free those cfs_rqs */ +- sched_free_group(container_of(rhp, struct task_group, rcu)); ++ /* Now it should be safe to free those cfs_rqs: */ ++ sched_unregister_group(container_of(rhp, struct task_group, rcu)); + } + + void sched_destroy_group(struct task_group *tg) + { +- /* Wait for possible concurrent references to cfs_rqs complete */ +- call_rcu(&tg->rcu, sched_free_group_rcu); ++ /* Wait for possible concurrent references to cfs_rqs complete: */ ++ call_rcu(&tg->rcu, sched_unregister_group_rcu); + } + +-void sched_offline_group(struct task_group *tg) ++void sched_release_group(struct task_group *tg) + { + } + +@@ -7592,7 +7606,7 @@ static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) + { + struct task_group *tg = css_tg(css); + +- sched_offline_group(tg); ++ sched_release_group(tg); + } + + static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) +@@ -7602,7 +7616,7 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) + /* + * Relies on the RCU grace period between css_released() and this. + */ +- sched_free_group(tg); ++ sched_unregister_group(tg); + } + + static void cpu_cgroup_fork(struct task_struct *task) +-- +2.37.0 + + +From eafb174cc75a117b7a41bf419b2fcd823c24c67d Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 28 Mar 2022 14:41:07 +0000 +Subject: [PATCH 252/297] sched/alt: [Sync] dce1ca0525bf sched/scs: Reset task + stack state in bringup_cpu() + +--- + kernel/sched/alt_core.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 7d696c143d7d..564ddea29a5c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6611,9 +6611,6 @@ void __init init_idle(struct task_struct *idle, int cpu) + + sched_queue_init_idle(&rq->queue, idle); + +- scs_task_reset(idle); +- kasan_unpoison_task_stack(idle); +- + #ifdef CONFIG_SMP + /* + * It's possible that init_idle() gets called multiple times on a task, +@@ -6692,7 +6689,6 @@ void idle_task_exit(void) + finish_arch_post_lock_switch(); + } + +- scs_task_reset(current); + /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ + } + +-- +2.37.0 + + +From 971a3cad3439a60192fc109d292211ee949bd30f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 28 Mar 2022 15:58:21 +0000 +Subject: [PATCH 253/297] sched/alt: [Sync] 9ed20bafc858 preempt/dynamic: Fix + setup_preempt_mode() return value + +--- + kernel/sched/alt_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 564ddea29a5c..e6fb7f44d1f8 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4968,11 +4968,11 @@ static int __init setup_preempt_mode(char *str) + int mode = sched_dynamic_mode(str); + if (mode < 0) { + pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); +- return 1; ++ return 0; + } + + sched_dynamic_update(mode); +- return 0; ++ return 1; + } + __setup("preempt=", setup_preempt_mode); + +-- +2.37.0 + + +From 
8a378e4e86bfb0ab4c2f96539ec085ed85b6f597 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 28 Mar 2022 16:39:07 +0000 +Subject: [PATCH 254/297] sched/alt: [Sync] bcf9033e5449 sched: move CPU field + back into thread_info if THREAD_INFO_IN_TASK=y + +--- + kernel/sched/alt_core.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index e6fb7f44d1f8..01af42ae88c9 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1333,12 +1333,8 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) + */ + smp_wmb(); + +-#ifdef CONFIG_THREAD_INFO_IN_TASK +- WRITE_ONCE(p->cpu, cpu); +-#else + WRITE_ONCE(task_thread_info(p)->cpu, cpu); + #endif +-#endif + } + + static inline bool is_migration_disabled(struct task_struct *p) +-- +2.37.0 + + +From a2f5af380f37a9dfed9b0db77e4bf5e241484048 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 30 Mar 2022 14:02:11 +0000 +Subject: [PATCH 255/297] Project-C v5.16-r0 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 01af42ae88c9..c90cd4a48660 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -66,7 +66,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.15-r1" ++#define ALT_SCHED_VERSION "v5.16-r0" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 6d57850ffda9de0a69e1432a0635b1159f61cbbd Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 30 Mar 2022 14:09:08 +0000 +Subject: [PATCH 256/297] sched/alt: [Sync] c65cfd89cef6 sched: Fix yet more + sched_fork() races + +--- + kernel/sched/alt_core.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c90cd4a48660..3a841b733f1e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2989,17 +2989,14 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + return 0; + } + +-void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) ++void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) + { + unsigned long flags; + struct rq *rq; + + /* +- * The child is not yet in the pid-hash so no cgroup attach races, +- * and the cgroup is pinned to this child due to cgroup_fork() +- * is ran before sched_fork(). +- * +- * Silence PROVE_RCU. ++ * Because we're not yet on the pid-hash, p->pi_lock isn't strictly ++ * required yet, but lockdep gets upset if rules are violated. 
+ */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + /* +@@ -3032,6 +3029,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + } + ++void sched_post_fork(struct task_struct *p) ++{ ++} ++ + #ifdef CONFIG_SCHEDSTATS + + DEFINE_STATIC_KEY_FALSE(sched_schedstats); +-- +2.37.0 + + +From be71d9d7fd6c13111e2102c3e17f68fc0061dc52 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 30 Mar 2022 14:17:03 +0000 +Subject: [PATCH 257/297] sched/alt: [Sync] f07660619137 sched: Avoid double + preemption in __cond_resched_*lock*() + +--- + kernel/sched/alt_core.c | 12 +++--------- + 1 file changed, 3 insertions(+), 9 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3a841b733f1e..02b29f2ca885 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6211,9 +6211,7 @@ int __cond_resched_lock(spinlock_t *lock) + + if (spin_needbreak(lock) || resched) { + spin_unlock(lock); +- if (resched) +- preempt_schedule_common(); +- else ++ if (!_cond_resched()) + cpu_relax(); + ret = 1; + spin_lock(lock); +@@ -6231,9 +6229,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock) + + if (rwlock_needbreak(lock) || resched) { + read_unlock(lock); +- if (resched) +- preempt_schedule_common(); +- else ++ if (!_cond_resched()) + cpu_relax(); + ret = 1; + read_lock(lock); +@@ -6251,9 +6247,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock) + + if (rwlock_needbreak(lock) || resched) { + write_unlock(lock); +- if (resched) +- preempt_schedule_common(); +- else ++ if (!_cond_resched()) + cpu_relax(); + ret = 1; + write_lock(lock); +-- +2.37.0 + + +From 262bb5b1599c775cae11329cc6aaf8ced2b6a869 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 30 Mar 2022 15:35:44 +0000 +Subject: [PATCH 258/297] Project-C v5.16-r1 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 02b29f2ca885..83407c4ee806 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -66,7 +66,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.16-r0" ++#define ALT_SCHED_VERSION "v5.16-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 52165e52705901f7f6a7df5324fe6e8e09140ac9 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 31 Mar 2022 13:38:40 +0000 +Subject: [PATCH 259/297] sched/alt: [Sync] 0569b245132c sched: Snapshot thread + flags + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 83407c4ee806..4c1ab66489a5 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6492,7 +6492,7 @@ void sched_show_task(struct task_struct *p) + rcu_read_unlock(); + pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", + free, task_pid_nr(p), ppid, +- (unsigned long)task_thread_info(p)->flags); ++ read_task_thread_flags(p)); + + print_worker_info(KERN_INFO, p); + print_stop_info(KERN_INFO, p); +-- +2.37.0 + + +From a54525ecc20872a88395111beb0ac9383136b8f5 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 31 Mar 2022 13:44:13 +0000 +Subject: [PATCH 260/297] sched/alt: [Sync] 9d0df3779745 sched: Trigger warning + if ->migration_disabled counter underflows. 
+ +--- + kernel/sched/alt_core.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4c1ab66489a5..c1f3afc120cf 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1449,6 +1449,9 @@ void migrate_enable(void) + return; + } + ++ if (WARN_ON_ONCE(!p->migration_disabled)) ++ return; ++ + /* + * Ensure stop_task runs either before or after this, and that + * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). +-- +2.37.0 + + +From bb74763fb1e197e2cda77d77112680cd39e570d3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 31 Mar 2022 13:47:42 +0000 +Subject: [PATCH 261/297] sched/alt: [Sync] 40966e316f86 kthread: Ensure struct + kthread is present for all kthreads + +--- + kernel/sched/alt_core.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c1f3afc120cf..475437df15e1 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6582,14 +6582,6 @@ void __init init_idle(struct task_struct *idle, int cpu) + + __sched_fork(0, idle); + +- /* +- * The idle task doesn't need the kthread struct to function, but it +- * is dressed up as a per-CPU kthread and thus needs to play the part +- * if we want to avoid special-casing it in code that deals with per-CPU +- * kthreads. +- */ +- set_kthread_struct(idle); +- + raw_spin_lock_irqsave(&idle->pi_lock, flags); + raw_spin_lock(&rq->lock); + update_rq_clock(rq); +@@ -7243,6 +7235,14 @@ void __init sched_init(void) + mmgrab(&init_mm); + enter_lazy_tlb(&init_mm, current); + ++ /* ++ * The idle task doesn't need the kthread struct to function, but it ++ * is dressed up as a per-CPU kthread and thus needs to play the part ++ * if we want to avoid special-casing it in code that deals with per-CPU ++ * kthreads. ++ */ ++ WARN_ON(set_kthread_struct(current)); ++ + /* + * Make us the idle thread. Technically, schedule() should not be + * called from this thread, however somewhere below it might be, +-- +2.37.0 + + +From 514861d279a8517b0a9188f103c7b46e96eb8254 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 31 Mar 2022 13:49:18 +0000 +Subject: [PATCH 262/297] sched/alt: [Sync] dd621ee0cf8e kthread: Warn about + failed allocations for the init kthread + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 475437df15e1..4c0e90628368 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -7241,7 +7241,7 @@ void __init sched_init(void) + * if we want to avoid special-casing it in code that deals with per-CPU + * kthreads. + */ +- WARN_ON(set_kthread_struct(current)); ++ WARN_ON(!set_kthread_struct(current)); + + /* + * Make us the idle thread. 
Technically, schedule() should not be +-- +2.37.0 + + +From 00c11cdce606918bf2d6fe15197e85e9e5e90c40 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 31 Mar 2022 13:51:02 +0000 +Subject: [PATCH 263/297] sched/alt: [Sync] 00580f03af5e kthread: Never + put_user the set_child_tid address + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4c0e90628368..69058fc928f9 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3529,7 +3529,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) + finish_task_switch(prev); + preempt_enable(); + +- if (current->set_child_tid) ++ if (!(current->flags & PF_KTHREAD) && current->set_child_tid) + put_user(task_pid_vnr(current), current->set_child_tid); + + calculate_sigpending(); +-- +2.37.0 + + +From 0b94c47182f02121330dc86919eb96ab691efc2e Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 31 Mar 2022 16:37:18 +0000 +Subject: [PATCH 264/297] sched/alt: [Sync] e32cf5dfbe22 kthread: Generalize + pf_io_worker so it can point to struct kthread + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 69058fc928f9..4c0e90628368 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3529,7 +3529,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) + finish_task_switch(prev); + preempt_enable(); + +- if (!(current->flags & PF_KTHREAD) && current->set_child_tid) ++ if (current->set_child_tid) + put_user(task_pid_vnr(current), current->set_child_tid); + + calculate_sigpending(); +-- +2.37.0 + + +From 9a29056912c42933ea78411c141d7f509263563a Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 1 Apr 2022 07:40:22 +0000 +Subject: [PATCH 265/297] Project-C v5.17-r0 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4c0e90628368..6338a97b429e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -66,7 +66,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.16-r1" ++#define ALT_SCHED_VERSION "v5.17-r0" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From e3604a1d033f652b9cef229b91c45608cddeaa50 Mon Sep 17 00:00:00 2001 +From: Piotr Gorski +Date: Mon, 4 Apr 2022 12:03:13 +0200 +Subject: [PATCH 266/297] prjc: remove duplicated struct __call_single_nod + +Signed-off-by: Piotr Gorski +--- + include/linux/sched.h | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 2423bc6713eb..7c65e6317d97 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -756,8 +756,9 @@ struct task_struct { + #if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) + int on_cpu; + #endif +-#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) +- struct __call_single_node wake_entry; ++ ++#ifdef CONFIG_SMP ++#ifndef CONFIG_SCHED_ALT + unsigned int wakee_flips; + unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; +@@ -771,6 +772,7 @@ struct task_struct { + */ + int recent_used_cpu; + int wake_cpu; ++#endif /* !CONFIG_SCHED_ALT */ + #endif + int on_rq; + +-- +2.37.0 + + +From 
27c2772dc1e4f949ee8cad94985a81def05016ea Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 1 Apr 2022 21:12:21 +0000 +Subject: [PATCH 267/297] sched/alt: [Sync] 61bb6cd2 mm: move + node_reclaim_distance to fix NUMA without SMP + +--- + kernel/sched/topology.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 2c9daf90398f..480ef393b3c9 100644 +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -2649,8 +2649,6 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + {} + + #ifdef CONFIG_NUMA +-int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; +- + int sched_numa_find_closest(const struct cpumask *cpus, int cpu) + { + return best_mask_cpu(cpu, cpus); +-- +2.37.0 + + +From 1cb07337b129d2b6c2930b686226cf191e67e0be Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Fri, 1 Apr 2022 21:34:05 +0000 +Subject: [PATCH 268/297] sched/alt: Fix Kconfig menu item for ProjectC + alternative schedulers + +--- + init/Kconfig | 58 ++++++++++++++++++++++++++-------------------------- + 1 file changed, 29 insertions(+), 29 deletions(-) + +diff --git a/init/Kconfig b/init/Kconfig +index fcdfda2b98f1..d2b593e3807d 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -809,35 +809,6 @@ config GENERIC_SCHED_CLOCK + + menu "Scheduler features" + +-menuconfig SCHED_ALT +- bool "Alternative CPU Schedulers" +- default y +- help +- This feature enable alternative CPU scheduler" +- +-if SCHED_ALT +- +-choice +- prompt "Alternative CPU Scheduler" +- default SCHED_BMQ +- +-config SCHED_BMQ +- bool "BMQ CPU scheduler" +- help +- The BitMap Queue CPU scheduler for excellent interactivity and +- responsiveness on the desktop and solid scalability on normal +- hardware and commodity servers. +- +-config SCHED_PDS +- bool "PDS CPU scheduler" +- help +- The Priority and Deadline based Skip list multiple queue CPU +- Scheduler. +- +-endchoice +- +-endif +- + config UCLAMP_TASK + bool "Enable utilization clamping for RT/FAIR tasks" + depends on CPU_FREQ_GOV_SCHEDUTIL +@@ -888,6 +859,35 @@ config UCLAMP_BUCKETS_COUNT + + If in doubt, use the default value. + ++menuconfig SCHED_ALT ++ bool "Alternative CPU Schedulers" ++ default y ++ help ++ This feature enable alternative CPU scheduler" ++ ++if SCHED_ALT ++ ++choice ++ prompt "Alternative CPU Scheduler" ++ default SCHED_BMQ ++ ++config SCHED_BMQ ++ bool "BMQ CPU scheduler" ++ help ++ The BitMap Queue CPU scheduler for excellent interactivity and ++ responsiveness on the desktop and solid scalability on normal ++ hardware and commodity servers. ++ ++config SCHED_PDS ++ bool "PDS CPU scheduler" ++ help ++ The Priority and Deadline based Skip list multiple queue CPU ++ Scheduler. 
++ ++endchoice ++ ++endif ++ + endmenu + + # +-- +2.37.0 + + +From c344f4e608af37bb14d7eaf449fe9ff4f461f8f0 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 2 Apr 2022 13:41:18 +0000 +Subject: [PATCH 269/297] Project-C v5.17-r1 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 6338a97b429e..abd0f2bc531e 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -66,7 +66,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.17-r0" ++#define ALT_SCHED_VERSION "v5.17-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 5455a5775c24053234bf3c0570b91c608cd44292 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 2 Apr 2022 16:07:47 +0000 +Subject: [PATCH 270/297] sched/alt: Avoid call task_sched_prio_idx() in + check_task_changed() code path. + +--- + kernel/sched/alt_core.c | 14 ++++++-------- + kernel/sched/bmq.h | 2 +- + kernel/sched/pds.h | 2 +- + 3 files changed, 8 insertions(+), 10 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index abd0f2bc531e..a3b1d8bbe53d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -78,7 +78,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + /* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ + u64 sched_timeslice_ns __read_mostly = (4 << 20); + +-static inline void requeue_task(struct task_struct *p, struct rq *rq); ++static inline void requeue_task(struct task_struct *p, struct rq *rq, int idx); + + #ifdef CONFIG_SCHED_BMQ + #include "bmq.h" +@@ -784,17 +784,13 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) + sched_update_tick_dependency(rq); + } + +-static inline void requeue_task(struct task_struct *p, struct rq *rq) ++static inline void requeue_task(struct task_struct *p, struct rq *rq, int idx) + { +- int idx; +- + lockdep_assert_held(&rq->lock); + /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", + cpu_of(rq), task_cpu(p)); + +- idx = task_sched_prio_idx(p, rq); +- + list_del(&p->sq_node); + list_add_tail(&p->sq_node, &rq->queue.heads[idx]); + if (idx != p->sq_idx) { +@@ -5034,9 +5030,11 @@ EXPORT_SYMBOL(default_wake_function); + + static inline void check_task_changed(struct task_struct *p, struct rq *rq) + { ++ int idx; ++ + /* Trigger resched if task sched_prio has been modified. 
*/ +- if (task_on_rq_queued(p) && task_sched_prio_idx(p, rq) != p->sq_idx) { +- requeue_task(p, rq); ++ if (task_on_rq_queued(p) && (idx = task_sched_prio_idx(p, rq)) != p->sq_idx) { ++ requeue_task(p, rq, idx); + check_preempt_curr(rq); + } + } +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index be3ee4a553ca..bf7ac80ec242 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -72,7 +72,7 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { + if (SCHED_RR != p->policy) + deboost_task(p); +- requeue_task(p, rq); ++ requeue_task(p, rq, task_sched_prio_idx(p, rq)); + } + } + +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 0f1f0d708b77..56a649d02e49 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -101,7 +101,7 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + p->time_slice = sched_timeslice_ns; + sched_renew_deadline(p, rq); + if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) +- requeue_task(p, rq); ++ requeue_task(p, rq, task_sched_prio_idx(p, rq)); + } + + static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) +-- +2.37.0 + + +From ff747536b8cf890249d993cdde251e93273ad46c Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 4 Apr 2022 14:25:30 +0000 +Subject: [PATCH 271/297] sched/alt: Delay update_sched_rq_watermark in + deactivation. + +--- + kernel/sched/alt_core.c | 27 ++++++++++++++------------- + 1 file changed, 14 insertions(+), 13 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index a3b1d8bbe53d..41e4b63801e6 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -728,16 +728,13 @@ unsigned long get_wchan(struct task_struct *p) + * Add/Remove/Requeue task to/from the runqueue routines + * Context: rq->lock + */ +-#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ +- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ +- sched_info_dequeue(rq, p); \ +- \ +- list_del(&p->sq_node); \ +- if (list_empty(&rq->queue.heads[p->sq_idx])) { \ +- clear_bit(sched_idx2prio(p->sq_idx, rq), \ +- rq->queue.bitmap); \ +- func; \ +- } ++#define __SCHED_DEQUEUE_TASK(p, rq, flags) \ ++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ ++ sched_info_dequeue(rq, p); \ ++ \ ++ list_del(&p->sq_node); \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) \ ++ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); + + #define __SCHED_ENQUEUE_TASK(p, rq, flags) \ + sched_info_enqueue(rq, p); \ +@@ -755,7 +752,7 @@ static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) + WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", + task_cpu(p), cpu_of(rq)); + +- __SCHED_DEQUEUE_TASK(p, rq, flags, update_sched_rq_watermark(rq)); ++ __SCHED_DEQUEUE_TASK(p, rq, flags); + --rq->nr_running; + #ifdef CONFIG_SMP + if (1 == rq->nr_running) +@@ -1532,6 +1529,7 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int + + WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); + dequeue_task(p, rq, 0); ++ update_sched_rq_watermark(rq); + set_task_cpu(p, new_cpu); + raw_spin_unlock(&rq->lock); + +@@ -4291,7 +4289,7 @@ migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) + (p = sched_rq_next_task(skip, rq)) != rq->idle) { + skip = sched_rq_next_task(p, rq); + if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { +- __SCHED_DEQUEUE_TASK(p, rq, 0, ); ++ __SCHED_DEQUEUE_TASK(p, rq, 0); + set_task_cpu(p, dest_cpu); + 
sched_task_sanity_check(p, dest_rq); + __SCHED_ENQUEUE_TASK(p, dest_rq, 0); +@@ -4336,7 +4334,6 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) + if (rq->nr_running > 1) + cpumask_set_cpu(cpu, &sched_rq_pending_mask); + +- update_sched_rq_watermark(rq); + cpufreq_update_util(rq, 0); + + spin_release(&src_rq->lock.dep_map, _RET_IP_); +@@ -4480,6 +4477,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) + unsigned long prev_state; + struct rq *rq; + int cpu; ++ int deactivated = 0; + + cpu = smp_processor_id(); + rq = cpu_rq(cpu); +@@ -4547,6 +4545,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) + */ + sched_task_deactivate(prev, rq); + deactivate_task(prev, rq); ++ deactivated = 1; + + if (prev->in_iowait) { + atomic_inc(&rq->nr_iowait); +@@ -4566,6 +4565,8 @@ static void __sched notrace __schedule(unsigned int sched_mode) + #endif + + if (likely(prev != next)) { ++ if (deactivated) ++ update_sched_rq_watermark(rq); + next->last_ran = rq->clock_task; + rq->last_ts_switch = rq->clock; + +-- +2.37.0 + + +From 6e486af3b5bd7cc5f6c8c9ea1f06d897125e633e Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 7 May 2022 16:16:33 +0000 +Subject: [PATCH 272/297] sched/alt: Dummy uclamp sync-up + +88e4d8c3cf08 sched/uclamp: Fix iowait boost escaping uclamp restriction +d9f51b85bfd8 sched/sugov: Ignore 'busy' filter when rq is capped by uclamp_max +--- + kernel/sched/alt_sched.h | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index f2b9e686d6a6..7bbe006ce568 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -664,4 +664,14 @@ extern void sched_dynamic_update(int mode); + #endif + + static inline void nohz_run_idle_balance(int cpu) { } ++ ++static inline ++unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, ++ struct task_struct *p) ++{ ++ return util; ++} ++ ++static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; } ++ + #endif /* ALT_SCHED_H */ +-- +2.37.0 + + +From 4d10039793835a860bd1cec75443ee9f0ef9f83f Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 9 May 2022 11:25:02 +0000 +Subject: [PATCH 273/297] Project-C v5.17-r2 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 41e4b63801e6..c60f255bb828 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -66,7 +66,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.17-r1" ++#define ALT_SCHED_VERSION "v5.17-r2" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From ee44ff2ea9884f5a5de2e352f557e011c1c27d77 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 18 May 2022 14:05:22 +0000 +Subject: [PATCH 274/297] sched/alt: [Sync] b1f866b013e6 block: remove + blk_needs_flush_plug + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index c60f255bb828..3c0dde3280c1 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4650,7 +4650,7 @@ static inline void sched_submit_work(struct task_struct *tsk) + * If we are going to sleep and we have plugged IO queued, + * make sure to submit it to avoid deadlocks. 
+ */ +- if (blk_needs_flush_plug(tsk)) ++ if (tsk->plug) + blk_flush_plug(tsk->plug, true); + } + +-- +2.37.0 + + +From 9fd276f94028804dd9b645ca87c57bfd2e6e7233 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Wed, 18 May 2022 14:07:37 +0000 +Subject: [PATCH 275/297] sched/alt: [Sync] aa8dcccaf32b block: check that + there is a plug in blk_flush_plug + +--- + kernel/sched/alt_core.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3c0dde3280c1..5ed2477f09d0 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4650,8 +4650,7 @@ static inline void sched_submit_work(struct task_struct *tsk) + * If we are going to sleep and we have plugged IO queued, + * make sure to submit it to avoid deadlocks. + */ +- if (tsk->plug) +- blk_flush_plug(tsk->plug, true); ++ blk_flush_plug(tsk->plug, true); + } + + static void sched_update_worker(struct task_struct *tsk) +@@ -6315,9 +6314,7 @@ int io_schedule_prepare(void) + int old_iowait = current->in_iowait; + + current->in_iowait = 1; +- if (current->plug) +- blk_flush_plug(current->plug, true); +- ++ blk_flush_plug(current->plug, true); + return old_iowait; + } + +-- +2.37.0 + + +From 110f181cd3c7f5b336fb2d2245b7ad1b3c770d49 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 19 May 2022 12:41:20 +0000 +Subject: [PATCH 276/297] sched/alt: [Sync] 04d4e665a609 sched/isolation: Use + single feature type while referring to housekeeping cpumask + +--- + kernel/sched/alt_core.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 5ed2477f09d0..852dbf392fb3 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1018,13 +1018,13 @@ int get_nohz_timer_target(void) + struct cpumask *mask; + const struct cpumask *hk_mask; + +- if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { ++ if (housekeeping_cpu(cpu, HK_TYPE_TIMER)) { + if (!idle_cpu(cpu)) + return cpu; + default_cpu = cpu; + } + +- hk_mask = housekeeping_cpumask(HK_FLAG_TIMER); ++ hk_mask = housekeeping_cpumask(HK_TYPE_TIMER); + + for (mask = per_cpu(sched_cpu_topo_masks, cpu) + 1; + mask < per_cpu(sched_cpu_topo_end_mask, cpu); mask++) +@@ -1033,7 +1033,7 @@ int get_nohz_timer_target(void) + return i; + + if (default_cpu == -1) +- default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); ++ default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER); + cpu = default_cpu; + + return cpu; +@@ -4062,7 +4062,7 @@ static void sched_tick_start(int cpu) + int os; + struct tick_work *twork; + +- if (housekeeping_cpu(cpu, HK_FLAG_TICK)) ++ if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + return; + + WARN_ON_ONCE(!tick_work_cpu); +@@ -4082,7 +4082,7 @@ static void sched_tick_stop(int cpu) + { + struct tick_work *twork; + +- if (housekeeping_cpu(cpu, HK_FLAG_TICK)) ++ if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + return; + + WARN_ON_ONCE(!tick_work_cpu); +@@ -7119,7 +7119,7 @@ static void sched_init_topology_cpumask(void) + void __init sched_init_smp(void) + { + /* Move init over to a non-isolated CPU */ +- if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) ++ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_DOMAIN)) < 0) + BUG(); + current->flags &= ~PF_NO_SETAFFINITY; + +-- +2.37.0 + + +From d15b4ba78545069c8aae836ba4f755287d4e4acb Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 19 May 2022 15:21:22 +0000 +Subject: [PATCH 277/297] sched/alt: [Sync] 4c7485584d48 sched/preempt: Move + 
PREEMPT_DYNAMIC logic later + +--- + kernel/sched/alt_core.c | 272 ++++++++++++++++++++-------------------- + 1 file changed, 136 insertions(+), 136 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 852dbf392fb3..71edc3dd7e9a 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4858,142 +4858,6 @@ EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); + + #endif /* CONFIG_PREEMPTION */ + +-#ifdef CONFIG_PREEMPT_DYNAMIC +- +-#include +- +-/* +- * SC:cond_resched +- * SC:might_resched +- * SC:preempt_schedule +- * SC:preempt_schedule_notrace +- * SC:irqentry_exit_cond_resched +- * +- * +- * NONE: +- * cond_resched <- __cond_resched +- * might_resched <- RET0 +- * preempt_schedule <- NOP +- * preempt_schedule_notrace <- NOP +- * irqentry_exit_cond_resched <- NOP +- * +- * VOLUNTARY: +- * cond_resched <- __cond_resched +- * might_resched <- __cond_resched +- * preempt_schedule <- NOP +- * preempt_schedule_notrace <- NOP +- * irqentry_exit_cond_resched <- NOP +- * +- * FULL: +- * cond_resched <- RET0 +- * might_resched <- RET0 +- * preempt_schedule <- preempt_schedule +- * preempt_schedule_notrace <- preempt_schedule_notrace +- * irqentry_exit_cond_resched <- irqentry_exit_cond_resched +- */ +- +-enum { +- preempt_dynamic_undefined = -1, +- preempt_dynamic_none, +- preempt_dynamic_voluntary, +- preempt_dynamic_full, +-}; +- +-int preempt_dynamic_mode = preempt_dynamic_undefined; +- +-int sched_dynamic_mode(const char *str) +-{ +- if (!strcmp(str, "none")) +- return preempt_dynamic_none; +- +- if (!strcmp(str, "voluntary")) +- return preempt_dynamic_voluntary; +- +- if (!strcmp(str, "full")) +- return preempt_dynamic_full; +- +- return -EINVAL; +-} +- +-void sched_dynamic_update(int mode) +-{ +- /* +- * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in +- * the ZERO state, which is invalid. 
+- */ +- static_call_update(cond_resched, __cond_resched); +- static_call_update(might_resched, __cond_resched); +- static_call_update(preempt_schedule, __preempt_schedule_func); +- static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); +- static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); +- +- switch (mode) { +- case preempt_dynamic_none: +- static_call_update(cond_resched, __cond_resched); +- static_call_update(might_resched, (void *)&__static_call_return0); +- static_call_update(preempt_schedule, NULL); +- static_call_update(preempt_schedule_notrace, NULL); +- static_call_update(irqentry_exit_cond_resched, NULL); +- pr_info("Dynamic Preempt: none\n"); +- break; +- +- case preempt_dynamic_voluntary: +- static_call_update(cond_resched, __cond_resched); +- static_call_update(might_resched, __cond_resched); +- static_call_update(preempt_schedule, NULL); +- static_call_update(preempt_schedule_notrace, NULL); +- static_call_update(irqentry_exit_cond_resched, NULL); +- pr_info("Dynamic Preempt: voluntary\n"); +- break; +- +- case preempt_dynamic_full: +- static_call_update(cond_resched, (void *)&__static_call_return0); +- static_call_update(might_resched, (void *)&__static_call_return0); +- static_call_update(preempt_schedule, __preempt_schedule_func); +- static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); +- static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); +- pr_info("Dynamic Preempt: full\n"); +- break; +- } +- +- preempt_dynamic_mode = mode; +-} +- +-static int __init setup_preempt_mode(char *str) +-{ +- int mode = sched_dynamic_mode(str); +- if (mode < 0) { +- pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); +- return 0; +- } +- +- sched_dynamic_update(mode); +- return 1; +-} +-__setup("preempt=", setup_preempt_mode); +- +-static void __init preempt_dynamic_init(void) +-{ +- if (preempt_dynamic_mode == preempt_dynamic_undefined) { +- if (IS_ENABLED(CONFIG_PREEMPT_NONE)) { +- sched_dynamic_update(preempt_dynamic_none); +- } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) { +- sched_dynamic_update(preempt_dynamic_voluntary); +- } else { +- /* Default static call setting, nothing to do */ +- WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); +- preempt_dynamic_mode = preempt_dynamic_full; +- pr_info("Dynamic Preempt: full\n"); +- } +- } +-} +- +-#else /* !CONFIG_PREEMPT_DYNAMIC */ +- +-static inline void preempt_dynamic_init(void) { } +- +-#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */ +- + /* + * This is the entry point to schedule() from kernel preemption + * off of irq context. 
+@@ -6257,6 +6121,142 @@ int __cond_resched_rwlock_write(rwlock_t *lock) + } + EXPORT_SYMBOL(__cond_resched_rwlock_write); + ++#ifdef CONFIG_PREEMPT_DYNAMIC ++ ++#include ++ ++/* ++ * SC:cond_resched ++ * SC:might_resched ++ * SC:preempt_schedule ++ * SC:preempt_schedule_notrace ++ * SC:irqentry_exit_cond_resched ++ * ++ * ++ * NONE: ++ * cond_resched <- __cond_resched ++ * might_resched <- RET0 ++ * preempt_schedule <- NOP ++ * preempt_schedule_notrace <- NOP ++ * irqentry_exit_cond_resched <- NOP ++ * ++ * VOLUNTARY: ++ * cond_resched <- __cond_resched ++ * might_resched <- __cond_resched ++ * preempt_schedule <- NOP ++ * preempt_schedule_notrace <- NOP ++ * irqentry_exit_cond_resched <- NOP ++ * ++ * FULL: ++ * cond_resched <- RET0 ++ * might_resched <- RET0 ++ * preempt_schedule <- preempt_schedule ++ * preempt_schedule_notrace <- preempt_schedule_notrace ++ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched ++ */ ++ ++enum { ++ preempt_dynamic_undefined = -1, ++ preempt_dynamic_none, ++ preempt_dynamic_voluntary, ++ preempt_dynamic_full, ++}; ++ ++int preempt_dynamic_mode = preempt_dynamic_undefined; ++ ++int sched_dynamic_mode(const char *str) ++{ ++ if (!strcmp(str, "none")) ++ return preempt_dynamic_none; ++ ++ if (!strcmp(str, "voluntary")) ++ return preempt_dynamic_voluntary; ++ ++ if (!strcmp(str, "full")) ++ return preempt_dynamic_full; ++ ++ return -EINVAL; ++} ++ ++void sched_dynamic_update(int mode) ++{ ++ /* ++ * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in ++ * the ZERO state, which is invalid. ++ */ ++ static_call_update(cond_resched, __cond_resched); ++ static_call_update(might_resched, __cond_resched); ++ static_call_update(preempt_schedule, __preempt_schedule_func); ++ static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); ++ static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); ++ ++ switch (mode) { ++ case preempt_dynamic_none: ++ static_call_update(cond_resched, __cond_resched); ++ static_call_update(might_resched, (void *)&__static_call_return0); ++ static_call_update(preempt_schedule, NULL); ++ static_call_update(preempt_schedule_notrace, NULL); ++ static_call_update(irqentry_exit_cond_resched, NULL); ++ pr_info("Dynamic Preempt: none\n"); ++ break; ++ ++ case preempt_dynamic_voluntary: ++ static_call_update(cond_resched, __cond_resched); ++ static_call_update(might_resched, __cond_resched); ++ static_call_update(preempt_schedule, NULL); ++ static_call_update(preempt_schedule_notrace, NULL); ++ static_call_update(irqentry_exit_cond_resched, NULL); ++ pr_info("Dynamic Preempt: voluntary\n"); ++ break; ++ ++ case preempt_dynamic_full: ++ static_call_update(cond_resched, (void *)&__static_call_return0); ++ static_call_update(might_resched, (void *)&__static_call_return0); ++ static_call_update(preempt_schedule, __preempt_schedule_func); ++ static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); ++ static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); ++ pr_info("Dynamic Preempt: full\n"); ++ break; ++ } ++ ++ preempt_dynamic_mode = mode; ++} ++ ++static int __init setup_preempt_mode(char *str) ++{ ++ int mode = sched_dynamic_mode(str); ++ if (mode < 0) { ++ pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); ++ return 0; ++ } ++ ++ sched_dynamic_update(mode); ++ return 1; ++} ++__setup("preempt=", setup_preempt_mode); ++ ++static void __init preempt_dynamic_init(void) ++{ ++ if (preempt_dynamic_mode == preempt_dynamic_undefined) { ++ if 
(IS_ENABLED(CONFIG_PREEMPT_NONE)) { ++ sched_dynamic_update(preempt_dynamic_none); ++ } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) { ++ sched_dynamic_update(preempt_dynamic_voluntary); ++ } else { ++ /* Default static call setting, nothing to do */ ++ WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); ++ preempt_dynamic_mode = preempt_dynamic_full; ++ pr_info("Dynamic Preempt: full\n"); ++ } ++ } ++} ++ ++#else /* !CONFIG_PREEMPT_DYNAMIC */ ++ ++static inline void preempt_dynamic_init(void) { } ++ ++#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */ ++ + /** + * yield - yield the current processor to other threads. + * +-- +2.37.0 + + +From 83722edbe96c3adb41638f2db5091965f1a083d3 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 19 May 2022 15:54:41 +0000 +Subject: [PATCH 278/297] sched/alt: [Sync] 8a69fe0be143 sched/preempt: + Refactor sched_dynamic_update() + +--- + kernel/sched/alt_core.c | 61 +++++++++++++++++++++++++---------------- + 1 file changed, 37 insertions(+), 24 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 71edc3dd7e9a..94450a8a4325 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -4794,8 +4794,11 @@ NOKPROBE_SYMBOL(preempt_schedule); + EXPORT_SYMBOL(preempt_schedule); + + #ifdef CONFIG_PREEMPT_DYNAMIC +-DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func); +-EXPORT_STATIC_CALL_TRAMP(preempt_schedule); ++#ifndef preempt_schedule_dynamic_enabled ++#define preempt_schedule_dynamic_enabled preempt_schedule ++#define preempt_schedule_dynamic_disabled NULL ++#endif ++DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); + #endif + + +@@ -4852,8 +4855,11 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) + EXPORT_SYMBOL_GPL(preempt_schedule_notrace); + + #ifdef CONFIG_PREEMPT_DYNAMIC +-DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); +-EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); ++#ifndef preempt_schedule_notrace_dynamic_enabled ++#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace ++#define preempt_schedule_notrace_dynamic_disabled NULL ++#endif ++DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); + #endif + + #endif /* CONFIG_PREEMPTION */ +@@ -6052,9 +6058,13 @@ EXPORT_SYMBOL(__cond_resched); + #endif + + #ifdef CONFIG_PREEMPT_DYNAMIC ++#define cond_resched_dynamic_enabled __cond_resched ++#define cond_resched_dynamic_disabled ((void *)&__static_call_return0) + DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched); + EXPORT_STATIC_CALL_TRAMP(cond_resched); + ++#define might_resched_dynamic_enabled __cond_resched ++#define might_resched_dynamic_disabled ((void *)&__static_call_return0) + DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched); + EXPORT_STATIC_CALL_TRAMP(might_resched); + #endif +@@ -6178,43 +6188,46 @@ int sched_dynamic_mode(const char *str) + return -EINVAL; + } + ++#define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled) ++#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled) ++ + void sched_dynamic_update(int mode) + { + /* + * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in + * the ZERO state, which is invalid. 
+ */ +- static_call_update(cond_resched, __cond_resched); +- static_call_update(might_resched, __cond_resched); +- static_call_update(preempt_schedule, __preempt_schedule_func); +- static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); +- static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); ++ preempt_dynamic_enable(cond_resched); ++ preempt_dynamic_enable(might_resched); ++ preempt_dynamic_enable(preempt_schedule); ++ preempt_dynamic_enable(preempt_schedule_notrace); ++ preempt_dynamic_enable(irqentry_exit_cond_resched); + + switch (mode) { + case preempt_dynamic_none: +- static_call_update(cond_resched, __cond_resched); +- static_call_update(might_resched, (void *)&__static_call_return0); +- static_call_update(preempt_schedule, NULL); +- static_call_update(preempt_schedule_notrace, NULL); +- static_call_update(irqentry_exit_cond_resched, NULL); ++ preempt_dynamic_enable(cond_resched); ++ preempt_dynamic_disable(might_resched); ++ preempt_dynamic_disable(preempt_schedule); ++ preempt_dynamic_disable(preempt_schedule_notrace); ++ preempt_dynamic_disable(irqentry_exit_cond_resched); + pr_info("Dynamic Preempt: none\n"); + break; + + case preempt_dynamic_voluntary: +- static_call_update(cond_resched, __cond_resched); +- static_call_update(might_resched, __cond_resched); +- static_call_update(preempt_schedule, NULL); +- static_call_update(preempt_schedule_notrace, NULL); +- static_call_update(irqentry_exit_cond_resched, NULL); ++ preempt_dynamic_enable(cond_resched); ++ preempt_dynamic_enable(might_resched); ++ preempt_dynamic_disable(preempt_schedule); ++ preempt_dynamic_disable(preempt_schedule_notrace); ++ preempt_dynamic_disable(irqentry_exit_cond_resched); + pr_info("Dynamic Preempt: voluntary\n"); + break; + + case preempt_dynamic_full: +- static_call_update(cond_resched, (void *)&__static_call_return0); +- static_call_update(might_resched, (void *)&__static_call_return0); +- static_call_update(preempt_schedule, __preempt_schedule_func); +- static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); +- static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); ++ preempt_dynamic_disable(cond_resched); ++ preempt_dynamic_disable(might_resched); ++ preempt_dynamic_enable(preempt_schedule); ++ preempt_dynamic_enable(preempt_schedule_notrace); ++ preempt_dynamic_enable(irqentry_exit_cond_resched); + pr_info("Dynamic Preempt: full\n"); + break; + } +-- +2.37.0 + + +From 6f3fefb494e2b409cd56f7b2d889fedb2a3283e2 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 19 May 2022 16:00:36 +0000 +Subject: [PATCH 279/297] sched/alt: [Sync] 33c64734be34 sched/preempt: + Decouple HAVE_PREEMPT_DYNAMIC from GENERIC_ENTRY + +--- + kernel/sched/alt_core.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 94450a8a4325..4012c7f597ff 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6133,7 +6133,9 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write); + + #ifdef CONFIG_PREEMPT_DYNAMIC + ++#ifdef CONFIG_GENERIC_ENTRY + #include ++#endif + + /* + * SC:cond_resched +-- +2.37.0 + + +From ece7246e356476506a92f18b83e32af6acb0c0b8 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 19 May 2022 16:53:23 +0000 +Subject: [PATCH 280/297] sched/alt: [Sync] 99cf983cc8bc sched/preempt: Add + PREEMPT_DYNAMIC using static keys + +--- + kernel/sched/alt_core.c | 55 ++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 54 insertions(+), 1 deletion(-) + 
+diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 4012c7f597ff..1f09a5210445 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -4794,13 +4795,25 @@ NOKPROBE_SYMBOL(preempt_schedule); + EXPORT_SYMBOL(preempt_schedule); + + #ifdef CONFIG_PREEMPT_DYNAMIC ++#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) + #ifndef preempt_schedule_dynamic_enabled + #define preempt_schedule_dynamic_enabled preempt_schedule + #define preempt_schedule_dynamic_disabled NULL + #endif + DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); ++EXPORT_STATIC_CALL_TRAMP(preempt_schedule); ++#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) ++static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule); ++void __sched notrace dynamic_preempt_schedule(void) ++{ ++ if (!static_branch_unlikely(&sk_dynamic_preempt_schedule)) ++ return; ++ preempt_schedule(); ++} ++NOKPROBE_SYMBOL(dynamic_preempt_schedule); ++EXPORT_SYMBOL(dynamic_preempt_schedule); ++#endif + #endif +- + + /** + * preempt_schedule_notrace - preempt_schedule called by tracing +@@ -4855,11 +4868,24 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) + EXPORT_SYMBOL_GPL(preempt_schedule_notrace); + + #ifdef CONFIG_PREEMPT_DYNAMIC ++#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) + #ifndef preempt_schedule_notrace_dynamic_enabled + #define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace + #define preempt_schedule_notrace_dynamic_disabled NULL + #endif + DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); ++EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); ++#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) ++static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule_notrace); ++void __sched notrace dynamic_preempt_schedule_notrace(void) ++{ ++ if (!static_branch_unlikely(&sk_dynamic_preempt_schedule_notrace)) ++ return; ++ preempt_schedule_notrace(); ++} ++NOKPROBE_SYMBOL(dynamic_preempt_schedule_notrace); ++EXPORT_SYMBOL(dynamic_preempt_schedule_notrace); ++#endif + #endif + + #endif /* CONFIG_PREEMPTION */ +@@ -6058,6 +6084,7 @@ EXPORT_SYMBOL(__cond_resched); + #endif + + #ifdef CONFIG_PREEMPT_DYNAMIC ++#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) + #define cond_resched_dynamic_enabled __cond_resched + #define cond_resched_dynamic_disabled ((void *)&__static_call_return0) + DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched); +@@ -6067,6 +6094,25 @@ EXPORT_STATIC_CALL_TRAMP(cond_resched); + #define might_resched_dynamic_disabled ((void *)&__static_call_return0) + DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched); + EXPORT_STATIC_CALL_TRAMP(might_resched); ++#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) ++static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched); ++int __sched dynamic_cond_resched(void) ++{ ++ if (!static_branch_unlikely(&sk_dynamic_cond_resched)) ++ return 0; ++ return __cond_resched(); ++} ++EXPORT_SYMBOL(dynamic_cond_resched); ++ ++static DEFINE_STATIC_KEY_FALSE(sk_dynamic_might_resched); ++int __sched dynamic_might_resched(void) ++{ ++ if (!static_branch_unlikely(&sk_dynamic_might_resched)) ++ return 0; ++ return __cond_resched(); ++} ++EXPORT_SYMBOL(dynamic_might_resched); ++#endif + #endif + + /* +@@ -6190,8 +6236,15 @@ int sched_dynamic_mode(const char *str) + return -EINVAL; + } + ++#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) + #define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled) + 
#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled) ++#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) ++#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key) ++#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key) ++#else ++#error "Unsupported PREEMPT_DYNAMIC mechanism" ++#endif + + void sched_dynamic_update(int mode) + { +-- +2.37.0 + + +From 8bc62bbacd2c5c300177d7bb95147106170d3eba Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 21 May 2022 07:43:39 +0000 +Subject: [PATCH 281/297] sched/alt: [Sync] fa2c3254d7cf sched/tracing: Don't + re-read p->state when emitting sched_switch event + +--- + kernel/sched/alt_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 1f09a5210445..3c1a0f3ddcbc 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3430,7 +3430,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) + { + struct rq *rq = this_rq(); + struct mm_struct *mm = rq->prev_mm; +- long prev_state; ++ unsigned int prev_state; + + /* + * The previous task will have left us with a preempt_count of 2 +@@ -4595,7 +4595,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) + + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + +- trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next); ++ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); + + /* Also unlocks the rq: */ + rq = context_switch(rq, prev, next); +-- +2.37.0 + + +From c41cdff6539523725c3fa3f1b842711a7c1c6a87 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sat, 21 May 2022 11:44:15 +0000 +Subject: [PATCH 282/297] sched/alt: [Sync] 801c14195510 sched/headers: + Introduce kernel/sched/build_utility.c and build multiple .c files there + +--- + kernel/sched/alt_sched.h | 12 +++++++++--- + kernel/sched/build_utility.c | 4 +--- + 2 files changed, 10 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 7bbe006ce568..39c6057b0ad3 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -48,6 +48,7 @@ + + #include "cpupri.h" + ++#include + #include + + #ifdef CONFIG_SCHED_BMQ +@@ -452,9 +453,6 @@ this_rq_lock_irq(struct rq_flags *rf) + return rq; + } + +-extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); +-extern void raw_spin_rq_unlock(struct rq *rq); +- + static inline raw_spinlock_t *__rq_lockp(struct rq *rq) + { + return &rq->lock; +@@ -465,6 +463,14 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq) + return __rq_lockp(rq); + } + ++static inline void lockdep_assert_rq_held(struct rq *rq) ++{ ++ lockdep_assert_held(__rq_lockp(rq)); ++} ++ ++extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); ++extern void raw_spin_rq_unlock(struct rq *rq); ++ + static inline void raw_spin_rq_lock(struct rq *rq) + { + raw_spin_rq_lock_nested(rq, 0); +diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c +index 128a283332f4..23f80a86d2d7 100644 +--- a/kernel/sched/build_utility.c ++++ b/kernel/sched/build_utility.c +@@ -69,11 +69,9 @@ + # include "cpufreq_schedutil.c" + #endif + +-#ifdef CONFIG_SCHED_ALT + #ifdef CONFIG_SCHED_DEBUG + # include "debug.c" + #endif +-#endif + + #ifdef CONFIG_SCHEDSTATS + # include "stats.c" +@@ -87,7 +85,7 @@ + + #ifdef CONFIG_SMP + # include "cpupri.c" +-#ifdef CONFIG_SCHED_ALT ++#ifndef CONFIG_SCHED_ALT + # include "stop_task.c" + #endif + # include "topology.c" 
+-- +2.37.0 + + +From b2fe44ce98f2d6666a1cb69b31dae32a84eeac93 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Mon, 23 May 2022 11:03:24 +0000 +Subject: [PATCH 283/297] Project-C v5.18-r0 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 3c1a0f3ddcbc..cb8f7d24ab5d 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.17-r2" ++#define ALT_SCHED_VERSION "v5.18-r0" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 9cab4c8a8c422ea563e11df3f55946e2821bfa66 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 24 May 2022 15:30:25 +0000 +Subject: [PATCH 284/297] sched/alt: Headers cleanup + +--- + kernel/sched/alt_core.c | 41 +++++++++++++++++------------------ + kernel/sched/alt_sched.h | 46 ++++------------------------------------ + 2 files changed, 23 insertions(+), 64 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index cb8f7d24ab5d..aaebbb988343 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -11,41 +11,38 @@ + * scheduler by Alfred Chen. + * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. + */ +-#define CREATE_TRACE_POINTS +-#include +-#undef CREATE_TRACE_POINTS +- +-#include "sched.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + +-#include +- +-#include +-#include + #include ++#include ++#include + #include +-#include + #include +-#include ++#include + #include +-#include ++#include + #include +-#include +-#include +-#include +-#include + +-#include +-#include ++#include + + #include + +-#include "../workqueue_internal.h" ++#define CREATE_TRACE_POINTS ++#include ++#undef CREATE_TRACE_POINTS ++ ++#include "sched.h" ++ + #include "../../fs/io-wq.h" + #include "../smpboot.h" + +-#include "pelt.h" +-#include "smp.h" +- + /* + * Export tracepoints that act as a bare tracehook (ie: have no trace event + * associated with them) to allow external modules to probe them. 
+diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 39c6057b0ad3..611424bbfa9b 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -1,56 +1,18 @@ + #ifndef ALT_SCHED_H + #define ALT_SCHED_H + +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include + #include +-#include + #include +-#include +-#include + #include +-#include ++#include + +-#include ++#include ++#include + +-#ifdef CONFIG_PARAVIRT +-# include +-#endif ++#include "../workqueue_internal.h" + + #include "cpupri.h" + +-#include +-#include +- + #ifdef CONFIG_SCHED_BMQ + /* bits: + * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ +-- +2.37.0 + + +From efdd27969a4e455b6d36195afdd38416d67430c8 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Tue, 24 May 2022 16:33:15 +0000 +Subject: [PATCH 285/297] Project-C v5.18-r1 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index aaebbb988343..a466a05301b8 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -64,7 +64,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.18-r0" ++#define ALT_SCHED_VERSION "v5.18-r1" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 497edb58f02de67c6c0323b6a4b7cfa2e2c277bb Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Tue, 5 Apr 2022 09:56:18 +0200 +Subject: [PATCH 286/297] sched/alt: Add missing call to lru_gen_use_mm when + using MGLRU + +--- + kernel/sched/alt_core.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index a466a05301b8..31a487038547 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3569,6 +3569,9 @@ context_switch(struct rq *rq, struct task_struct *prev, + * finish_task_switch()'s mmdrop(). + */ + switch_mm_irqs_off(prev->active_mm, next->mm, next); ++#ifdef CONFIG_LRU_GEN ++ lru_gen_use_mm(next->mm); ++#endif + + if (!prev->mm) { // from kernel + /* will mmdrop() in finish_task_switch(). */ +-- +2.37.0 + + +From 1b7fdd70e63bfd3d1c8ffca949df7ed6e605bb55 Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Tue, 5 Apr 2022 09:58:51 +0200 +Subject: [PATCH 287/297] sched/alt: Kconfig: Disable SCHED_ALT by default, + prefer PDS over BMQ + +Also edit some help text. +--- + init/Kconfig | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/init/Kconfig b/init/Kconfig +index d2b593e3807d..48e3e5684076 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -861,15 +861,15 @@ config UCLAMP_BUCKETS_COUNT + + menuconfig SCHED_ALT + bool "Alternative CPU Schedulers" +- default y ++ default n + help +- This feature enable alternative CPU scheduler" ++ This feature enables the ProjectC alternative CPU schedulers." 
+ + if SCHED_ALT + + choice +- prompt "Alternative CPU Scheduler" +- default SCHED_BMQ ++ prompt "Alternative CPU schedulers" ++ default SCHED_PDS + + config SCHED_BMQ + bool "BMQ CPU scheduler" +-- +2.37.0 + + +From df2558ad4d27543e2e40d6668b711267826d0fcd Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Mon, 30 May 2022 10:34:56 +0200 +Subject: [PATCH 288/297] sched/alt: Add missing preempt model accessors + +--- + kernel/sched/alt_core.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 31a487038547..95572f44ca31 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -6319,6 +6319,18 @@ static void __init preempt_dynamic_init(void) + } + } + ++#define PREEMPT_MODEL_ACCESSOR(mode) \ ++ bool preempt_model_##mode(void) \ ++ { \ ++ WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined); \ ++ return preempt_dynamic_mode == preempt_dynamic_##mode; \ ++ } \ ++ EXPORT_SYMBOL_GPL(preempt_model_##mode) ++ ++PREEMPT_MODEL_ACCESSOR(none); ++PREEMPT_MODEL_ACCESSOR(voluntary); ++PREEMPT_MODEL_ACCESSOR(full); ++ + #else /* !CONFIG_PREEMPT_DYNAMIC */ + + static inline void preempt_dynamic_init(void) { } +-- +2.37.0 + + +From aeaf4e4c78b4117636a6ad31dca389d540111bb1 Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Thu, 2 Jun 2022 09:54:37 +0200 +Subject: [PATCH 289/297] sched/alt: [Sync] sched: Fix the check of nr_running + at queue wakelist + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 95572f44ca31..f1a5a610b07c 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2414,7 +2414,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) + * the soon-to-be-idle CPU as the current CPU is likely busy. + * nr_running is checked to avoid unnecessary task stacking. + */ +- if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) ++ if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running) + return true; + + return false; +-- +2.37.0 + + +From 3404ff57f1cd54fc17a691f589abee298a69b681 Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Thu, 2 Jun 2022 10:05:02 +0200 +Subject: [PATCH 290/297] sched/alt: [Sync] sched: Remove the limitation of + WF_ON_CPU on wakelist if wakee cpu is idle + +--- + kernel/sched/alt_core.c | 27 ++++++++++++++++----------- + kernel/sched/alt_sched.h | 1 - + 2 files changed, 16 insertions(+), 12 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index f1a5a610b07c..8ae6975aa495 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -2392,7 +2392,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags + __smp_call_single_queue(cpu, &p->wake_entry.llist); + } + +-static inline bool ttwu_queue_cond(int cpu, int wake_flags) ++static inline bool ttwu_queue_cond(int cpu) + { + /* + * Do not complicate things with the async wake_list while the CPU is +@@ -2408,13 +2408,21 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) + if (!cpus_share_cache(smp_processor_id(), cpu)) + return true; + ++ if (cpu == smp_processor_id()) ++ return false; ++ + /* +- * If the task is descheduling and the only running task on the +- * CPU then use the wakelist to offload the task activation to +- * the soon-to-be-idle CPU as the current CPU is likely busy. +- * nr_running is checked to avoid unnecessary task stacking. 
++ * If the wakee cpu is idle, or the task is descheduling and the ++ * only running task on the CPU, then use the wakelist to offload ++ * the task activation to the idle (or soon-to-be-idle) CPU as ++ * the current CPU is likely busy. nr_running is checked to ++ * avoid unnecessary task stacking. ++ * ++ * Note that we can only get here with (wakee) p->on_rq=0, ++ * p->on_cpu can be whatever, we've done the dequeue, so ++ * the wakee has been accounted out of ->nr_running. + */ +- if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running) ++ if (!cpu_rq(cpu)->nr_running) + return true; + + return false; +@@ -2422,10 +2430,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) + + static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) + { +- if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { +- if (WARN_ON_ONCE(cpu == smp_processor_id())) +- return false; +- ++ if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu)) { + sched_clock_cpu(cpu); /* Sync clocks across CPUs */ + __ttwu_queue_wakelist(p, cpu, wake_flags); + return true; +@@ -2781,7 +2786,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + * scheduling. + */ + if (smp_load_acquire(&p->on_cpu) && +- ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) ++ ttwu_queue_wakelist(p, task_cpu(p), wake_flags)) + goto unlock; + + /* +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 611424bbfa9b..1f85c5627d6d 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -99,7 +99,6 @@ static inline int task_on_rq_migrating(struct task_struct *p) + #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ + #define WF_FORK 0x02 /* child wakeup after fork */ + #define WF_MIGRATED 0x04 /* internal use, task got migrated */ +-#define WF_ON_CPU 0x08 /* Wakee is on_rq */ + + #define SCHED_QUEUE_BITS (SCHED_BITS - 1) + +-- +2.37.0 + + +From 570e4903cdff5177f3317dc50e636537e6494471 Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Mon, 20 Jun 2022 19:37:43 +0200 +Subject: [PATCH 291/297] sched/alt: [Sync]: sched: Fix balance_push() vs + __sched_setscheduler() + +--- + kernel/sched/alt_core.c | 39 ++++++++++++++++++++++++++++++++++----- + 1 file changed, 34 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 8ae6975aa495..b43a992edd87 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3275,26 +3275,55 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head) + + static void balance_push(struct rq *rq); + ++/* ++ * balance_push_callback is a right abuse of the callback interface and plays ++ * by significantly different rules. ++ * ++ * Where the normal balance_callback's purpose is to be ran in the same context ++ * that queued it (only later, when it's safe to drop rq->lock again), ++ * balance_push_callback is specifically targeted at __schedule(). ++ * ++ * This abuse is tolerated because it places all the unlikely/odd cases behind ++ * a single test, namely: rq->balance_callback == NULL. 
++ */ + struct callback_head balance_push_callback = { + .next = NULL, + .func = (void (*)(struct callback_head *))balance_push, + }; + +-static inline struct callback_head *splice_balance_callbacks(struct rq *rq) ++static inline struct callback_head * ++__splice_balance_callbacks(struct rq *rq, bool split) + { + struct callback_head *head = rq->balance_callback; + +- if (head) { +- lockdep_assert_held(&rq->lock); ++ if (likely(!head)) ++ return NULL; ++ ++ lockdep_assert_held(&rq->lock); ++ /* ++ * Must not take balance_push_callback off the list when ++ * splice_balance_callbacks() and balance_callbacks() are not ++ * in the same rq->lock section. ++ * ++ * In that case it would be possible for __schedule() to interleave ++ * and observe the list empty. ++ */ ++ if (split && head == &balance_push_callback) ++ head = NULL; ++ else + rq->balance_callback = NULL; +- } + + return head; + } + ++static inline struct callback_head *splice_balance_callbacks(struct rq *rq) ++{ ++ return __splice_balance_callbacks(rq, true); ++} ++ + static void __balance_callbacks(struct rq *rq) + { +- do_balance_callbacks(rq, splice_balance_callbacks(rq)); ++ do_balance_callbacks(rq, __splice_balance_callbacks(rq, false)); + } + + static inline void balance_callbacks(struct rq *rq, struct callback_head *head) +-- +2.37.0 + + +From ae09d83fe8e118d31fe8a60c193949ef807633e9 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Thu, 26 May 2022 16:52:23 +0000 +Subject: [PATCH 292/297] sched/alt: Rename slibing group balance functions + +--- + kernel/sched/alt_core.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b43a992edd87..70da89bacc72 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -3915,7 +3915,7 @@ void scheduler_tick(void) + } + + #ifdef CONFIG_SCHED_SMT +-static inline int active_load_balance_cpu_stop(void *data) ++static inline int sg_balance_cpu_stop(void *data) + { + struct rq *rq = this_rq(); + struct task_struct *p = data; +@@ -3966,15 +3966,15 @@ static inline int sg_balance_trigger(const int cpu) + raw_spin_unlock_irqrestore(&rq->lock, flags); + + if (res) +- stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, +- curr, &rq->active_balance_work); ++ stop_one_cpu_nowait(cpu, sg_balance_cpu_stop, curr, ++ &rq->active_balance_work); + return res; + } + + /* +- * sg_balance_check - slibing group balance check for run queue @rq ++ * sg_balance - slibing group balance check for run queue @rq + */ +-static inline void sg_balance_check(struct rq *rq) ++static inline void sg_balance(struct rq *rq) + { + cpumask_t chk; + int cpu = cpu_of(rq); +@@ -4639,7 +4639,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) + } + + #ifdef CONFIG_SCHED_SMT +- sg_balance_check(rq); ++ sg_balance(rq); + #endif + } + +-- +2.37.0 + + +From ce3f2729759e710ed1b5b44dd867977ac66f63ec Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 5 Jun 2022 15:09:03 +0000 +Subject: [PATCH 293/297] sched/alt: Watermark preempt fix + +Schedule watermark based preempt is not wokring properly. Typical +scenario is running low priority tasks on all CPUs, then run kernel +compilation with normal prioority, kernel compilation will spread only +on cpu0. Here is the fix. + +1) Fix the size of sched_rq_watermark, IDLE_TASK_SCHED_PRIO doesn't need + a watermark. +2) Remove sched_exec() implementation. It tends scheduling on cpu 0. +3) For BMQ, children task runs at lowest boost priority. 
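+
+Below is an illustrative sketch (editorial addition, not part of Alfred's
+change) that checks the new watermark index math described above. It assumes
+the Project C relations SCHED_QUEUE_BITS == SCHED_BITS - 1 and
+IDLE_TASK_SCHED_PRIO == SCHED_BITS - 1 from alt_sched.h; the SCHED_BITS value
+used here is only a placeholder for the demo.
+
+    #include <assert.h>
+    #include <stdio.h>
+
+    #define SCHED_BITS           65              /* placeholder value for the demo */
+    #define SCHED_QUEUE_BITS     (SCHED_BITS - 1)
+    #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1)
+
+    int main(void)
+    {
+        /* update_sched_rq_watermark(): walked levels are 1..IDLE_TASK_SCHED_PRIO */
+        for (int i = 1; i <= IDLE_TASK_SCHED_PRIO; i++) {
+            int idx = SCHED_QUEUE_BITS - i;
+            assert(idx >= 0 && idx < SCHED_QUEUE_BITS);
+        }
+        /* select_task_rq(): non-idle task priorities are 0..IDLE_TASK_SCHED_PRIO-1 */
+        for (int prio = 0; prio < IDLE_TASK_SCHED_PRIO; prio++) {
+            int idx = SCHED_QUEUE_BITS - 1 - prio;
+            assert(idx >= 0 && idx < SCHED_QUEUE_BITS);
+        }
+        printf("all watermark indices fit in [0, %d)\n", SCHED_QUEUE_BITS);
+        return 0;
+    }
+
+Compiled standalone, the asserts pass for every level and priority, which is
+why the smaller sched_rq_watermark[SCHED_QUEUE_BITS] array is enough for
+these call sites.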
+--- + kernel/sched/alt_core.c | 30 ++++++------------------------ + kernel/sched/bmq.h | 3 +-- + 2 files changed, 7 insertions(+), 26 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 70da89bacc72..9f8c1a826095 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -144,14 +144,14 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); + #ifdef CONFIG_SCHED_SMT + static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; + #endif +-static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; ++static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp; + + /* sched_queue related functions */ + static inline void sched_queue_init(struct sched_queue *q) + { + int i; + +- bitmap_zero(q->bitmap, SCHED_BITS); ++ bitmap_zero(q->bitmap, SCHED_QUEUE_BITS); + for(i = 0; i < SCHED_BITS; i++) + INIT_LIST_HEAD(&q->heads[i]); + } +@@ -183,7 +183,7 @@ static inline void update_sched_rq_watermark(struct rq *rq) + cpu = cpu_of(rq); + if (watermark < last_wm) { + for (i = last_wm; i > watermark; i--) +- cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); ++ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && + IDLE_TASK_SCHED_PRIO == last_wm) +@@ -194,7 +194,7 @@ static inline void update_sched_rq_watermark(struct rq *rq) + } + /* last_wm < watermark */ + for (i = watermark; i > last_wm; i--) +- cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); ++ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); + #ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && + IDLE_TASK_SCHED_PRIO == watermark) { +@@ -1902,7 +1902,7 @@ static inline int select_task_rq(struct task_struct *p) + #endif + cpumask_and(&tmp, &chk_mask, sched_rq_watermark) || + cpumask_and(&tmp, &chk_mask, +- sched_rq_watermark + SCHED_BITS - task_sched_prio(p))) ++ sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p))) + return best_mask_cpu(task_cpu(p), &tmp); + + return best_mask_cpu(task_cpu(p), &chk_mask); +@@ -3730,24 +3730,6 @@ unsigned int nr_iowait(void) + */ + void sched_exec(void) + { +- struct task_struct *p = current; +- unsigned long flags; +- int dest_cpu; +- +- raw_spin_lock_irqsave(&p->pi_lock, flags); +- dest_cpu = cpumask_any(p->cpus_ptr); +- if (dest_cpu == smp_processor_id()) +- goto unlock; +- +- if (likely(cpu_active(dest_cpu))) { +- struct migration_arg arg = { p, dest_cpu }; +- +- raw_spin_unlock_irqrestore(&p->pi_lock, flags); +- stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); +- return; +- } +-unlock: +- raw_spin_unlock_irqrestore(&p->pi_lock, flags); + } + + #endif +@@ -7292,7 +7274,7 @@ void __init sched_init(void) + wait_bit_init(); + + #ifdef CONFIG_SMP +- for (i = 0; i < SCHED_BITS; i++) ++ for (i = 0; i < SCHED_QUEUE_BITS; i++) + cpumask_copy(sched_rq_watermark + i, cpu_present_mask); + #endif + +diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h +index bf7ac80ec242..66b77291b9d0 100644 +--- a/kernel/sched/bmq.h ++++ b/kernel/sched/bmq.h +@@ -85,8 +85,7 @@ inline int task_running_nice(struct task_struct *p) + + static void sched_task_fork(struct task_struct *p, struct rq *rq) + { +- p->boost_prio = (p->boost_prio < 0) ? 
+- p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; ++ p->boost_prio = MAX_PRIORITY_ADJ; + } + + static inline void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) +-- +2.37.0 + + +From bee4479334a920c506a6bff410251c39fd82c6b2 Mon Sep 17 00:00:00 2001 +From: Alfred Chen +Date: Sun, 19 Jun 2022 10:20:11 +0000 +Subject: [PATCH 294/297] Project-C v5.18-r2 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 9f8c1a826095..b70c19fdb7a6 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -64,7 +64,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.18-r1" ++#define ALT_SCHED_VERSION "v5.18-r2" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From ae834915c8ab8e84348a3285b5760caf416285e6 Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Fri, 8 Jul 2022 11:26:06 +0200 +Subject: [PATCH 295/297] sched/alt: Rebase onto 5.19 + +--- + kernel/sched/alt_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index b70c19fdb7a6..1ee86c260e20 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -64,7 +64,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; + #define sched_feat(x) (0) + #endif /* CONFIG_SCHED_DEBUG */ + +-#define ALT_SCHED_VERSION "v5.18-r2" ++#define ALT_SCHED_VERSION "v5.19-r0-vd" + + /* rt_prio(prio) defined in include/linux/sched/rt.h */ + #define rt_task(p) rt_prio((p)->prio) +-- +2.37.0 + + +From 9c4a7c5fd612abc5e58d018a76d45bad06ff3a6f Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Fri, 8 Jul 2022 14:09:33 +0200 +Subject: [PATCH 296/297] sched/alt: [Sync] smp: Rename + flush_smp_call_function_from_idle() + +--- + kernel/sched/alt_core.c | 2 +- + kernel/sched/alt_sched.h | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index 1ee86c260e20..bea63c917bde 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -1590,7 +1590,7 @@ static int migration_cpu_stop(void *data) + * __migrate_task() such that we will not miss enforcing cpus_ptr + * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. 
+ */ +- flush_smp_call_function_from_idle(); ++ flush_smp_call_function_queue(); + + raw_spin_lock(&p->pi_lock); + raw_spin_lock(&rq->lock); +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index 1f85c5627d6d..fc6f9a805571 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -289,10 +289,10 @@ static inline int best_mask_cpu(int cpu, const cpumask_t *mask) + return __best_mask_cpu(mask, per_cpu(sched_cpu_topo_masks, cpu)); + } + +-extern void flush_smp_call_function_from_idle(void); ++extern void flush_smp_call_function_queue(void); + + #else /* !CONFIG_SMP */ +-static inline void flush_smp_call_function_from_idle(void) { } ++static inline void flush_smp_call_function_queue(void) { } + #endif + + #ifndef arch_scale_freq_tick +-- +2.37.0 + + +From e73b28fc1e4234835c241307ec8cf46d992d3ec4 Mon Sep 17 00:00:00 2001 +From: Tor Vic +Date: Fri, 8 Jul 2022 14:11:40 +0200 +Subject: [PATCH 297/297] sched/alt: Add lost includes again + +--- + kernel/sched/alt_core.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index bea63c917bde..3e8ecfb413f9 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include + +@@ -40,6 +41,8 @@ + + #include "sched.h" + ++#include "pelt.h" ++ + #include "../../fs/io-wq.h" + #include "../smpboot.h" + +-- +2.37.0 +