diff --git a/PKGBUILD b/PKGBUILD index b80cf39..53723a1 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -59,7 +59,7 @@ else fi pkgname=("${pkgbase}" "${pkgbase}-headers") pkgver="${_basekernel}"."${_sub}" -pkgrel=266 +pkgrel=267 pkgdesc='Linux-tkg' arch=('x86_64') # no i686 in here url="https://www.kernel.org/" @@ -838,7 +838,7 @@ case $_basever in '40e7ecc52af379df1e362b32e5b9827815284dae895328556336cb0d10979b63' '9fad4a40449e09522899955762c8928ae17f4cdaa16e01239fd12592e9d58177' 'a557b342111849a5f920bbe1c129f3ff1fc1eff62c6bd6685e0972fc88e39911' - 'b9822a7f912a46196facb9e70229b90357fbc7c7c1dfe23a407e1cb014add30d' + '6592ae4cadf0cfddd63fa77416a07131a926d3426df351c41490f361fc56495b' '970819d5a6eca65b9eeb7c459a8bd487600a10b2b02fed323f7456f483c7d5ce' '213ecf1ba59dc87ed1844c3473d575b85ffe3a567f86735e8c6239c92dbbb493' '1b656ad96004f27e9dc63d7f430b50d5c48510d6d4cd595a81c24b21adb70313' diff --git a/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch b/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch index 814125c..610cfe8 100644 --- a/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch +++ b/linux-tkg-patches/5.19/0009-prjc_v5.19-r0.patch @@ -1,34 +1,34 @@ diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 2522b11e593f..11e0b608c57d 100644 +index cc3ea8febc62..ab4c5a35b999 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -5273,6 +5273,12 @@ +@@ -5299,6 +5299,12 @@ sa1100ir [NET] See drivers/net/irda/sa1100_ir.c. - + + sched_timeslice= -+ [KNL] Time slice in us for BMQ scheduler. -+ Format: (must be >= 1000) -+ Default: 4000 ++ [KNL] Time slice in ms for Project C BMQ/PDS scheduler. ++ Format: integer 2, 4 ++ Default: 4 + See Documentation/scheduler/sched-BMQ.txt + sched_verbose [KNL] Enables verbose scheduler debug messages. - + schedstats= [KNL,X86] Enable or disable scheduled statistics. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index ddccd1077462..8fd3bfa2ecd9 100644 +index ddccd1077462..e24781970a3d 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1524,3 +1524,13 @@ is 10 seconds. - + The softlockup threshold is (``2 * watchdog_thresh``). Setting this tunable to zero will disable lockup detection altogether. + +yield_type: +=========== + -+BMQ CPU scheduler only. This determines what type of yield calls to -+sched_yield will perform. ++BMQ/PDS CPU scheduler only. This determines what type of yield calls ++to sched_yield will perform. + + 0 - No yield. + 1 - Deboost and requeue task. (default) @@ -149,22 +149,6 @@ index 000000000000..05c84eec0f31 +spend most of their time blocked awaiting another user event. So they get the +priority boost from unblocking while background threads that do most of the +processing receive the priority penalty for using their entire timeslice. -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index 99bd027a7f7c..525372fcd0f2 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,11 +51,6 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - --/* -- * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). -- */ --#define NORMAL_PRIO 120 -- - /* - * Frequency of the spu scheduler tick. 
By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. diff --git a/fs/proc/base.c b/fs/proc/base.c index 8dfa36a99c74..46397c606e01 100644 --- a/fs/proc/base.c @@ -177,7 +161,7 @@ index 8dfa36a99c74..46397c606e01 100644 + (unsigned long long)tsk_seruntime(task), (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); - + diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h index 8874f681b056..59eb72bf7d5f 100644 --- a/include/asm-generic/resource.h @@ -192,51 +176,66 @@ index 8874f681b056..59eb72bf7d5f 100644 [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ } diff --git a/include/linux/sched.h b/include/linux/sched.h -index c46f3a63b758..6907241224a5 100644 +index c46f3a63b758..7c65e6317d97 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -750,8 +750,10 @@ struct task_struct { - unsigned int flags; +@@ -751,8 +751,14 @@ struct task_struct { unsigned int ptrace; - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) - int on_cpu; -+#endif -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) + + #ifdef CONFIG_SMP +- int on_cpu; struct __call_single_node wake_entry; ++#endif ++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) ++ int on_cpu; ++#endif ++ ++#ifdef CONFIG_SMP ++#ifndef CONFIG_SCHED_ALT unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; -@@ -774,6 +776,17 @@ struct task_struct { + struct task_struct *last_wakee; +@@ -766,6 +772,7 @@ struct task_struct { + */ + int recent_used_cpu; + int wake_cpu; ++#endif /* !CONFIG_SCHED_ALT */ + #endif + int on_rq; + +@@ -774,6 +781,20 @@ struct task_struct { int normal_prio; unsigned int rt_priority; - + +#ifdef CONFIG_SCHED_ALT + u64 last_ran; + s64 time_slice; -+ int boost_prio; ++ int sq_idx; ++ struct list_head sq_node; +#ifdef CONFIG_SCHED_BMQ -+ int bmq_idx; -+ struct list_head bmq_node; ++ int boost_prio; +#endif /* CONFIG_SCHED_BMQ */ ++#ifdef CONFIG_SCHED_PDS ++ u64 deadline; ++#endif /* CONFIG_SCHED_PDS */ + /* sched_clock time spent running */ + u64 sched_time; +#else /* !CONFIG_SCHED_ALT */ struct sched_entity se; struct sched_rt_entity rt; struct sched_dl_entity dl; -@@ -784,6 +797,7 @@ struct task_struct { +@@ -784,6 +805,7 @@ struct task_struct { unsigned long core_cookie; unsigned int core_occupation; #endif +#endif /* !CONFIG_SCHED_ALT */ - + #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; -@@ -1517,6 +1531,15 @@ struct task_struct { +@@ -1517,6 +1539,15 @@ struct task_struct { */ }; - + +#ifdef CONFIG_SCHED_ALT +#define tsk_seruntime(t) ((t)->sched_time) +/* replace the uncertian rt_timeout with 0UL */ @@ -250,18 +249,27 @@ index c46f3a63b758..6907241224a5 100644 { return task->thread_pid; diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 7c83d4d5a971..529e1b2ebd19 100644 +index 7c83d4d5a971..fa30f98cb2be 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h -@@ -1,5 +1,15 @@ +@@ -1,5 +1,24 @@ /* SPDX-License-Identifier: GPL-2.0 */ - + +#ifdef CONFIG_SCHED_ALT + ++static inline int dl_task(struct task_struct *p) ++{ ++ return 0; ++} ++ +#ifdef CONFIG_SCHED_BMQ +#define __tsk_deadline(p) (0UL) +#endif + ++#ifdef CONFIG_SCHED_PDS ++#define __tsk_deadline(p) ((((u64) ((p)->prio))<<56) | (p)->deadline) ++#endif ++ +#else + +#define __tsk_deadline(p) ((p)->dl.deadline) @@ -269,26 +277,47 @@ index 7c83d4d5a971..529e1b2ebd19 100644 /* * SCHED_DEADLINE tasks has negative priorities, reflecting * the fact that any of them has higher 
prio than RT and -@@ -21,6 +31,7 @@ static inline int dl_task(struct task_struct *p) +@@ -21,6 +40,7 @@ static inline int dl_task(struct task_struct *p) { return dl_prio(p->prio); } +#endif /* CONFIG_SCHED_ALT */ - + static inline bool dl_time_before(u64 a, u64 b) { diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index ab83d85e1183..c28676e431be 100644 +index ab83d85e1183..6af9ae681116 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h -@@ -18,6 +18,11 @@ +@@ -18,6 +18,32 @@ #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - + +#ifdef CONFIG_SCHED_ALT ++ ++/* Undefine MAX_PRIO and DEFAULT_PRIO */ ++#undef MAX_PRIO ++#undef DEFAULT_PRIO ++ +/* +/- priority levels from the base priority */ -+#define MAX_PRIORITY_ADJ 4 ++#ifdef CONFIG_SCHED_BMQ ++#define MAX_PRIORITY_ADJ (7) ++ ++#define MIN_NORMAL_PRIO (MAX_RT_PRIO) ++#define MAX_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH) ++#define DEFAULT_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH / 2) +#endif ++ ++#ifdef CONFIG_SCHED_PDS ++#define MAX_PRIORITY_ADJ (0) ++ ++#define MIN_NORMAL_PRIO (128) ++#define NORMAL_PRIO_NUM (64) ++#define MAX_PRIO (MIN_NORMAL_PRIO + NORMAL_PRIO_NUM) ++#define DEFAULT_PRIO (MAX_PRIO - NICE_WIDTH / 2) ++#endif ++ ++#endif /* CONFIG_SCHED_ALT */ + /* * Convert user-nice values [ -20 ... 0 ... 19 ] @@ -298,7 +327,7 @@ index e5af028c08b4..0a7565d0d3cf 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - + if (policy == SCHED_FIFO || policy == SCHED_RR) return true; +#ifndef CONFIG_SCHED_ALT @@ -307,15 +336,37 @@ index e5af028c08b4..0a7565d0d3cf 100644 +#endif return false; } - + +diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h +index 56cffe42abbc..e020fc572b22 100644 +--- a/include/linux/sched/topology.h ++++ b/include/linux/sched/topology.h +@@ -233,7 +233,8 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) + + #endif /* !CONFIG_SMP */ + +-#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) ++#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) && \ ++ !defined(CONFIG_SCHED_ALT) + extern void rebuild_sched_domains_energy(void); + #else + static inline void rebuild_sched_domains_energy(void) diff --git a/init/Kconfig b/init/Kconfig -index c7900e8975f1..0a817c86c966 100644 +index c7900e8975f1..d2b593e3807d 100644 --- a/init/Kconfig +++ b/init/Kconfig -@@ -809,9 +809,33 @@ config GENERIC_SCHED_CLOCK - - menu "Scheduler features" - +@@ -812,6 +812,7 @@ menu "Scheduler features" + config UCLAMP_TASK + bool "Enable utilization clamping for RT/FAIR tasks" + depends on CPU_FREQ_GOV_SCHEDUTIL ++ depends on !SCHED_ALT + help + This feature enables the scheduler to track the clamped utilization + of each CPU based on RUNNABLE tasks scheduled on that CPU. +@@ -858,6 +859,35 @@ config UCLAMP_BUCKETS_COUNT + + If in doubt, use the default value. + +menuconfig SCHED_ALT + bool "Alternative CPU Schedulers" + default y @@ -335,44 +386,53 @@ index c7900e8975f1..0a817c86c966 100644 + responsiveness on the desktop and solid scalability on normal + hardware and commodity servers. + ++config SCHED_PDS ++ bool "PDS CPU scheduler" ++ help ++ The Priority and Deadline based Skip list multiple queue CPU ++ Scheduler. 
++ +endchoice + +endif + - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -+ depends on !SCHED_BMQ - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. -@@ -911,6 +935,7 @@ config NUMA_BALANCING + endmenu + + # +@@ -911,6 +941,7 @@ config NUMA_BALANCING depends on ARCH_SUPPORTS_NUMA_BALANCING depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY depends on SMP && NUMA && MIGRATION && !PREEMPT_RT -+ depends on !SCHED_BMQ ++ depends on !SCHED_ALT help This option adds support for automatic NUMA aware memory/task placement. The mechanism is quite primitive and is based on migrating memory when -@@ -997,7 +1022,7 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. - --if CGROUP_SCHED -+if CGROUP_SCHED && !SCHED_BMQ - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" +@@ -1003,6 +1034,7 @@ config FAIR_GROUP_SCHED depends on CGROUP_SCHED -@@ -1268,6 +1293,7 @@ config CHECKPOINT_RESTORE - + default CGROUP_SCHED + ++if !SCHED_ALT + config CFS_BANDWIDTH + bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" + depends on FAIR_GROUP_SCHED +@@ -1025,6 +1057,7 @@ config RT_GROUP_SCHED + realtime bandwidth for them. + See Documentation/scheduler/sched-rt-group.rst for more information. + ++endif #!SCHED_ALT + endif #CGROUP_SCHED + + config UCLAMP_TASK_GROUP +@@ -1268,6 +1301,7 @@ config CHECKPOINT_RESTORE + config SCHED_AUTOGROUP bool "Automatic process group scheduling" -+ depends on !SCHED_BMQ ++ depends on !SCHED_ALT select CGROUPS select CGROUP_SCHED select FAIR_GROUP_SCHED diff --git a/init/init_task.c b/init/init_task.c -index 73cc8f03511a..9017276b1a80 100644 +index 73cc8f03511a..2d0bad762895 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -75,9 +75,15 @@ struct task_struct init_task @@ -391,22 +451,25 @@ index 73cc8f03511a..9017276b1a80 100644 .policy = SCHED_NORMAL, .cpus_ptr = &init_task.cpus_mask, .user_cpus_ptr = NULL, -@@ -88,6 +94,14 @@ struct task_struct init_task +@@ -88,6 +94,17 @@ struct task_struct init_task .restart_block = { .fn = do_no_restart_syscall, }, +#ifdef CONFIG_SCHED_ALT -+ .boost_prio = 0, ++ .sq_node = LIST_HEAD_INIT(init_task.sq_node), +#ifdef CONFIG_SCHED_BMQ -+ .bmq_idx = 15, -+ .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), ++ .boost_prio = 0, ++ .sq_idx = 15, ++#endif ++#ifdef CONFIG_SCHED_PDS ++ .deadline = 0, +#endif + .time_slice = HZ, +#else .se = { .group_node = LIST_HEAD_INIT(init_task.se.group_node), }, -@@ -95,6 +109,7 @@ struct task_struct init_task +@@ -95,6 +112,7 @@ struct task_struct init_task .run_list = LIST_HEAD_INIT(init_task.rt.run_list), .time_slice = RR_TIMESLICE, }, @@ -414,6 +477,19 @@ index 73cc8f03511a..9017276b1a80 100644 .tasks = LIST_HEAD_INIT(init_task.tasks), #ifdef CONFIG_SMP .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), +diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt +index c2f1fd95a821..41654679b1b2 100644 +--- a/kernel/Kconfig.preempt ++++ b/kernel/Kconfig.preempt +@@ -117,7 +117,7 @@ config PREEMPT_DYNAMIC + + config SCHED_CORE + bool "Core Scheduling for SMT" +- depends on SCHED_SMT ++ depends on SCHED_SMT && !SCHED_ALT + help + This option permits Core Scheduling, a means of coordinated task + selection across SMT siblings. 
When enabled -- see diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 71a418858a5e..7e3016873db1 100644 --- a/kernel/cgroup/cpuset.c @@ -421,7 +497,7 @@ index 71a418858a5e..7e3016873db1 100644 @@ -704,7 +704,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) return ret; } - + -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) /* @@ -446,21 +522,21 @@ index 164ed9ef77a3..c974a84b056f 100644 t2 = tsk->sched_info.run_delay; - t3 = tsk->se.sum_exec_runtime; + t3 = tsk_seruntime(tsk); - + d->cpu_count += t1; - + diff --git a/kernel/exit.c b/kernel/exit.c -index f072959fcab7..da97095a2997 100644 +index 64c938ce36fe..a353f7ef5392 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -124,7 +124,7 @@ static void __exit_signal(struct task_struct *tsk) sig->curr_target = next_thread(tsk); } - + - add_device_randomness((const void*) &tsk->se.sum_exec_runtime, + add_device_randomness((const void*) &tsk_seruntime(tsk), sizeof(unsigned long long)); - + /* @@ -145,7 +145,7 @@ static void __exit_signal(struct task_struct *tsk) sig->inblock += task_io_get_inblock(tsk); @@ -472,68 +548,82 @@ index f072959fcab7..da97095a2997 100644 __unhash_process(tsk, group_dead); write_sequnlock(&sig->stats_lock); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index 7779ee8abc2a..4258d1c08c71 100644 +index 7779ee8abc2a..5b9893cdfb1b 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -300,14 +300,14 @@ static __always_inline void +@@ -300,21 +300,25 @@ static __always_inline void waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) { waiter->prio = __waiter_prio(task); - waiter->deadline = task->dl.deadline; + waiter->deadline = __tsk_deadline(task); } - + /* * Only use with rt_mutex_waiter_{less,equal}() */ #define task_to_waiter(p) \ - &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline } + &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = __tsk_deadline(p) } - + static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, struct rt_mutex_waiter *right) -@@ -315,6 +315,7 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, + { ++#ifdef CONFIG_SCHED_PDS ++ return (left->deadline < right->deadline); ++#else if (left->prio < right->prio) return 1; - + +#ifndef CONFIG_SCHED_BMQ /* * If both waiters have dl_prio(), we check the deadlines of the * associated tasks. -@@ -323,6 +324,7 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, +@@ -323,16 +327,22 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, */ if (dl_prio(left->prio)) return dl_time_before(left->deadline, right->deadline); +#endif - + return 0; ++#endif } -@@ -333,6 +335,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + + static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + struct rt_mutex_waiter *right) + { ++#ifdef CONFIG_SCHED_PDS ++ return (left->deadline == right->deadline); ++#else if (left->prio != right->prio) return 0; - + +#ifndef CONFIG_SCHED_BMQ /* * If both waiters have dl_prio(), we check the deadlines of the * associated tasks. 
-@@ -341,6 +344,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, +@@ -341,8 +351,10 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, */ if (dl_prio(left->prio)) return left->deadline == right->deadline; +#endif - + return 1; ++#endif } + + static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 976092b7bd45..01b041388419 100644 +index 976092b7bd45..31d587c16ec1 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile -@@ -28,7 +28,11 @@ endif +@@ -28,7 +28,12 @@ endif # These compilation units have roughly the same size and complexity - so their # build parallelizes well and finishes roughly at once: # +ifdef CONFIG_SCHED_ALT -+obj-y += alt_core.o alt_debug.o ++obj-y += alt_core.o ++obj-$(CONFIG_SCHED_DEBUG) += alt_debug.o +else obj-y += core.o obj-y += fair.o @@ -542,10 +632,10 @@ index 976092b7bd45..01b041388419 100644 obj-y += build_utility.o diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c new file mode 100644 -index 000000000000..09ca47de425c +index 000000000000..d0ab41c4d9ad --- /dev/null +++ b/kernel/sched/alt_core.c -@@ -0,0 +1,5940 @@ +@@ -0,0 +1,7807 @@ +/* + * kernel/sched/alt_core.c + * @@ -559,37 +649,63 @@ index 000000000000..09ca47de425c + * scheduler by Alfred Chen. + * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. + */ -+#include "sched.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+#include -+ -+#include -+#include +#include ++#include ++#include +#include -+#include +#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ +#include ++#include ++#include ++#include ++#include ++ ++#include + +#include + -+#include "../workqueue_internal.h" -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" ++#define CREATE_TRACE_POINTS ++#include ++#undef CREATE_TRACE_POINTS ++ ++#include "sched.h" + +#include "pelt.h" + -+#define CREATE_TRACE_POINTS -+#include ++#include "../../fs/io-wq.h" ++#include "../smpboot.h" ++ ++/* ++ * Export tracepoints that act as a bare tracehook (ie: have no trace event ++ * associated with them) to allow external modules to probe them. ++ */ ++EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); ++ ++#ifdef CONFIG_SCHED_DEBUG ++#define sched_feat(x) (1) ++/* ++ * Print a warning if need_resched is set for the given duration (if ++ * LATENCY_WARN is enabled). ++ * ++ * If sysctl_resched_latency_warn_once is set, only one warning will be shown ++ * per boot. 
++ */ ++__read_mostly int sysctl_resched_latency_warn_ms = 100; ++__read_mostly int sysctl_resched_latency_warn_once = 1; ++#else ++#define sched_feat(x) (0) ++#endif /* CONFIG_SCHED_DEBUG */ ++ ++#define ALT_SCHED_VERSION "v5.19-r0" + +/* rt_prio(prio) defined in include/linux/sched/rt.h */ +#define rt_task(p) rt_prio((p)->prio) @@ -599,22 +715,33 @@ index 000000000000..09ca47de425c +#define STOP_PRIO (MAX_RT_PRIO - 1) + +/* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ -+u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000); ++u64 sched_timeslice_ns __read_mostly = (4 << 20); ++ ++static inline void requeue_task(struct task_struct *p, struct rq *rq, int idx); ++ ++#ifdef CONFIG_SCHED_BMQ ++#include "bmq.h" ++#endif ++#ifdef CONFIG_SCHED_PDS ++#include "pds.h" ++#endif + +static int __init sched_timeslice(char *str) +{ -+ int timeslice_us; ++ int timeslice_ms; + -+ get_option(&str, ×lice_us); -+ if (timeslice_us >= 1000) -+ sched_timeslice_ns = timeslice_us * 1000; ++ get_option(&str, ×lice_ms); ++ if (2 != timeslice_ms) ++ timeslice_ms = 4; ++ sched_timeslice_ns = timeslice_ms << 20; ++ sched_timeslice_imp(timeslice_ms); + + return 0; +} +early_param("sched_timeslice", sched_timeslice); + +/* Reschedule if less than this many μs left */ -+#define RESCHED_NS (100 * 1000) ++#define RESCHED_NS (100 << 10) + +/** + * sched_yield_type - Choose what sort of yield sched_yield will perform. @@ -624,42 +751,12 @@ index 000000000000..09ca47de425c + */ +int sched_yield_type __read_mostly = 1; + -+#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) -+#define boost_threshold(p) (sched_timeslice_ns >>\ -+ (10 - MAX_PRIORITY_ADJ - (p)->boost_prio)) -+ -+static inline void boost_task(struct task_struct *p) -+{ -+ int limit; -+ -+ switch (p->policy) { -+ case SCHED_NORMAL: -+ limit = -MAX_PRIORITY_ADJ; -+ break; -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ limit = 0; -+ break; -+ default: -+ return; -+ } -+ -+ if (p->boost_prio > limit) -+ p->boost_prio--; -+} -+ -+static inline void deboost_task(struct task_struct *p) -+{ -+ if (p->boost_prio < MAX_PRIORITY_ADJ) -+ p->boost_prio++; -+} -+ +#ifdef CONFIG_SMP +static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; + -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); ++DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); +DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); ++DEFINE_PER_CPU(cpumask_t *, sched_cpu_topo_end_mask); + +#ifdef CONFIG_SCHED_SMT +DEFINE_STATIC_KEY_FALSE(sched_smt_present); @@ -685,14 +782,37 @@ index 000000000000..09ca47de425c +# define finish_arch_post_lock_switch() do { } while (0) +#endif + -+#define IDLE_WM (IDLE_TASK_SCHED_PRIO) -+ ++#ifdef CONFIG_SCHED_SMT +static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; -+static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; ++#endif ++static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp; + ++/* sched_queue related functions */ ++static inline void sched_queue_init(struct sched_queue *q) ++{ ++ int i; ++ ++ bitmap_zero(q->bitmap, SCHED_QUEUE_BITS); ++ for(i = 0; i < SCHED_BITS; i++) ++ INIT_LIST_HEAD(&q->heads[i]); ++} ++ ++/* ++ * Init idle task and put into queue structure of rq ++ * IMPORTANT: may be called multiple times for a single cpu ++ */ ++static inline void sched_queue_init_idle(struct sched_queue *q, ++ struct task_struct *idle) ++{ ++ 
idle->sq_idx = IDLE_TASK_SCHED_PRIO; ++ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); ++ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); ++} ++ ++/* water mark related functions */ +static inline void update_sched_rq_watermark(struct rq *rq) +{ -+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_BITS); ++ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); + unsigned long last_wm = rq->watermark; + unsigned long i; + int cpu; @@ -703,40 +823,59 @@ index 000000000000..09ca47de425c + rq->watermark = watermark; + cpu = cpu_of(rq); + if (watermark < last_wm) { -+ for (i = watermark + 1; i <= last_wm; i++) -+ cpumask_andnot(&sched_rq_watermark[i], -+ &sched_rq_watermark[i], cpumask_of(cpu)); ++ for (i = last_wm; i > watermark; i--) ++ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); +#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == last_wm) ++ if (static_branch_likely(&sched_smt_present) && ++ IDLE_TASK_SCHED_PRIO == last_wm) + cpumask_andnot(&sched_sg_idle_mask, + &sched_sg_idle_mask, cpu_smt_mask(cpu)); +#endif + return; + } + /* last_wm < watermark */ -+ for (i = last_wm + 1; i <= watermark; i++) -+ cpumask_set_cpu(cpu, &sched_rq_watermark[i]); ++ for (i = watermark; i > last_wm; i--) ++ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); +#ifdef CONFIG_SCHED_SMT -+ if (!static_branch_likely(&sched_smt_present)) -+ return; -+ if (IDLE_WM == watermark) { ++ if (static_branch_likely(&sched_smt_present) && ++ IDLE_TASK_SCHED_PRIO == watermark) { + cpumask_t tmp; -+ cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); ++ ++ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark); + if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), -+ &sched_sg_idle_mask); ++ cpumask_or(&sched_sg_idle_mask, ++ &sched_sg_idle_mask, cpu_smt_mask(cpu)); + } +#endif +} + -+static inline int task_sched_prio(struct task_struct *p) ++/* ++ * This routine assume that the idle task always in queue ++ */ ++static inline struct task_struct *sched_rq_first_task(struct rq *rq) +{ -+ return (p->prio < MAX_RT_PRIO)? p->prio : p->prio + p->boost_prio; ++ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); ++ const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; ++ ++ return list_first_entry(head, struct task_struct, sq_node); +} + -+#include "bmq_imp.h" ++static inline struct task_struct * ++sched_rq_next_task(struct task_struct *p, struct rq *rq) ++{ ++ unsigned long idx = p->sq_idx; ++ struct list_head *head = &rq->queue.heads[idx]; ++ ++ if (list_is_last(&p->sq_node, head)) { ++ idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, ++ sched_idx2prio(idx, rq) + 1); ++ head = &rq->queue.heads[sched_prio2idx(idx, rq)]; ++ ++ return list_first_entry(head, struct task_struct, sq_node); ++ } ++ ++ return list_next_entry(p, sq_node); ++} + +static inline struct task_struct *rq_runnable_task(struct rq *rq) +{ @@ -749,6 +888,99 @@ index 000000000000..09ca47de425c +} + +/* ++ * Serialization rules: ++ * ++ * Lock order: ++ * ++ * p->pi_lock ++ * rq->lock ++ * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls) ++ * ++ * rq1->lock ++ * rq2->lock where: rq1 < rq2 ++ * ++ * Regular state: ++ * ++ * Normal scheduling state is serialized by rq->lock. 
__schedule() takes the ++ * local CPU's rq->lock, it optionally removes the task from the runqueue and ++ * always looks at the local rq data structures to find the most eligible task ++ * to run next. ++ * ++ * Task enqueue is also under rq->lock, possibly taken from another CPU. ++ * Wakeups from another LLC domain might use an IPI to transfer the enqueue to ++ * the local CPU to avoid bouncing the runqueue state around [ see ++ * ttwu_queue_wakelist() ] ++ * ++ * Task wakeup, specifically wakeups that involve migration, are horribly ++ * complicated to avoid having to take two rq->locks. ++ * ++ * Special state: ++ * ++ * System-calls and anything external will use task_rq_lock() which acquires ++ * both p->pi_lock and rq->lock. As a consequence the state they change is ++ * stable while holding either lock: ++ * ++ * - sched_setaffinity()/ ++ * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed ++ * - set_user_nice(): p->se.load, p->*prio ++ * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio, ++ * p->se.load, p->rt_priority, ++ * p->dl.dl_{runtime, deadline, period, flags, bw, density} ++ * - sched_setnuma(): p->numa_preferred_nid ++ * - sched_move_task()/ ++ * cpu_cgroup_fork(): p->sched_task_group ++ * - uclamp_update_active() p->uclamp* ++ * ++ * p->state <- TASK_*: ++ * ++ * is changed locklessly using set_current_state(), __set_current_state() or ++ * set_special_state(), see their respective comments, or by ++ * try_to_wake_up(). This latter uses p->pi_lock to serialize against ++ * concurrent self. ++ * ++ * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }: ++ * ++ * is set by activate_task() and cleared by deactivate_task(), under ++ * rq->lock. Non-zero indicates the task is runnable, the special ++ * ON_RQ_MIGRATING state is used for migration without holding both ++ * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock(). ++ * ++ * p->on_cpu <- { 0, 1 }: ++ * ++ * is set by prepare_task() and cleared by finish_task() such that it will be ++ * set before p is scheduled-in and cleared after p is scheduled-out, both ++ * under rq->lock. Non-zero indicates the task is running on its CPU. ++ * ++ * [ The astute reader will observe that it is possible for two tasks on one ++ * CPU to have ->on_cpu = 1 at the same time. ] ++ * ++ * task_cpu(p): is changed by set_task_cpu(), the rules are: ++ * ++ * - Don't call set_task_cpu() on a blocked task: ++ * ++ * We don't care what CPU we're not running on, this simplifies hotplug, ++ * the CPU assignment of blocked tasks isn't required to be valid. ++ * ++ * - for try_to_wake_up(), called under p->pi_lock: ++ * ++ * This allows try_to_wake_up() to only take one rq->lock, see its comment. 
++ * ++ * - for migration called under rq->lock: ++ * [ see task_on_rq_migrating() in task_rq_lock() ] ++ * ++ * o move_queued_task() ++ * o detach_task() ++ * ++ * - for migration called under double_rq_lock(): ++ * ++ * o __migrate_swap_task() ++ * o push_rt_task() / pull_rt_task() ++ * o push_dl_task() / pull_dl_task() ++ * o dl_task_offline_migration() ++ * ++ */ ++ ++/* + * Context: p->pi_lock + */ +static inline struct rq @@ -884,6 +1116,44 @@ index 000000000000..09ca47de425c + } +} + ++static inline void ++rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock_irqsave(&rq->lock, rf->flags); ++} ++ ++static inline void ++rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); ++} ++ ++void raw_spin_rq_lock_nested(struct rq *rq, int subclass) ++{ ++ raw_spinlock_t *lock; ++ ++ /* Matches synchronize_rcu() in __sched_core_enable() */ ++ preempt_disable(); ++ ++ for (;;) { ++ lock = __rq_lockp(rq); ++ raw_spin_lock_nested(lock, subclass); ++ if (likely(lock == __rq_lockp(rq))) { ++ /* preempt_count *MUST* be > 1 */ ++ preempt_enable_no_resched(); ++ return; ++ } ++ raw_spin_unlock(lock); ++ } ++} ++ ++void raw_spin_rq_unlock(struct rq *rq) ++{ ++ raw_spin_unlock(rq_lockp(rq)); ++} ++ +/* + * RQ-clock updating methods: + */ @@ -948,9 +1218,105 @@ index 000000000000..09ca47de425c + if (unlikely(delta <= 0)) + return; + rq->clock += delta; ++ update_rq_time_edge(rq); + update_rq_clock_task(rq, delta); +} + ++/* ++ * RQ Load update routine ++ */ ++#define RQ_LOAD_HISTORY_BITS (sizeof(s32) * 8ULL) ++#define RQ_UTIL_SHIFT (8) ++#define RQ_LOAD_HISTORY_TO_UTIL(l) (((l) >> (RQ_LOAD_HISTORY_BITS - 1 - RQ_UTIL_SHIFT)) & 0xff) ++ ++#define LOAD_BLOCK(t) ((t) >> 17) ++#define LOAD_HALF_BLOCK(t) ((t) >> 16) ++#define BLOCK_MASK(t) ((t) & ((0x01 << 18) - 1)) ++#define LOAD_BLOCK_BIT(b) (1UL << (RQ_LOAD_HISTORY_BITS - 1 - (b))) ++#define CURRENT_LOAD_BIT LOAD_BLOCK_BIT(0) ++ ++static inline void rq_load_update(struct rq *rq) ++{ ++ u64 time = rq->clock; ++ u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(rq->load_stamp), ++ RQ_LOAD_HISTORY_BITS - 1); ++ u64 prev = !!(rq->load_history & CURRENT_LOAD_BIT); ++ u64 curr = !!rq->nr_running; ++ ++ if (delta) { ++ rq->load_history = rq->load_history >> delta; ++ ++ if (delta < RQ_UTIL_SHIFT) { ++ rq->load_block += (~BLOCK_MASK(rq->load_stamp)) * prev; ++ if (!!LOAD_HALF_BLOCK(rq->load_block) ^ curr) ++ rq->load_history ^= LOAD_BLOCK_BIT(delta); ++ } ++ ++ rq->load_block = BLOCK_MASK(time) * prev; ++ } else { ++ rq->load_block += (time - rq->load_stamp) * prev; ++ } ++ if (prev ^ curr) ++ rq->load_history ^= CURRENT_LOAD_BIT; ++ rq->load_stamp = time; ++} ++ ++unsigned long rq_load_util(struct rq *rq, unsigned long max) ++{ ++ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (max >> RQ_UTIL_SHIFT); ++} ++ ++#ifdef CONFIG_SMP ++unsigned long sched_cpu_util(int cpu, unsigned long max) ++{ ++ return rq_load_util(cpu_rq(cpu), max); ++} ++#endif /* CONFIG_SMP */ ++ ++#ifdef CONFIG_CPU_FREQ ++/** ++ * cpufreq_update_util - Take a note about CPU utilization changes. ++ * @rq: Runqueue to carry out the update for. ++ * @flags: Update reason flags. ++ * ++ * This function is called by the scheduler on the CPU whose utilization is ++ * being updated. ++ * ++ * It can only be called from RCU-sched read-side critical sections. 
++ * ++ * The way cpufreq is currently arranged requires it to evaluate the CPU ++ * performance state (frequency/voltage) on a regular basis to prevent it from ++ * being stuck in a completely inadequate performance level for too long. ++ * That is not guaranteed to happen if the updates are only triggered from CFS ++ * and DL, though, because they may not be coming in if only RT tasks are ++ * active all the time (or there are RT tasks only). ++ * ++ * As a workaround for that issue, this function is called periodically by the ++ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, ++ * but that really is a band-aid. Going forward it should be replaced with ++ * solutions targeted more specifically at RT tasks. ++ */ ++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) ++{ ++ struct update_util_data *data; ++ ++#ifdef CONFIG_SMP ++ rq_load_update(rq); ++#endif ++ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, ++ cpu_of(rq))); ++ if (data) ++ data->func(data, rq_clock(rq), flags); ++} ++#else ++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) ++{ ++#ifdef CONFIG_SMP ++ rq_load_update(rq); ++#endif ++} ++#endif /* CONFIG_CPU_FREQ */ ++ +#ifdef CONFIG_NO_HZ_FULL +/* + * Tick may be needed by tasks in the runqueue depending on their policy and @@ -959,12 +1325,7 @@ index 000000000000..09ca47de425c + */ +static inline void sched_update_tick_dependency(struct rq *rq) +{ -+ int cpu; -+ -+ if (!tick_nohz_full_enabled()) -+ return; -+ -+ cpu = cpu_of(rq); ++ int cpu = cpu_of(rq); + + if (!tick_nohz_full_cpu(cpu)) + return; @@ -978,18 +1339,59 @@ index 000000000000..09ca47de425c +static inline void sched_update_tick_dependency(struct rq *rq) { } +#endif + ++bool sched_task_on_rq(struct task_struct *p) ++{ ++ return task_on_rq_queued(p); ++} ++ ++unsigned long get_wchan(struct task_struct *p) ++{ ++ unsigned long ip = 0; ++ unsigned int state; ++ ++ if (!p || p == current) ++ return 0; ++ ++ /* Only get wchan if task is blocked and we can keep it that way. 
*/ ++ raw_spin_lock_irq(&p->pi_lock); ++ state = READ_ONCE(p->__state); ++ smp_rmb(); /* see try_to_wake_up() */ ++ if (state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq) ++ ip = __get_wchan(p); ++ raw_spin_unlock_irq(&p->pi_lock); ++ ++ return ip; ++} ++ +/* + * Add/Remove/Requeue task to/from the runqueue routines + * Context: rq->lock + */ ++#define __SCHED_DEQUEUE_TASK(p, rq, flags) \ ++ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ ++ sched_info_dequeue(rq, p); \ ++ \ ++ list_del(&p->sq_node); \ ++ if (list_empty(&rq->queue.heads[p->sq_idx])) \ ++ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); ++ ++#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ ++ sched_info_enqueue(rq, p); \ ++ psi_enqueue(p, flags); \ ++ \ ++ p->sq_idx = task_sched_prio_idx(p, rq); \ ++ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ ++ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); ++ +static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) +{ + lockdep_assert_held(&rq->lock); + ++ /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", + task_cpu(p), cpu_of(rq)); + -+ __SCHED_DEQUEUE_TASK(p, rq, flags, update_sched_rq_watermark(rq)); ++ __SCHED_DEQUEUE_TASK(p, rq, flags); + --rq->nr_running; +#ifdef CONFIG_SMP + if (1 == rq->nr_running) @@ -1003,6 +1405,7 @@ index 000000000000..09ca47de425c +{ + lockdep_assert_held(&rq->lock); + ++ /*printk(KERN_INFO "sched: enqueue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", + task_cpu(p), cpu_of(rq)); + @@ -1015,23 +1418,25 @@ index 000000000000..09ca47de425c +#endif + + sched_update_tick_dependency(rq); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. -+ */ -+ if (p->in_iowait) -+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); +} + -+static inline void requeue_task(struct task_struct *p, struct rq *rq) ++static inline void requeue_task(struct task_struct *p, struct rq *rq, int idx) +{ + lockdep_assert_held(&rq->lock); ++ /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", + cpu_of(rq), task_cpu(p)); + -+ __requeue_task(p, rq); ++ list_del(&p->sq_node); ++ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); ++ if (idx != p->sq_idx) { ++ if (list_empty(&rq->queue.heads[p->sq_idx])) ++ clear_bit(sched_idx2prio(p->sq_idx, rq), ++ rq->queue.bitmap); ++ p->sq_idx = idx; ++ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); ++ update_sched_rq_watermark(rq); ++ } +} + +/* @@ -1109,7 +1514,7 @@ index 000000000000..09ca47de425c + + /* + * Atomically grab the task, if ->wake_q is !nil already it means -+ * its already queued (either by us or someone else) and will get the ++ * it's already queued (either by us or someone else) and will get the + * wakeup due to that. 
+ * + * In order to ensure that a pending wakeup will observe our pending @@ -1176,7 +1581,6 @@ index 000000000000..09ca47de425c + struct task_struct *task; + + task = container_of(node, struct task_struct, wake_q); -+ BUG_ON(!task); + /* task can safely be re-inserted now: */ + node = node->next; + task->wake_q.next = NULL; @@ -1233,13 +1637,9 @@ index 000000000000..09ca47de425c + +#ifdef CONFIG_SMP +#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) -+{ -+} ++void nohz_balance_enter_idle(int cpu) {} + -+void select_nohz_load_balancer(int stop_tick) -+{ -+} ++void select_nohz_load_balancer(int stop_tick) {} + +void set_cpu_sd_state_idle(void) {} + @@ -1255,21 +1655,24 @@ index 000000000000..09ca47de425c +{ + int i, cpu = smp_processor_id(), default_cpu = -1; + struct cpumask *mask; ++ const struct cpumask *hk_mask; + -+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { ++ if (housekeeping_cpu(cpu, HK_TYPE_TIMER)) { + if (!idle_cpu(cpu)) + return cpu; + default_cpu = cpu; + } + -+ for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) -+ for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) ++ hk_mask = housekeeping_cpumask(HK_TYPE_TIMER); ++ ++ for (mask = per_cpu(sched_cpu_topo_masks, cpu) + 1; ++ mask < per_cpu(sched_cpu_topo_end_mask, cpu); mask++) ++ for_each_cpu_and(i, mask, hk_mask) + if (!idle_cpu(i)) + return i; + + if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); ++ default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER); + cpu = default_cpu; + + return cpu; @@ -1287,11 +1690,15 @@ index 000000000000..09ca47de425c + */ +static inline void wake_up_idle_cpu(int cpu) +{ ++ struct rq *rq = cpu_rq(cpu); ++ + if (cpu == smp_processor_id()) + return; + -+ set_tsk_need_resched(cpu_rq(cpu)->idle); -+ smp_send_reschedule(cpu); ++ if (set_nr_and_not_polling(rq->idle)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); +} + +static inline bool wake_up_full_nohz_cpu(int cpu) @@ -1302,6 +1709,8 @@ index 000000000000..09ca47de425c + * If needed we can still optimize that later with an + * empty IRQ. + */ ++ if (cpu_is_offline(cpu)) ++ return true; /* Don't try to wake offline CPUs. */ + if (tick_nohz_full_cpu(cpu)) { + if (cpu != smp_processor_id() || + tick_nohz_tick_stopped()) @@ -1314,10 +1723,29 @@ index 000000000000..09ca47de425c + +void wake_up_nohz_cpu(int cpu) +{ -+ if (cpu_online(cpu) && !wake_up_full_nohz_cpu(cpu)) ++ if (!wake_up_full_nohz_cpu(cpu)) + wake_up_idle_cpu(cpu); +} + ++static void nohz_csd_func(void *info) ++{ ++ struct rq *rq = info; ++ int cpu = cpu_of(rq); ++ unsigned int flags; ++ ++ /* ++ * Release the rq::nohz_csd. 
++ */ ++ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); ++ WARN_ON(!(flags & NOHZ_KICK_MASK)); ++ ++ rq->idle_balance = idle_cpu(cpu); ++ if (rq->idle_balance && !need_resched()) { ++ rq->nohz_idle_balance = flags; ++ raise_softirq_irqoff(SCHED_SOFTIRQ); ++ } ++} ++ +#endif /* CONFIG_NO_HZ_COMMON */ +#endif /* CONFIG_SMP */ + @@ -1345,13 +1773,10 @@ index 000000000000..09ca47de425c +static enum hrtimer_restart hrtick(struct hrtimer *timer) +{ + struct rq *rq = container_of(timer, struct rq, hrtick_timer); -+ struct task_struct *p; + + WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); + + raw_spin_lock(&rq->lock); -+ p = rq->curr; -+ p->time_slice = 0; + resched_curr(rq); + raw_spin_unlock(&rq->lock); + @@ -1380,8 +1805,9 @@ index 000000000000..09ca47de425c +static void __hrtick_restart(struct rq *rq) +{ + struct hrtimer *timer = &rq->hrtick_timer; ++ ktime_t time = rq->hrtick_time; + -+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); ++ hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD); +} + +/* @@ -1404,7 +1830,6 @@ index 000000000000..09ca47de425c +void hrtick_start(struct rq *rq, u64 delay) +{ + struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time; + s64 delta; + + /* @@ -1412,9 +1837,8 @@ index 000000000000..09ca47de425c + * doesn't make sense and can cause timer DoS. + */ + delta = max_t(s64, delay, 10000LL); -+ time = ktime_add_ns(timer->base->get_time(), delta); + -+ hrtimer_set_expires(timer, time); ++ rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta); + + if (rq == this_rq()) + __hrtick_restart(rq); @@ -1443,9 +1867,7 @@ index 000000000000..09ca47de425c +static void hrtick_rq_init(struct rq *rq) +{ +#ifdef CONFIG_SMP -+ rq->hrtick_csd.flags = 0; -+ rq->hrtick_csd.func = __hrtick_start; -+ rq->hrtick_csd.info = rq; ++ INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq); +#endif + + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); @@ -1466,12 +1888,22 @@ index 000000000000..09ca47de425c +} +#endif /* CONFIG_SCHED_HRTICK */ + ++static inline int __normal_prio(int policy, int rt_prio, int static_prio) ++{ ++ return rt_policy(policy) ? (MAX_RT_PRIO - 1 - rt_prio) : ++ static_prio + MAX_PRIORITY_ADJ; ++} ++ ++/* ++ * Calculate the expected normal priority: i.e. priority ++ * without taking RT-inheritance into account. Might be ++ * boosted by interactivity modifiers. Changes upon fork, ++ * setprio syscalls, and whenever the interactivity ++ * estimator recalculates. ++ */ +static inline int normal_prio(struct task_struct *p) +{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return p->static_prio + MAX_PRIORITY_ADJ; ++ return __normal_prio(p->policy, p->rt_priority, p->static_prio); +} + +/* @@ -1500,11 +1932,15 @@ index 000000000000..09ca47de425c + */ +static void activate_task(struct task_struct *p, struct rq *rq) +{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible--; + enqueue_task(p, rq, ENQUEUE_WAKEUP); + p->on_rq = TASK_ON_RQ_QUEUED; -+ cpufreq_update_util(rq, 0); ++ ++ /* ++ * If in_iowait is set, the code below may not trigger any cpufreq ++ * utilization updates, so do it here explicitly with the IOWAIT flag ++ * passed. 
++ */ ++ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT * p->in_iowait); +} + +/* @@ -1514,8 +1950,6 @@ index 000000000000..09ca47de425c + */ +static inline void deactivate_task(struct task_struct *p, struct rq *rq) +{ -+ if (task_contributes_to_load(p)) -+ rq->nr_uninterruptible++; + dequeue_task(p, rq, DEQUEUE_SLEEP); + p->on_rq = 0; + cpufreq_update_util(rq, 0); @@ -1531,24 +1965,35 @@ index 000000000000..09ca47de425c + */ + smp_wmb(); + -+#ifdef CONFIG_THREAD_INFO_IN_TASK -+ WRITE_ONCE(p->cpu, cpu); -+#else + WRITE_ONCE(task_thread_info(p)->cpu, cpu); +#endif -+#endif +} + ++static inline bool is_migration_disabled(struct task_struct *p) ++{ +#ifdef CONFIG_SMP ++ return p->migration_disabled; ++#else ++ return false; ++#endif ++} ++ ++#define SCA_CHECK 0x01 ++#define SCA_USER 0x08 ++ ++#ifdef CONFIG_SMP ++ +void set_task_cpu(struct task_struct *p, unsigned int new_cpu) +{ +#ifdef CONFIG_SCHED_DEBUG ++ unsigned int state = READ_ONCE(p->__state); ++ + /* + * We should never call set_task_cpu() on a blocked task, + * ttwu() will sort out the placement. + */ -+ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && -+ !p->on_rq); ++ WARN_ON_ONCE(state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq); ++ +#ifdef CONFIG_LOCKDEP + /* + * The caller should hold either p->pi_lock or rq->lock, when changing @@ -1564,6 +2009,8 @@ index 000000000000..09ca47de425c + * Clearly, migrating tasks to offline CPUs is a fairly daft thing. + */ + WARN_ON_ONCE(!cpu_online(new_cpu)); ++ ++ WARN_ON_ONCE(is_migration_disabled(p)); +#endif + if (task_cpu(p) == new_cpu) + return; @@ -1574,9 +2021,95 @@ index 000000000000..09ca47de425c + __set_task_cpu(p, new_cpu); +} + -+static inline bool is_per_cpu_kthread(struct task_struct *p) ++#define MDF_FORCE_ENABLED 0x80 ++ ++static void ++__do_set_cpus_ptr(struct task_struct *p, const struct cpumask *new_mask) +{ -+ return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); ++ /* ++ * This here violates the locking rules for affinity, since we're only ++ * supposed to change these variables while holding both rq->lock and ++ * p->pi_lock. ++ * ++ * HOWEVER, it magically works, because ttwu() is the only code that ++ * accesses these variables under p->pi_lock and only does so after ++ * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() ++ * before finish_task(). ++ * ++ * XXX do further audits, this smells like something putrid. ++ */ ++ SCHED_WARN_ON(!p->on_cpu); ++ p->cpus_ptr = new_mask; ++} ++ ++void migrate_disable(void) ++{ ++ struct task_struct *p = current; ++ int cpu; ++ ++ if (p->migration_disabled) { ++ p->migration_disabled++; ++ return; ++ } ++ ++ preempt_disable(); ++ cpu = smp_processor_id(); ++ if (cpumask_test_cpu(cpu, &p->cpus_mask)) { ++ cpu_rq(cpu)->nr_pinned++; ++ p->migration_disabled = 1; ++ p->migration_flags &= ~MDF_FORCE_ENABLED; ++ ++ /* ++ * Violates locking rules! see comment in __do_set_cpus_ptr(). ++ */ ++ if (p->cpus_ptr == &p->cpus_mask) ++ __do_set_cpus_ptr(p, cpumask_of(cpu)); ++ } ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(migrate_disable); ++ ++void migrate_enable(void) ++{ ++ struct task_struct *p = current; ++ ++ if (0 == p->migration_disabled) ++ return; ++ ++ if (p->migration_disabled > 1) { ++ p->migration_disabled--; ++ return; ++ } ++ ++ if (WARN_ON_ONCE(!p->migration_disabled)) ++ return; ++ ++ /* ++ * Ensure stop_task runs either before or after this, and that ++ * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). 
++ */ ++ preempt_disable(); ++ /* ++ * Assumption: current should be running on allowed cpu ++ */ ++ WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &p->cpus_mask)); ++ if (p->cpus_ptr != &p->cpus_mask) ++ __do_set_cpus_ptr(p, &p->cpus_mask); ++ /* ++ * Mustn't clear migration_disabled() until cpus_ptr points back at the ++ * regular cpus_mask, otherwise things that race (eg. ++ * select_fallback_rq) get confused. ++ */ ++ barrier(); ++ p->migration_disabled = 0; ++ this_rq()->nr_pinned--; ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(migrate_enable); ++ ++static inline bool rq_has_pinned_tasks(struct rq *rq) ++{ ++ return rq->nr_pinned; +} + +/* @@ -1585,13 +2118,28 @@ index 000000000000..09ca47de425c + */ +static inline bool is_cpu_allowed(struct task_struct *p, int cpu) +{ ++ /* When not in the task's cpumask, no point in looking further. */ + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) + return false; + -+ if (is_per_cpu_kthread(p)) ++ /* migrate_disabled() must be allowed to finish. */ ++ if (is_migration_disabled(p)) + return cpu_online(cpu); + -+ return cpu_active(cpu); ++ /* Non kernel threads are not allowed during either online or offline. */ ++ if (!(p->flags & PF_KTHREAD)) ++ return cpu_active(cpu) && task_cpu_possible(cpu, p); ++ ++ /* KTHREAD_IS_PER_CPU is always allowed. */ ++ if (kthread_is_per_cpu(p)) ++ return cpu_online(cpu); ++ ++ /* Regular kernel threads don't get to stay during offline. */ ++ if (cpu_dying(cpu)) ++ return false; ++ ++ /* But are allowed during online. */ ++ return cpu_online(cpu); +} + +/* @@ -1620,6 +2168,7 @@ index 000000000000..09ca47de425c + + WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); + dequeue_task(p, rq, 0); ++ update_sched_rq_watermark(rq); + set_task_cpu(p, new_cpu); + raw_spin_unlock(&rq->lock); + @@ -1627,6 +2176,7 @@ index 000000000000..09ca47de425c + + raw_spin_lock(&rq->lock); + BUG_ON(task_cpu(p) != new_cpu); ++ sched_task_sanity_check(p, rq); + enqueue_task(p, rq, 0); + p->on_rq = TASK_ON_RQ_QUEUED; + check_preempt_curr(rq); @@ -1669,12 +2219,19 @@ index 000000000000..09ca47de425c + struct migration_arg *arg = data; + struct task_struct *p = arg->task; + struct rq *rq = this_rq(); ++ unsigned long flags; + + /* + * The original target CPU might have gone down and we might + * be on another CPU but it doesn't matter. + */ -+ local_irq_disable(); ++ local_irq_save(flags); ++ /* ++ * We need to explicitly wake pending tasks before running ++ * __migrate_task() such that we will not miss enforcing cpus_ptr ++ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. 
++ */ ++ flush_smp_call_function_queue(); + + raw_spin_lock(&p->pi_lock); + raw_spin_lock(&rq->lock); @@ -1686,9 +2243,8 @@ index 000000000000..09ca47de425c + if (task_rq(p) == rq && task_on_rq_queued(p)) + rq = __migrate_task(rq, p, arg->dest_cpu); + raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); + -+ local_irq_enable(); + return 0; +} + @@ -1699,10 +2255,46 @@ index 000000000000..09ca47de425c + p->nr_cpus_allowed = cpumask_weight(new_mask); +} + -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) ++static void ++__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) +{ ++ lockdep_assert_held(&p->pi_lock); + set_cpus_allowed_common(p, new_mask); +} ++ ++void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ __do_set_cpus_allowed(p, new_mask); ++} ++ ++int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, ++ int node) ++{ ++ if (!src->user_cpus_ptr) ++ return 0; ++ ++ dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); ++ if (!dst->user_cpus_ptr) ++ return -ENOMEM; ++ ++ cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); ++ return 0; ++} ++ ++static inline struct cpumask *clear_user_cpus_ptr(struct task_struct *p) ++{ ++ struct cpumask *user_mask = NULL; ++ ++ swap(p->user_cpus_ptr, user_mask); ++ ++ return user_mask; ++} ++ ++void release_user_cpus_ptr(struct task_struct *p) ++{ ++ kfree(clear_user_cpus_ptr(p)); ++} ++ +#endif + +/** @@ -1733,7 +2325,7 @@ index 000000000000..09ca47de425c + * smp_call_function() if an IPI is sent by the same process we are + * waiting to become inactive. + */ -+unsigned long wait_task_inactive(struct task_struct *p, long match_state) ++unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) +{ + unsigned long flags; + bool running, on_rq; @@ -1756,7 +2348,7 @@ index 000000000000..09ca47de425c + * running somewhere else! + */ + while (task_running(p) && p == rq->curr) { -+ if (match_state && unlikely(p->state != match_state)) ++ if (match_state && unlikely(READ_ONCE(p->__state) != match_state)) + return 0; + cpu_relax(); + } @@ -1771,7 +2363,7 @@ index 000000000000..09ca47de425c + running = task_running(p); + on_rq = p->on_rq; + ncsw = 0; -+ if (!match_state || p->state == match_state) ++ if (!match_state || READ_ONCE(p->__state) == match_state) + ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ + task_access_unlock_irqrestore(p, lock, &flags); + @@ -1805,7 +2397,7 @@ index 000000000000..09ca47de425c + ktime_t to = NSEC_PER_SEC / HZ; + + set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL); ++ schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); + continue; + } + @@ -1884,9 +2476,7 @@ index 000000000000..09ca47de425c + + /* Look for allowed, online CPU in same node. */ + for_each_cpu(dest_cpu, nodemask) { -+ if (!cpu_active(dest_cpu)) -+ continue; -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) ++ if (is_cpu_allowed(p, dest_cpu)) + return dest_cpu; + } + } @@ -1902,14 +2492,19 @@ index 000000000000..09ca47de425c + /* No more Mr. Nice Guy. 
*/ + switch (state) { + case cpuset: -+ if (IS_ENABLED(CONFIG_CPUSETS)) { -+ cpuset_cpus_allowed_fallback(p); ++ if (cpuset_cpus_allowed_fallback(p)) { + state = possible; + break; + } -+ /* Fall-through */ ++ fallthrough; + case possible: -+ do_set_cpus_allowed(p, cpu_possible_mask); ++ /* ++ * XXX When called from select_task_rq() we only ++ * hold p->pi_lock and again violate locking order. ++ * ++ * More yuck to audit. ++ */ ++ do_set_cpus_allowed(p, task_cpu_possible_mask(p)); + state = fail; + break; + @@ -1939,16 +2534,16 @@ index 000000000000..09ca47de425c +{ + cpumask_t chk_mask, tmp; + -+ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) ++ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_active_mask))) + return select_fallback_rq(task_cpu(p), p); + + if ( +#ifdef CONFIG_SCHED_SMT + cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || +#endif -+ cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || ++ cpumask_and(&tmp, &chk_mask, sched_rq_watermark) || + cpumask_and(&tmp, &chk_mask, -+ &sched_rq_watermark[task_sched_prio(p) + 1])) ++ sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p))) + return best_mask_cpu(task_cpu(p), &tmp); + + return best_mask_cpu(task_cpu(p), &chk_mask); @@ -1956,6 +2551,7 @@ index 000000000000..09ca47de425c + +void sched_set_stop_task(int cpu, struct task_struct *stop) +{ ++ static struct lock_class_key stop_pi_lock; + struct sched_param stop_param = { .sched_priority = STOP_PRIO }; + struct sched_param start_param = { .sched_priority = 0 }; + struct task_struct *old_stop = cpu_rq(cpu)->stop; @@ -1970,6 +2566,20 @@ index 000000000000..09ca47de425c + * rely on PI working anyway. + */ + sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); ++ ++ /* ++ * The PI code calls rt_mutex_setprio() with ->pi_lock held to ++ * adjust the effective priority of a task. As a result, ++ * rt_mutex_setprio() can trigger (RT) balancing operations, ++ * which can then trigger wakeups of the stop thread to push ++ * around the current task. ++ * ++ * The stop task itself will never be part of the PI-chain, it ++ * never blocks, therefore that ->pi_lock recursion is safe. ++ * Tell lockdep about this by placing the stop->pi_lock in its ++ * own class. ++ */ ++ lockdep_set_class(&stop->pi_lock, &stop_pi_lock); + } + + cpu_rq(cpu)->stop = stop; @@ -1983,6 +2593,113 @@ index 000000000000..09ca47de425c + } +} + ++static int affine_move_task(struct rq *rq, struct task_struct *p, int dest_cpu, ++ raw_spinlock_t *lock, unsigned long irq_flags) ++{ ++ /* Can the task run on the task's current CPU? If so, we're done */ ++ if (!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { ++ if (p->migration_disabled) { ++ if (likely(p->cpus_ptr != &p->cpus_mask)) ++ __do_set_cpus_ptr(p, &p->cpus_mask); ++ p->migration_disabled = 0; ++ p->migration_flags |= MDF_FORCE_ENABLED; ++ /* When p is migrate_disabled, rq->lock should be held */ ++ rq->nr_pinned--; ++ } ++ ++ if (task_running(p) || READ_ONCE(p->__state) == TASK_WAKING) { ++ struct migration_arg arg = { p, dest_cpu }; ++ ++ /* Need help from migration thread: drop lock and wait. */ ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); ++ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); ++ return 0; ++ } ++ if (task_on_rq_queued(p)) { ++ /* ++ * OK, since we're going to drop the lock immediately ++ * afterwards anyway. 
++ */ ++ update_rq_clock(rq); ++ rq = move_queued_task(rq, p, dest_cpu); ++ lock = &rq->lock; ++ } ++ } ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); ++ return 0; ++} ++ ++static int __set_cpus_allowed_ptr_locked(struct task_struct *p, ++ const struct cpumask *new_mask, ++ u32 flags, ++ struct rq *rq, ++ raw_spinlock_t *lock, ++ unsigned long irq_flags) ++{ ++ const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); ++ const struct cpumask *cpu_valid_mask = cpu_active_mask; ++ bool kthread = p->flags & PF_KTHREAD; ++ struct cpumask *user_mask = NULL; ++ int dest_cpu; ++ int ret = 0; ++ ++ if (kthread || is_migration_disabled(p)) { ++ /* ++ * Kernel threads are allowed on online && !active CPUs, ++ * however, during cpu-hot-unplug, even these might get pushed ++ * away if not KTHREAD_IS_PER_CPU. ++ * ++ * Specifically, migration_disabled() tasks must not fail the ++ * cpumask_any_and_distribute() pick below, esp. so on ++ * SCA_MIGRATE_ENABLE, otherwise we'll not call ++ * set_cpus_allowed_common() and actually reset p->cpus_ptr. ++ */ ++ cpu_valid_mask = cpu_online_mask; ++ } ++ ++ if (!kthread && !cpumask_subset(new_mask, cpu_allowed_mask)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * Must re-check here, to close a race against __kthread_bind(), ++ * sched_setaffinity() is not guaranteed to observe the flag. ++ */ ++ if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (cpumask_equal(&p->cpus_mask, new_mask)) ++ goto out; ++ ++ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); ++ if (dest_cpu >= nr_cpu_ids) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ __do_set_cpus_allowed(p, new_mask); ++ ++ if (flags & SCA_USER) ++ user_mask = clear_user_cpus_ptr(p); ++ ++ ret = affine_move_task(rq, p, dest_cpu, lock, irq_flags); ++ ++ kfree(user_mask); ++ ++ return ret; ++ ++out: ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); ++ ++ return ret; ++} ++ +/* + * Change a given task's CPU affinity. Migrate the thread to a + * proper CPU and schedule it away if the CPU it's executing on @@ -1993,91 +2710,150 @@ index 000000000000..09ca47de425c + * call is not atomic; no spinlocks may be held. + */ +static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) ++ const struct cpumask *new_mask, u32 flags) +{ -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ int dest_cpu; -+ unsigned long flags; ++ unsigned long irq_flags; + struct rq *rq; + raw_spinlock_t *lock; -+ int ret = 0; + -+ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ raw_spin_lock_irqsave(&p->pi_lock, irq_flags); + rq = __task_access_lock(p, &lock); + -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(p->cpus_ptr, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ do_set_cpus_allowed(p, new_mask); -+ -+ if (p->flags & PF_KTHREAD) { -+ /* -+ * For kernel threads that do indeed end up on online && -+ * !active we want to ensure they are strict per-CPU threads. 
-+ */ -+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -+ !cpumask_intersects(new_mask, cpu_active_mask) && -+ p->nr_cpus_allowed != 1); -+ } -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), new_mask)) -+ goto out; -+ -+ if (task_running(p) || p->state == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. -+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return ret; ++ return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, lock, irq_flags); +} + +int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) +{ -+ return __set_cpus_allowed_ptr(p, new_mask, false); ++ return __set_cpus_allowed_ptr(p, new_mask, 0); +} +EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + ++/* ++ * Change a given task's CPU affinity to the intersection of its current ++ * affinity mask and @subset_mask, writing the resulting mask to @new_mask ++ * and pointing @p->user_cpus_ptr to a copy of the old mask. ++ * If the resulting mask is empty, leave the affinity unchanged and return ++ * -EINVAL. ++ */ ++static int restrict_cpus_allowed_ptr(struct task_struct *p, ++ struct cpumask *new_mask, ++ const struct cpumask *subset_mask) ++{ ++ struct cpumask *user_mask = NULL; ++ unsigned long irq_flags; ++ raw_spinlock_t *lock; ++ struct rq *rq; ++ int err; ++ ++ if (!p->user_cpus_ptr) { ++ user_mask = kmalloc(cpumask_size(), GFP_KERNEL); ++ if (!user_mask) ++ return -ENOMEM; ++ } ++ ++ raw_spin_lock_irqsave(&p->pi_lock, irq_flags); ++ rq = __task_access_lock(p, &lock); ++ ++ if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) { ++ err = -EINVAL; ++ goto err_unlock; ++ } ++ ++ /* ++ * We're about to butcher the task affinity, so keep track of what ++ * the user asked for in case we're able to restore it later on. ++ */ ++ if (user_mask) { ++ cpumask_copy(user_mask, p->cpus_ptr); ++ p->user_cpus_ptr = user_mask; ++ } ++ ++ /*return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);*/ ++ return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, lock, irq_flags); ++ ++err_unlock: ++ __task_access_unlock(p, lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); ++ kfree(user_mask); ++ return err; ++} ++ ++/* ++ * Restrict the CPU affinity of task @p so that it is a subset of ++ * task_cpu_possible_mask() and point @p->user_cpu_ptr to a copy of the ++ * old affinity mask. If the resulting mask is empty, we warn and walk ++ * up the cpuset hierarchy until we find a suitable mask. ++ */ ++void force_compatible_cpus_allowed_ptr(struct task_struct *p) ++{ ++ cpumask_var_t new_mask; ++ const struct cpumask *override_mask = task_cpu_possible_mask(p); ++ ++ alloc_cpumask_var(&new_mask, GFP_KERNEL); ++ ++ /* ++ * __migrate_task() can fail silently in the face of concurrent ++ * offlining of the chosen destination CPU, so take the hotplug ++ * lock to ensure that the migration succeeds. 
++ */ ++ cpus_read_lock(); ++ if (!cpumask_available(new_mask)) ++ goto out_set_mask; ++ ++ if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask)) ++ goto out_free_mask; ++ ++ /* ++ * We failed to find a valid subset of the affinity mask for the ++ * task, so override it based on its cpuset hierarchy. ++ */ ++ cpuset_cpus_allowed(p, new_mask); ++ override_mask = new_mask; ++ ++out_set_mask: ++ if (printk_ratelimit()) { ++ printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", ++ task_pid_nr(p), p->comm, ++ cpumask_pr_args(override_mask)); ++ } ++ ++ WARN_ON(set_cpus_allowed_ptr(p, override_mask)); ++out_free_mask: ++ cpus_read_unlock(); ++ free_cpumask_var(new_mask); ++} ++ ++static int ++__sched_setaffinity(struct task_struct *p, const struct cpumask *mask); ++ ++/* ++ * Restore the affinity of a task @p which was previously restricted by a ++ * call to force_compatible_cpus_allowed_ptr(). This will clear (and free) ++ * @p->user_cpus_ptr. ++ * ++ * It is the caller's responsibility to serialise this with any calls to ++ * force_compatible_cpus_allowed_ptr(@p). ++ */ ++void relax_compatible_cpus_allowed_ptr(struct task_struct *p) ++{ ++ struct cpumask *user_mask = p->user_cpus_ptr; ++ unsigned long flags; ++ ++ /* ++ * Try to restore the old affinity mask. If this fails, then ++ * we free the mask explicitly to avoid it being inherited across ++ * a subsequent fork(). ++ */ ++ if (!user_mask || !__sched_setaffinity(p, user_mask)) ++ return; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ user_mask = clear_user_cpus_ptr(p); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ kfree(user_mask); ++} ++ +#else /* CONFIG_SMP */ + +static inline int select_task_rq(struct task_struct *p) @@ -2087,12 +2863,17 @@ index 000000000000..09ca47de425c + +static inline int +__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) ++ const struct cpumask *new_mask, u32 flags) +{ + return set_cpus_allowed_ptr(p, new_mask); +} + -+#endif /* CONFIG_SMP */ ++static inline bool rq_has_pinned_tasks(struct rq *rq) ++{ ++ return false; ++} ++ ++#endif /* !CONFIG_SMP */ + +static void +ttwu_stat(struct task_struct *p, int cpu, int wake_flags) @@ -2102,12 +2883,13 @@ index 000000000000..09ca47de425c + if (!schedstat_enabled()) + return; + -+ rq= this_rq(); ++ rq = this_rq(); + +#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) ++ if (cpu == rq->cpu) { + __schedstat_inc(rq->ttwu_local); -+ else { ++ __schedstat_inc(p->stats.nr_wakeups_local); ++ } else { + /** Alt schedule FW ToDo: + * How to do ttwu_wake_remote + */ @@ -2115,6 +2897,7 @@ index 000000000000..09ca47de425c +#endif /* CONFIG_SMP */ + + __schedstat_inc(rq->ttwu_count); ++ __schedstat_inc(p->stats.nr_wakeups); +} + +/* @@ -2123,23 +2906,56 @@ index 000000000000..09ca47de425c +static inline void +ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) +{ -+ p->state = TASK_RUNNING; ++ check_preempt_curr(rq); ++ WRITE_ONCE(p->__state, TASK_RUNNING); + trace_sched_wakeup(p); +} + +static inline void +ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) +{ -+#ifdef CONFIG_SMP + if (p->sched_contributes_to_load) + rq->nr_uninterruptible--; ++ ++ if ( ++#ifdef CONFIG_SMP ++ !(wake_flags & WF_MIGRATED) && +#endif ++ p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } + + activate_task(p, rq); + ttwu_do_wakeup(rq, p, 0); +} + -+static int ttwu_remote(struct task_struct *p, int wake_flags) ++/* ++ * Consider @p being inside a wait loop: ++ 
* ++ * for (;;) { ++ * set_current_state(TASK_UNINTERRUPTIBLE); ++ * ++ * if (CONDITION) ++ * break; ++ * ++ * schedule(); ++ * } ++ * __set_current_state(TASK_RUNNING); ++ * ++ * between set_current_state() and schedule(). In this case @p is still ++ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in ++ * an atomic manner. ++ * ++ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq ++ * then schedule() must still happen and p->state can be changed to ++ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we ++ * need to do a full wakeup with enqueue. ++ * ++ * Returns: %true when the wakeup is done, ++ * %false otherwise. ++ */ ++static int ttwu_runnable(struct task_struct *p, int wake_flags) +{ + struct rq *rq; + raw_spinlock_t *lock; @@ -2147,6 +2963,8 @@ index 000000000000..09ca47de425c + + rq = __task_access_lock(p, &lock); + if (task_on_rq_queued(p)) { ++ /* check_preempt_curr() may use rq clock */ ++ update_rq_clock(rq); + ttwu_do_wakeup(rq, p, wake_flags); + ret = 1; + } @@ -2156,20 +2974,105 @@ index 000000000000..09ca47de425c +} + +#ifdef CONFIG_SMP -+void scheduler_ipi(void) ++void sched_ttwu_pending(void *arg) +{ -+ /* -+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting -+ * TIF_NEED_RESCHED remotely (for the first time) will also send -+ * this IPI. -+ */ -+ preempt_fold_need_resched(); ++ struct llist_node *llist = arg; ++ struct rq *rq = this_rq(); ++ struct task_struct *p, *t; ++ struct rq_flags rf; + -+ if (!idle_cpu(smp_processor_id()) || need_resched()) ++ if (!llist) + return; + -+ irq_enter(); -+ irq_exit(); ++ /* ++ * rq::ttwu_pending racy indication of out-standing wakeups. ++ * Races such that false-negatives are possible, since they ++ * are shorter lived that false-positives would be. ++ */ ++ WRITE_ONCE(rq->ttwu_pending, 0); ++ ++ rq_lock_irqsave(rq, &rf); ++ update_rq_clock(rq); ++ ++ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { ++ if (WARN_ON_ONCE(p->on_cpu)) ++ smp_cond_load_acquire(&p->on_cpu, !VAL); ++ ++ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) ++ set_task_cpu(p, cpu_of(rq)); ++ ++ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); ++ } ++ ++ rq_unlock_irqrestore(rq, &rf); ++} ++ ++void send_call_function_single_ipi(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (!set_nr_if_polling(rq->idle)) ++ arch_send_call_function_single_ipi(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++} ++ ++/* ++ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if ++ * necessary. The wakee CPU on receipt of the IPI will queue the task ++ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost ++ * of the wakeup instead of the waker. ++ */ ++static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); ++ ++ WRITE_ONCE(rq->ttwu_pending, 1); ++ __smp_call_single_queue(cpu, &p->wake_entry.llist); ++} ++ ++static inline bool ttwu_queue_cond(int cpu, int wake_flags) ++{ ++ /* ++ * Do not complicate things with the async wake_list while the CPU is ++ * in hotplug state. ++ */ ++ if (!cpu_active(cpu)) ++ return false; ++ ++ /* ++ * If the CPU does not share cache, then queue the task on the ++ * remote rqs wakelist to avoid accessing remote data. 
++ */ ++ if (!cpus_share_cache(smp_processor_id(), cpu)) ++ return true; ++ ++ /* ++ * If the task is descheduling and the only running task on the ++ * CPU then use the wakelist to offload the task activation to ++ * the soon-to-be-idle CPU as the current CPU is likely busy. ++ * nr_running is checked to avoid unnecessary task stacking. ++ */ ++ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) ++ return true; ++ ++ return false; ++} ++ ++static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) ++{ ++ if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { ++ if (WARN_ON_ONCE(cpu == smp_processor_id())) ++ return false; ++ ++ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ ++ __ttwu_queue_wakelist(p, cpu, wake_flags); ++ return true; ++ } ++ ++ return false; +} + +void wake_up_if_idle(int cpu) @@ -2182,15 +3085,11 @@ index 000000000000..09ca47de425c + if (!is_idle_task(rcu_dereference(rq->curr))) + goto out; + -+ if (set_nr_if_polling(rq->idle)) { -+ trace_sched_wake_idle_without_ipi(cpu); -+ } else { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ smp_send_reschedule(cpu); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ if (is_idle_task(rq->curr)) ++ resched_curr(rq); ++ /* Else CPU is not idle, do nothing here */ ++ raw_spin_unlock_irqrestore(&rq->lock, flags); + +out: + rcu_read_unlock(); @@ -2198,22 +3097,83 @@ index 000000000000..09ca47de425c + +bool cpus_share_cache(int this_cpu, int that_cpu) +{ ++ if (this_cpu == that_cpu) ++ return true; ++ + return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); +} ++#else /* !CONFIG_SMP */ ++ ++static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) ++{ ++ return false; ++} ++ +#endif /* CONFIG_SMP */ + +static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) +{ + struct rq *rq = cpu_rq(cpu); + ++ if (ttwu_queue_wakelist(p, cpu, wake_flags)) ++ return; ++ + raw_spin_lock(&rq->lock); + update_rq_clock(rq); + ttwu_do_activate(rq, p, wake_flags); -+ check_preempt_curr(rq); + raw_spin_unlock(&rq->lock); +} + +/* ++ * Invoked from try_to_wake_up() to check whether the task can be woken up. ++ * ++ * The caller holds p::pi_lock if p != current or has preemption ++ * disabled when p == current. ++ * ++ * The rules of PREEMPT_RT saved_state: ++ * ++ * The related locking code always holds p::pi_lock when updating ++ * p::saved_state, which means the code is fully serialized in both cases. ++ * ++ * The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other ++ * bits set. This allows to distinguish all wakeup scenarios. ++ */ ++static __always_inline ++bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) ++{ ++ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) { ++ WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) && ++ state != TASK_RTLOCK_WAIT); ++ } ++ ++ if (READ_ONCE(p->__state) & state) { ++ *success = 1; ++ return true; ++ } ++ ++#ifdef CONFIG_PREEMPT_RT ++ /* ++ * Saved state preserves the task state across blocking on ++ * an RT lock. If the state matches, set p::saved_state to ++ * TASK_RUNNING, but do not wake the task because it waits ++ * for a lock wakeup. Also indicate success because from ++ * the regular waker's point of view this has succeeded. 
++ * ++ * After acquiring the lock the task will restore p::__state ++ * from p::saved_state which ensures that the regular ++ * wakeup is not lost. The restore will also set ++ * p::saved_state to TASK_RUNNING so any further tests will ++ * not result in false positives vs. @success ++ */ ++ if (p->saved_state & state) { ++ p->saved_state = TASK_RUNNING; ++ *success = 1; ++ } ++#endif ++ return false; ++} ++ ++/* + * Notes on Program-Order guarantees on SMP systems. + * + * MIGRATION @@ -2262,8 +3222,8 @@ index 000000000000..09ca47de425c + * migration. However the means are completely different as there is no lock + * chain to provide order. Instead we do: + * -+ * 1) smp_store_release(X->on_cpu, 0) -+ * 2) smp_cond_load_acquire(!X->on_cpu) ++ * 1) smp_store_release(X->on_cpu, 0) -- finish_task() ++ * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up() + * + * Example: + * @@ -2304,20 +3264,42 @@ index 000000000000..09ca47de425c + * + */ + -+/*** ++/** + * try_to_wake_up - wake up a thread + * @p: the thread to be awakened + * @state: the mask of task states that can be woken + * @wake_flags: wake modifier flags (WF_*) + * -+ * Put it on the run-queue if it's not already there. The "current" -+ * thread is always on the run-queue (except when the actual -+ * re-schedule is in progress), and as such you're allowed to do -+ * the simpler "current->state = TASK_RUNNING" to mark yourself -+ * runnable without the overhead of this. ++ * Conceptually does: + * -+ * Return: %true if @p was woken up, %false if it was already running. -+ * or @state didn't match @p's state. ++ * If (@state & @p->state) @p->state = TASK_RUNNING. ++ * ++ * If the task was not queued/runnable, also place it back on a runqueue. ++ * ++ * This function is atomic against schedule() which would dequeue the task. ++ * ++ * It issues a full memory barrier before accessing @p->state, see the comment ++ * with set_current_state(). ++ * ++ * Uses p->pi_lock to serialize against concurrent wake-ups. ++ * ++ * Relies on p->pi_lock stabilizing: ++ * - p->sched_class ++ * - p->cpus_ptr ++ * - p->sched_task_group ++ * in order to do migration, see its use of select_task_rq()/set_task_cpu(). ++ * ++ * Tries really hard to only take one task_rq(p)->lock for performance. ++ * Takes rq->lock in: ++ * - ttwu_runnable() -- old rq, unavoidable, see comment there; ++ * - ttwu_queue() -- new rq, for enqueue of the task; ++ * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. ++ * ++ * As a consequence we race really badly with just about everything. See the ++ * many memory barriers and their comments for details. ++ * ++ * Return: %true if @p->state changes (an actual wakeup was done), ++ * %false otherwise. + */ +static int try_to_wake_up(struct task_struct *p, unsigned int state, + int wake_flags) @@ -2330,7 +3312,7 @@ index 000000000000..09ca47de425c + /* + * We're waking current, this means 'p->on_rq' and 'task_cpu(p) + * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_remote()' case below ++ * case the whole 'p->on_rq && ttwu_runnable()' case below + * without taking any locks. + * + * In particular: @@ -2338,13 +3320,11 @@ index 000000000000..09ca47de425c + * - we're serialized against set_special_state() by virtue of + * it disabling IRQs (this allows not taking ->pi_lock). 
+ */ -+ if (!(p->state & state)) ++ if (!ttwu_state_match(p, state, &success)) + goto out; + -+ success = 1; -+ cpu = task_cpu(p); + trace_sched_waking(p); -+ p->state = TASK_RUNNING; ++ WRITE_ONCE(p->__state, TASK_RUNNING); + trace_sched_wakeup(p); + goto out; + } @@ -2352,20 +3332,16 @@ index 000000000000..09ca47de425c + /* + * If we are going to wake up a thread waiting for CONDITION we + * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with mb() in -+ * set_current_state() the waiting thread does. ++ * reordered with p->state check below. This pairs with smp_store_mb() ++ * in set_current_state() that the waiting thread does. + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); -+ if (!(p->state & state)) ++ if (!ttwu_state_match(p, state, &success)) + goto unlock; + + trace_sched_waking(p); + -+ /* We're going to change ->state: */ -+ success = 1; -+ cpu = task_cpu(p); -+ + /* + * Ensure we load p->on_rq _after_ p->state, otherwise it would + * be possible to, falsely, observe p->on_rq == 0 and get stuck @@ -2385,9 +3361,11 @@ index 000000000000..09ca47de425c + * + * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in + * __schedule(). See the comment for smp_mb__after_spinlock(). ++ * ++ * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). + */ + smp_rmb(); -+ if (p->on_rq && ttwu_remote(p, wake_flags)) ++ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) + goto unlock; + +#ifdef CONFIG_SMP @@ -2409,12 +3387,47 @@ index 000000000000..09ca47de425c + * + * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in + * __schedule(). See the comment for smp_mb__after_spinlock(). ++ * ++ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure ++ * schedule()'s deactivate_task() has 'happened' and p will no longer ++ * care about it's own p->state. See the comment in __schedule(). + */ -+ smp_rmb(); ++ smp_acquire__after_ctrl_dep(); ++ ++ /* ++ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq ++ * == 0), which means we need to do an enqueue, change p->state to ++ * TASK_WAKING such that we can unlock p->pi_lock before doing the ++ * enqueue, such as ttwu_queue_wakelist(). ++ */ ++ WRITE_ONCE(p->__state, TASK_WAKING); + + /* + * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until its done referencing the task. ++ * this task as prev, considering queueing p on the remote CPUs wake_list ++ * which potentially sends an IPI instead of spinning on p->on_cpu to ++ * let the waker make forward progress. This is safe because IRQs are ++ * disabled and the IPI will deliver after on_cpu is cleared. ++ * ++ * Ensure we load task_cpu(p) after p->on_cpu: ++ * ++ * set_task_cpu(p, cpu); ++ * STORE p->cpu = @cpu ++ * __schedule() (switch to task 'p') ++ * LOCK rq->lock ++ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) ++ * STORE p->on_cpu = 1 LOAD p->cpu ++ * ++ * to ensure we observe the correct CPU on which the task is currently ++ * scheduling. ++ */ ++ if (smp_load_acquire(&p->on_cpu) && ++ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) ++ goto unlock; ++ ++ /* ++ * If the owning (remote) CPU is still in the middle of schedule() with ++ * this task as prev, wait until it's done referencing the task. + * + * Pairs with the smp_store_release() in finish_task(). 
+ * @@ -2423,29 +3436,22 @@ index 000000000000..09ca47de425c + */ + smp_cond_load_acquire(&p->on_cpu, !VAL); + -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; -+ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) -+ boost_task(p); ++ sched_task_ttwu(p); + + cpu = select_task_rq(p); + + if (cpu != task_cpu(p)) { ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } ++ + wake_flags |= WF_MIGRATED; + psi_ttwu_dequeue(p); + set_task_cpu(p, cpu); + } -+#else /* CONFIG_SMP */ -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } ++#else ++ cpu = task_cpu(p); +#endif /* CONFIG_SMP */ + + ttwu_queue(p, cpu, wake_flags); @@ -2453,13 +3459,73 @@ index 000000000000..09ca47de425c + raw_spin_unlock_irqrestore(&p->pi_lock, flags); +out: + if (success) -+ ttwu_stat(p, cpu, wake_flags); ++ ttwu_stat(p, task_cpu(p), wake_flags); + preempt_enable(); + + return success; +} + +/** ++ * task_call_func - Invoke a function on task in fixed state ++ * @p: Process for which the function is to be invoked, can be @current. ++ * @func: Function to invoke. ++ * @arg: Argument to function. ++ * ++ * Fix the task in it's current state by avoiding wakeups and or rq operations ++ * and call @func(@arg) on it. This function can use ->on_rq and task_curr() ++ * to work out what the state is, if required. Given that @func can be invoked ++ * with a runqueue lock held, it had better be quite lightweight. ++ * ++ * Returns: ++ * Whatever @func returns ++ */ ++int task_call_func(struct task_struct *p, task_call_f func, void *arg) ++{ ++ struct rq *rq = NULL; ++ unsigned int state; ++ struct rq_flags rf; ++ int ret; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, rf.flags); ++ ++ state = READ_ONCE(p->__state); ++ ++ /* ++ * Ensure we load p->on_rq after p->__state, otherwise it would be ++ * possible to, falsely, observe p->on_rq == 0. ++ * ++ * See try_to_wake_up() for a longer comment. ++ */ ++ smp_rmb(); ++ ++ /* ++ * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when ++ * the task is blocked. Make sure to check @state since ttwu() can drop ++ * locks at the end, see ttwu_queue_wakelist(). ++ */ ++ if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq) ++ rq = __task_rq_lock(p, &rf); ++ ++ /* ++ * At this point the task is pinned; either: ++ * - blocked and we're holding off wakeups (pi->lock) ++ * - woken, and we're holding off enqueue (rq->lock) ++ * - queued, and we're holding off schedule (rq->lock) ++ * - running, and we're holding off de-schedule (rq->lock) ++ * ++ * The called function (@func) can use: task_curr(), p->on_rq and ++ * p->__state to differentiate between these states. ++ */ ++ ret = func(p, arg); ++ ++ if (rq) ++ __task_rq_unlock(rq, &rf); ++ ++ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); ++ return ret; ++} ++ ++/** + * wake_up_process - Wake up a specific process + * @p: The process to be woken up. 
+ * @@ -2495,6 +3561,11 @@ index 000000000000..09ca47de425c + p->stime = 0; + p->sched_time = 0; + ++#ifdef CONFIG_SCHEDSTATS ++ /* Even if schedstat is disabled, there should not be garbage */ ++ memset(&p->stats, 0, sizeof(p->stats)); ++#endif ++ +#ifdef CONFIG_PREEMPT_NOTIFIERS + INIT_HLIST_HEAD(&p->preempt_notifiers); +#endif @@ -2502,6 +3573,9 @@ index 000000000000..09ca47de425c +#ifdef CONFIG_COMPACTION + p->capture_control = NULL; +#endif ++#ifdef CONFIG_SMP ++ p->wake_entry.u_flags = CSD_TYPE_TTWU; ++#endif +} + +/* @@ -2509,16 +3583,13 @@ index 000000000000..09ca47de425c + */ +int sched_fork(unsigned long clone_flags, struct task_struct *p) +{ -+ unsigned long flags; -+ struct rq *rq; -+ + __sched_fork(clone_flags, p); + /* + * We mark the process as NEW here. This guarantees that + * nobody will actually run it, and a signal or other external + * event cannot wake it up and insert it on the runqueue either. + */ -+ p->state = TASK_NEW; ++ p->__state = TASK_NEW; + + /* + * Make sure we do not leak PI boosting priority to the child. @@ -2536,7 +3607,7 @@ index 000000000000..09ca47de425c + } else if (PRIO_TO_NICE(p->static_prio) < 0) + p->static_prio = NICE_TO_PRIO(0); + -+ p->prio = p->normal_prio = normal_prio(p); ++ p->prio = p->normal_prio = p->static_prio; + + /* + * We don't need the reset flag anymore after the fork. It has @@ -2545,42 +3616,6 @@ index 000000000000..09ca47de425c + p->sched_reset_on_fork = 0; + } + -+ p->boost_prio = (p->boost_prio < 0) ? -+ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -+ /* -+ * The child is not yet in the pid-hash so no cgroup attach races, -+ * and the cgroup is pinned to this child due to cgroup_fork() -+ * is ran before sched_fork(). -+ * -+ * Silence PROVE_RCU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. -+ */ -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, rq->curr->time_slice); -+#endif -+ -+ if (p->time_slice < RESCHED_NS) { -+ p->time_slice = sched_timeslice_ns; -+ resched_curr(rq); -+ } -+ raw_spin_unlock(&rq->lock); -+ -+ /* -+ * We're setting the CPU for the first time, we don't migrate, -+ * so use __set_task_cpu(). -+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ +#ifdef CONFIG_SCHED_INFO + if (unlikely(sched_info_on())) + memset(&p->sched_info, 0, sizeof(p->sched_info)); @@ -2590,10 +3625,53 @@ index 000000000000..09ca47de425c + return 0; +} + ++void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) ++{ ++ unsigned long flags; ++ struct rq *rq; ++ ++ /* ++ * Because we're not yet on the pid-hash, p->pi_lock isn't strictly ++ * required yet, but lockdep gets upset if rules are violated. ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ /* ++ * Share the timeslice between parent and child, thus the ++ * total amount of pending timeslices in the system doesn't change, ++ * resulting in more scheduling fairness. 
++ */ ++ rq = this_rq(); ++ raw_spin_lock(&rq->lock); ++ ++ rq->curr->time_slice /= 2; ++ p->time_slice = rq->curr->time_slice; ++#ifdef CONFIG_SCHED_HRTICK ++ hrtick_start(rq, rq->curr->time_slice); ++#endif ++ ++ if (p->time_slice < RESCHED_NS) { ++ p->time_slice = sched_timeslice_ns; ++ resched_curr(rq); ++ } ++ sched_task_fork(p, rq); ++ raw_spin_unlock(&rq->lock); ++ ++ rseq_migrate(p); ++ /* ++ * We're setting the CPU for the first time, we don't migrate, ++ * so use __set_task_cpu(). ++ */ ++ __set_task_cpu(p, smp_processor_id()); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++} ++ ++void sched_post_fork(struct task_struct *p) ++{ ++} ++ +#ifdef CONFIG_SCHEDSTATS + +DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+static bool __initdata __sched_schedstats = false; + +static void set_schedstats(bool enabled) +{ @@ -2617,16 +3695,11 @@ index 000000000000..09ca47de425c + if (!str) + goto out; + -+ /* -+ * This code is called before jump labels have been set up, so we can't -+ * change the static branch directly just yet. Instead set a temporary -+ * variable so init_schedstats() can do it later. -+ */ + if (!strcmp(str, "enable")) { -+ __sched_schedstats = true; ++ set_schedstats(true); + ret = 1; + } else if (!strcmp(str, "disable")) { -+ __sched_schedstats = false; ++ set_schedstats(false); + ret = 1; + } +out: @@ -2637,14 +3710,9 @@ index 000000000000..09ca47de425c +} +__setup("schedstats=", setup_schedstats); + -+static void __init init_schedstats(void) -+{ -+ set_schedstats(__sched_schedstats); -+} -+ +#ifdef CONFIG_PROC_SYSCTL -+int sysctl_schedstats(struct ctl_table *table, int write, -+ void __user *buffer, size_t *lenp, loff_t *ppos) ++static int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, ++ size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int err; @@ -2662,9 +3730,26 @@ index 000000000000..09ca47de425c + set_schedstats(state); + return err; +} ++ ++static struct ctl_table sched_core_sysctls[] = { ++ { ++ .procname = "sched_schedstats", ++ .data = NULL, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = sysctl_schedstats, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_ONE, ++ }, ++ {} ++}; ++static int __init sched_core_sysctl_init(void) ++{ ++ register_sysctl_init("kernel", sched_core_sysctls); ++ return 0; ++} ++late_initcall(sched_core_sysctl_init); +#endif /* CONFIG_PROC_SYSCTL */ -+#else /* !CONFIG_SCHEDSTATS */ -+static inline void init_schedstats(void) {} +#endif /* CONFIG_SCHEDSTATS */ + +/* @@ -2680,15 +3765,15 @@ index 000000000000..09ca47de425c + struct rq *rq; + + raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ p->state = TASK_RUNNING; -+ ++ WRITE_ONCE(p->__state, TASK_RUNNING); + rq = cpu_rq(select_task_rq(p)); +#ifdef CONFIG_SMP ++ rseq_migrate(p); + /* + * Fork balancing, do it here and not earlier because: + * - cpus_ptr can change in the fork path + * - any previously selected CPU might disappear through hotplug ++ * + * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, + * as we're not fully set-up yet. + */ @@ -2696,8 +3781,8 @@ index 000000000000..09ca47de425c +#endif + + raw_spin_lock(&rq->lock); -+ + update_rq_clock(rq); ++ + activate_task(p, rq); + trace_sched_wakeup_new(p); + check_preempt_curr(rq); @@ -2798,16 +3883,19 @@ index 000000000000..09ca47de425c + /* + * Claim the task as running, we do this before switching to it + * such that any running task will have this set. ++ * ++ * See the ttwu() WF_ON_CPU case and its ordering comment. 
+ */ -+ next->on_cpu = 1; ++ WRITE_ONCE(next->on_cpu, 1); +} + +static inline void finish_task(struct task_struct *prev) +{ +#ifdef CONFIG_SMP + /* -+ * After ->on_cpu is cleared, the task can be moved to a different CPU. -+ * We must ensure this doesn't happen until the switch is completely ++ * This must be the very last reference to @prev from this CPU. After ++ * p->on_cpu is cleared, the task can be moved to a different CPU. We ++ * must ensure this doesn't happen until the switch is completely + * finished. + * + * In particular, the load of prev->state in finish_task_switch() must @@ -2821,6 +3909,106 @@ index 000000000000..09ca47de425c +#endif +} + ++#ifdef CONFIG_SMP ++ ++static void do_balance_callbacks(struct rq *rq, struct callback_head *head) ++{ ++ void (*func)(struct rq *rq); ++ struct callback_head *next; ++ ++ lockdep_assert_held(&rq->lock); ++ ++ while (head) { ++ func = (void (*)(struct rq *))head->func; ++ next = head->next; ++ head->next = NULL; ++ head = next; ++ ++ func(rq); ++ } ++} ++ ++static void balance_push(struct rq *rq); ++ ++/* ++ * balance_push_callback is a right abuse of the callback interface and plays ++ * by significantly different rules. ++ * ++ * Where the normal balance_callback's purpose is to be ran in the same context ++ * that queued it (only later, when it's safe to drop rq->lock again), ++ * balance_push_callback is specifically targeted at __schedule(). ++ * ++ * This abuse is tolerated because it places all the unlikely/odd cases behind ++ * a single test, namely: rq->balance_callback == NULL. ++ */ ++struct callback_head balance_push_callback = { ++ .next = NULL, ++ .func = (void (*)(struct callback_head *))balance_push, ++}; ++ ++static inline struct callback_head * ++__splice_balance_callbacks(struct rq *rq, bool split) ++{ ++ struct callback_head *head = rq->balance_callback; ++ ++ if (likely(!head)) ++ return NULL; ++ ++ lockdep_assert_rq_held(rq); ++ /* ++ * Must not take balance_push_callback off the list when ++ * splice_balance_callbacks() and balance_callbacks() are not ++ * in the same rq->lock section. ++ * ++ * In that case it would be possible for __schedule() to interleave ++ * and observe the list empty. 
++ */ ++ if (split && head == &balance_push_callback) ++ head = NULL; ++ else ++ rq->balance_callback = NULL; ++ ++ return head; ++} ++ ++static inline struct callback_head *splice_balance_callbacks(struct rq *rq) ++{ ++ return __splice_balance_callbacks(rq, true); ++} ++ ++static void __balance_callbacks(struct rq *rq) ++{ ++ do_balance_callbacks(rq, __splice_balance_callbacks(rq, false)); ++} ++ ++static inline void balance_callbacks(struct rq *rq, struct callback_head *head) ++{ ++ unsigned long flags; ++ ++ if (unlikely(head)) { ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ do_balance_callbacks(rq, head); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++ } ++} ++ ++#else ++ ++static inline void __balance_callbacks(struct rq *rq) ++{ ++} ++ ++static inline struct callback_head *splice_balance_callbacks(struct rq *rq) ++{ ++ return NULL; ++} ++ ++static inline void balance_callbacks(struct rq *rq, struct callback_head *head) ++{ ++} ++ ++#endif ++ +static inline void +prepare_lock_switch(struct rq *rq, struct task_struct *next) +{ @@ -2845,9 +4033,38 @@ index 000000000000..09ca47de425c + * prev into current: + */ + spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); ++ __balance_callbacks(rq); + raw_spin_unlock_irq(&rq->lock); +} + ++/* ++ * NOP if the arch has not defined these: ++ */ ++ ++#ifndef prepare_arch_switch ++# define prepare_arch_switch(next) do { } while (0) ++#endif ++ ++#ifndef finish_arch_post_lock_switch ++# define finish_arch_post_lock_switch() do { } while (0) ++#endif ++ ++static inline void kmap_local_sched_out(void) ++{ ++#ifdef CONFIG_KMAP_LOCAL ++ if (unlikely(current->kmap_ctrl.idx)) ++ __kmap_local_sched_out(); ++#endif ++} ++ ++static inline void kmap_local_sched_in(void) ++{ ++#ifdef CONFIG_KMAP_LOCAL ++ if (unlikely(current->kmap_ctrl.idx)) ++ __kmap_local_sched_in(); ++#endif ++} ++ +/** + * prepare_task_switch - prepare to switch tasks + * @rq: the runqueue preparing to switch @@ -2869,6 +4086,7 @@ index 000000000000..09ca47de425c + perf_event_task_sched_out(prev, next); + rseq_preempt(prev); + fire_sched_out_preempt_notifiers(prev, next); ++ kmap_local_sched_out(); + prepare_task(next); + prepare_arch_switch(next); +} @@ -2898,7 +4116,7 @@ index 000000000000..09ca47de425c +{ + struct rq *rq = this_rq(); + struct mm_struct *mm = rq->prev_mm; -+ long prev_state; ++ unsigned int prev_state; + + /* + * The previous task will have left us with a preempt_count of 2 @@ -2929,13 +4147,22 @@ index 000000000000..09ca47de425c + * running on another CPU and we could rave with its RUNNING -> DEAD + * transition, resulting in a double drop. + */ -+ prev_state = prev->state; ++ prev_state = READ_ONCE(prev->__state); + vtime_task_switch(prev); + perf_event_task_sched_in(prev, current); + finish_task(prev); ++ tick_nohz_task_switch(); + finish_lock_switch(rq); + finish_arch_post_lock_switch(); + kcov_finish_switch(current); ++ /* ++ * kmap_local_sched_out() is invoked with rq::lock held and ++ * interrupts disabled. There is no requirement for that, but the ++ * sched out code does not have an interrupt enabled section. ++ * Restoring the maps on sched in does not require interrupts being ++ * disabled either. 
++ */ ++ kmap_local_sched_in(); + + fire_sched_in_preempt_notifiers(current); + /* @@ -2952,22 +4179,15 @@ index 000000000000..09ca47de425c + */ + if (mm) { + membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop(mm); ++ mmdrop_sched(mm); + } + if (unlikely(prev_state == TASK_DEAD)) { -+ /* -+ * Remove function-return probe instances associated with this -+ * task and put them back on the free list. -+ */ -+ kprobe_flush_task(prev); -+ + /* Task is done with its stack. */ + put_task_stack(prev); + + put_task_struct_rcu_user(prev); + } + -+ tick_nohz_task_switch(); + return rq; +} + @@ -2978,8 +4198,6 @@ index 000000000000..09ca47de425c +asmlinkage __visible void schedule_tail(struct task_struct *prev) + __releases(rq->lock) +{ -+ struct rq *rq; -+ + /* + * New tasks start with FORK_PREEMPT_COUNT, see there and + * finish_task_switch() for details. @@ -2989,7 +4207,7 @@ index 000000000000..09ca47de425c + * PREEMPT_COUNT kernels). + */ + -+ rq = finish_task_switch(prev); ++ finish_task_switch(prev); + preempt_enable(); + + if (current->set_child_tid) @@ -3063,9 +4281,9 @@ index 000000000000..09ca47de425c + * externally visible scheduler statistics: current number of runnable + * threads, total number of context switches performed since bootup. + */ -+unsigned long nr_running(void) ++unsigned int nr_running(void) +{ -+ unsigned long i, sum = 0; ++ unsigned int i, sum = 0; + + for_each_online_cpu(i) + sum += cpu_rq(i)->nr_running; @@ -3110,13 +4328,13 @@ index 000000000000..09ca47de425c + * it does become runnable. + */ + -+unsigned long nr_iowait_cpu(int cpu) ++unsigned int nr_iowait_cpu(int cpu) +{ + return atomic_read(&cpu_rq(cpu)->nr_iowait); +} + +/* -+ * IO-wait accounting, and how its mostly bollocks (on SMP). ++ * IO-wait accounting, and how it's mostly bollocks (on SMP). + * + * The idea behind IO-wait account is to account the idle time that we could + * have spend running if it were not for IO. That is, if we were to improve the @@ -3145,9 +4363,9 @@ index 000000000000..09ca47de425c + * Task CPU affinities can make all that even more 'interesting'. 
+ */ + -+unsigned long nr_iowait(void) ++unsigned int nr_iowait(void) +{ -+ unsigned long i, sum = 0; ++ unsigned int i, sum = 0; + + for_each_possible_cpu(i) + sum += nr_iowait_cpu(i); @@ -3164,26 +4382,6 @@ index 000000000000..09ca47de425c + */ +void sched_exec(void) +{ -+ struct task_struct *p = current; -+ int dest_cpu; -+ -+ if (task_rq(p)->nr_running < 2) -+ return; -+ -+ dest_cpu = cpumask_any_and(p->cpus_ptr, &sched_rq_watermark[IDLE_WM]); -+ if ( dest_cpu < nr_cpu_ids) { -+#ifdef CONFIG_SCHED_SMT -+ int smt = cpumask_any_and(p->cpus_ptr, &sched_sg_idle_mask); -+ if (smt < nr_cpu_ids) -+ dest_cpu = smt; -+#endif -+ if (likely(cpu_active(dest_cpu))) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); -+ return; -+ } -+ } +} + +#endif @@ -3199,6 +4397,7 @@ index 000000000000..09ca47de425c + s64 ns = rq->clock_task - p->last_ran; + + p->sched_time += ns; ++ cgroup_account_cputime(p, ns); + account_group_exec_runtime(p, ns); + + p->time_slice -= ns; @@ -3249,17 +4448,6 @@ index 000000000000..09ca47de425c + return ns; +} + -+DEFINE_PER_CPU(unsigned long, thermal_pressure); -+ -+void arch_set_thermal_pressure(struct cpumask *cpus, -+ unsigned long th_pressure) -+{ -+ int cpu; -+ -+ for_each_cpu(cpu, cpus) -+ WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); -+} -+ +/* This manages tasks that have run out of timeslice during a scheduler_tick */ +static inline void scheduler_task_tick(struct rq *rq) +{ @@ -3281,6 +4469,55 @@ index 000000000000..09ca47de425c + set_preempt_need_resched(); +} + ++#ifdef CONFIG_SCHED_DEBUG ++static u64 cpu_resched_latency(struct rq *rq) ++{ ++ int latency_warn_ms = READ_ONCE(sysctl_resched_latency_warn_ms); ++ u64 resched_latency, now = rq_clock(rq); ++ static bool warned_once; ++ ++ if (sysctl_resched_latency_warn_once && warned_once) ++ return 0; ++ ++ if (!need_resched() || !latency_warn_ms) ++ return 0; ++ ++ if (system_state == SYSTEM_BOOTING) ++ return 0; ++ ++ if (!rq->last_seen_need_resched_ns) { ++ rq->last_seen_need_resched_ns = now; ++ rq->ticks_without_resched = 0; ++ return 0; ++ } ++ ++ rq->ticks_without_resched++; ++ resched_latency = now - rq->last_seen_need_resched_ns; ++ if (resched_latency <= latency_warn_ms * NSEC_PER_MSEC) ++ return 0; ++ ++ warned_once = true; ++ ++ return resched_latency; ++} ++ ++static int __init setup_resched_latency_warn_ms(char *str) ++{ ++ long val; ++ ++ if ((kstrtol(str, 0, &val))) { ++ pr_warn("Unable to set resched_latency_warn_ms\n"); ++ return 1; ++ } ++ ++ sysctl_resched_latency_warn_ms = val; ++ return 1; ++} ++__setup("resched_latency_warn_ms=", setup_resched_latency_warn_ms); ++#else ++static inline u64 cpu_resched_latency(struct rq *rq) { return 0; } ++#endif /* CONFIG_SCHED_DEBUG */ ++ +/* + * This function gets called by the timer code, with HZ frequency. + * We call it with interrupts disabled. 
@@ -3289,6 +4526,7 @@ index 000000000000..09ca47de425c +{ + int cpu __maybe_unused = smp_processor_id(); + struct rq *rq = cpu_rq(cpu); ++ u64 resched_latency; + + arch_scale_freq_tick(); + sched_clock_tick(); @@ -3297,17 +4535,21 @@ index 000000000000..09ca47de425c + update_rq_clock(rq); + + scheduler_task_tick(rq); ++ if (sched_feat(LATENCY_WARN)) ++ resched_latency = cpu_resched_latency(rq); + calc_global_load_tick(rq); -+ psi_task_tick(rq); + + rq->last_tick = rq->clock; + raw_spin_unlock(&rq->lock); + ++ if (sched_feat(LATENCY_WARN) && resched_latency) ++ resched_latency_warn(cpu, resched_latency); ++ + perf_event_task_tick(); +} + +#ifdef CONFIG_SCHED_SMT -+static inline int active_load_balance_cpu_stop(void *data) ++static inline int sg_balance_cpu_stop(void *data) +{ + struct rq *rq = this_rq(); + struct task_struct *p = data; @@ -3322,10 +4564,10 @@ index 000000000000..09ca47de425c + rq->active_balance = 0; + /* _something_ may have changed the task, double check again */ + if (task_on_rq_queued(p) && task_rq(p) == rq && -+ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { ++ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask) && ++ !is_migration_disabled(p)) { + int cpu = cpu_of(rq); -+ int dcpu = __best_mask_cpu(cpu, &tmp, -+ per_cpu(sched_cpu_llc_mask, cpu)); ++ int dcpu = __best_mask_cpu(&tmp, per_cpu(sched_cpu_llc_mask, cpu)); + rq = move_queued_task(rq, p, dcpu); + } + @@ -3350,7 +4592,7 @@ index 000000000000..09ca47de425c + curr = rq->curr; + res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\ + cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\ -+ (!rq->active_balance); ++ !is_migration_disabled(curr) && (!rq->active_balance); + + if (res) + rq->active_balance = 1; @@ -3358,41 +4600,36 @@ index 000000000000..09ca47de425c + raw_spin_unlock_irqrestore(&rq->lock, flags); + + if (res) -+ stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -+ curr, &rq->active_balance_work); ++ stop_one_cpu_nowait(cpu, sg_balance_cpu_stop, curr, ++ &rq->active_balance_work); + return res; +} + +/* -+ * sg_balance_check - slibing group balance check for run queue @rq ++ * sg_balance - slibing group balance check for run queue @rq + */ -+static inline void sg_balance_check(struct rq *rq) ++static inline void sg_balance(struct rq *rq) +{ + cpumask_t chk; -+ int cpu; ++ int cpu = cpu_of(rq); + -+ /* exit when no sg in idle */ -+ if (cpumask_empty(&sched_sg_idle_mask)) ++ /* exit when cpu is offline */ ++ if (unlikely(!rq->online)) + return; + -+ cpu = cpu_of(rq); + /* + * Only cpu in slibing idle group will do the checking and then + * find potential cpus which can migrate the current running task + */ + if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && -+ cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && -+ cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) { -+ int i, tried = 0; ++ cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) && ++ cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) { ++ int i; + + for_each_cpu_wrap(i, &chk, cpu) { -+ if (cpumask_subset(cpu_smt_mask(i), &chk)) { -+ if (sg_balance_trigger(i)) -+ return; -+ if (tried) -+ return; -+ tried++; -+ } ++ if (cpumask_subset(cpu_smt_mask(i), &chk) && ++ sg_balance_trigger(i)) ++ return; + } + } +} @@ -3494,7 +4731,7 @@ index 000000000000..09ca47de425c + int os; + struct tick_work *twork; + -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) ++ if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + return; + + WARN_ON_ONCE(!tick_work_cpu); @@ -3514,7 +4751,7 @@ index 000000000000..09ca47de425c +{ + 
struct tick_work *twork; + -+ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) ++ if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + return; + + WARN_ON_ONCE(!tick_work_cpu); @@ -3642,8 +4879,7 @@ index 000000000000..09ca47de425c + if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) + && in_atomic_preempt_off()) { + pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); ++ print_ip_sym(KERN_ERR, preempt_disable_ip); + } + if (panic_on_warn) + panic("scheduling while atomic\n"); @@ -3660,10 +4896,13 @@ index 000000000000..09ca47de425c +#ifdef CONFIG_SCHED_STACK_END_CHECK + if (task_stack_end_corrupted(prev)) + panic("corrupted stack end detected inside scheduler\n"); ++ ++ if (task_scs_end_corrupted(prev)) ++ panic("corrupted shadow stack detected inside scheduler\n"); +#endif + +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && prev->state && prev->non_block_count) { ++ if (!preempt && READ_ONCE(prev->__state) && prev->non_block_count) { + printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", + prev->comm, prev->pid, prev->non_block_count); + dump_stack(); @@ -3676,15 +4915,33 @@ index 000000000000..09ca47de425c + preempt_count_set(PREEMPT_DISABLED); + } + rcu_sleep_check(); ++ SCHED_WARN_ON(ct_state() == CONTEXT_USER); + + profile_hit(SCHED_PROFILING, __builtin_return_address(0)); + + schedstat_inc(this_rq()->sched_count); +} + ++/* ++ * Compile time debug macro ++ * #define ALT_SCHED_DEBUG ++ */ ++ ++#ifdef ALT_SCHED_DEBUG ++void alt_sched_debug(void) ++{ ++ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", ++ sched_rq_pending_mask.bits[0], ++ sched_rq_watermark[0].bits[0], ++ sched_sg_idle_mask.bits[0]); ++} ++#else ++inline void alt_sched_debug(void) {} ++#endif ++ +#ifdef CONFIG_SMP + -+#define SCHED_RQ_NR_MIGRATION (32UL) ++#define SCHED_RQ_NR_MIGRATION (32U) +/* + * Migrate pending tasks in @rq to @dest_cpu + * Will try to migrate mininal of half of @rq nr_running tasks and @@ -3701,8 +4958,9 @@ index 000000000000..09ca47de425c + (p = sched_rq_next_task(skip, rq)) != rq->idle) { + skip = sched_rq_next_task(p, rq); + if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { -+ __SCHED_DEQUEUE_TASK(p, rq, 0, ); ++ __SCHED_DEQUEUE_TASK(p, rq, 0); + set_task_cpu(p, dest_cpu); ++ sched_task_sanity_check(p, dest_rq); + __SCHED_ENQUEUE_TASK(p, dest_rq, 0); + nr_migrated++; + } @@ -3714,7 +4972,7 @@ index 000000000000..09ca47de425c + +static inline int take_other_rq_tasks(struct rq *rq, int cpu) +{ -+ struct cpumask *affinity_mask, *end_mask; ++ struct cpumask *topo_mask, *end_mask; + + if (unlikely(!rq->online)) + return 0; @@ -3722,11 +4980,11 @@ index 000000000000..09ca47de425c + if (cpumask_empty(&sched_rq_pending_mask)) + return 0; + -+ affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); ++ topo_mask = per_cpu(sched_cpu_topo_masks, cpu) + 1; ++ end_mask = per_cpu(sched_cpu_topo_end_mask, cpu); + do { + int i; -+ for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { ++ for_each_cpu_and(i, &sched_rq_pending_mask, topo_mask) { + int nr_migrated; + struct rq *src_rq; + @@ -3738,16 +4996,13 @@ index 000000000000..09ca47de425c + + if ((nr_migrated = migrate_pending_tasks(src_rq, rq, cpu))) { + src_rq->nr_running -= nr_migrated; -+#ifdef CONFIG_SMP + if (src_rq->nr_running < 2) + cpumask_clear_cpu(i, &sched_rq_pending_mask); -+#endif ++ + rq->nr_running += nr_migrated; -+#ifdef CONFIG_SMP + if (rq->nr_running > 1) + cpumask_set_cpu(cpu, &sched_rq_pending_mask); -+#endif -+ 
update_sched_rq_watermark(rq); ++ + cpufreq_update_util(rq, 0); + + spin_release(&src_rq->lock.dep_map, _RET_IP_); @@ -3759,7 +5014,7 @@ index 000000000000..09ca47de425c + spin_release(&src_rq->lock.dep_map, _RET_IP_); + do_raw_spin_unlock(&src_rq->lock); + } -+ } while (++affinity_mask < end_mask); ++ } while (++topo_mask < end_mask); + + return 0; +} @@ -3776,14 +5031,8 @@ index 000000000000..09ca47de425c + + update_curr(rq, p); + -+ if (p->time_slice < RESCHED_NS) { -+ p->time_slice = sched_timeslice_ns; -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -+ if (SCHED_RR != p->policy) -+ deboost_task(p); -+ requeue_task(p, rq); -+ } -+ } ++ if (p->time_slice < RESCHED_NS) ++ time_slice_expired(p, rq); +} + +static inline struct task_struct * @@ -3818,6 +5067,7 @@ index 000000000000..09ca47de425c + if (!take_other_rq_tasks(rq, cpu)) { +#endif + schedstat_inc(rq->sched_goidle); ++ /*printk(KERN_INFO "sched: choose_next_task(%d) idle %px\n", cpu, next);*/ + return next; +#ifdef CONFIG_SMP + } @@ -3827,10 +5077,30 @@ index 000000000000..09ca47de425c +#ifdef CONFIG_HIGH_RES_TIMERS + hrtick_start(rq, next->time_slice); +#endif ++ /*printk(KERN_INFO "sched: choose_next_task(%d) next %px\n", cpu, ++ * next);*/ + return next; +} + +/* ++ * Constants for the sched_mode argument of __schedule(). ++ * ++ * The mode argument allows RT enabled kernels to differentiate a ++ * preemption from blocking on an 'sleeping' spin/rwlock. Note that ++ * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to ++ * optimize the AND operation out and just check for zero. ++ */ ++#define SM_NONE 0x0 ++#define SM_PREEMPT 0x1 ++#define SM_RTLOCK_WAIT 0x2 ++ ++#ifndef CONFIG_PREEMPT_RT ++# define SM_MASK_PREEMPT (~0U) ++#else ++# define SM_MASK_PREEMPT SM_PREEMPT ++#endif ++ ++/* + * schedule() is the main scheduler function. + * + * The main means of driving the scheduler and thus entering this function are: @@ -3869,31 +5139,40 @@ index 000000000000..09ca47de425c + * + * WARNING: must be called with preemption disabled! + */ -+static void __sched notrace __schedule(bool preempt) ++static void __sched notrace __schedule(unsigned int sched_mode) +{ + struct task_struct *prev, *next; + unsigned long *switch_count; ++ unsigned long prev_state; + struct rq *rq; + int cpu; ++ int deactivated = 0; + + cpu = smp_processor_id(); + rq = cpu_rq(cpu); + prev = rq->curr; + -+ schedule_debug(prev, preempt); ++ schedule_debug(prev, !!sched_mode); + + /* by passing sched_feat(HRTICK) checking which Alt schedule FW doesn't support */ + hrtick_clear(rq); + + local_irq_disable(); -+ rcu_note_context_switch(preempt); ++ rcu_note_context_switch(!!sched_mode); + + /* + * Make sure that signal_pending_state()->signal_pending() below + * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(). ++ * done by the caller to avoid the race with signal_wake_up(): + * -+ * The membarrier system call requires a full memory barrier ++ * __set_current_state(@state) signal_wake_up() ++ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) ++ * wake_up_state(p, state) ++ * LOCK rq->lock LOCK p->pi_state ++ * smp_mb__after_spinlock() smp_mb__after_spinlock() ++ * if (signal_pending_state()) if (p->state & @state) ++ * ++ * Also, the membarrier system call requires a full memory barrier + * after coming from user-space, before storing to rq->curr. 
+ */ + raw_spin_lock(&rq->lock); @@ -3902,13 +5181,37 @@ index 000000000000..09ca47de425c + update_rq_clock(rq); + + switch_count = &prev->nivcsw; -+ if (!preempt && prev->state) { -+ if (signal_pending_state(prev->state, prev)) { -+ prev->state = TASK_RUNNING; ++ /* ++ * We must load prev->state once (task_struct::state is volatile), such ++ * that we form a control dependency vs deactivate_task() below. ++ */ ++ prev_state = READ_ONCE(prev->__state); ++ if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { ++ if (signal_pending_state(prev_state, prev)) { ++ WRITE_ONCE(prev->__state, TASK_RUNNING); + } else { -+ if (rq_switch_time(rq) < boost_threshold(prev)) -+ boost_task(prev); ++ prev->sched_contributes_to_load = ++ (prev_state & TASK_UNINTERRUPTIBLE) && ++ !(prev_state & TASK_NOLOAD) && ++ !(prev->flags & PF_FROZEN); ++ ++ if (prev->sched_contributes_to_load) ++ rq->nr_uninterruptible++; ++ ++ /* ++ * __schedule() ttwu() ++ * prev_state = prev->state; if (p->on_rq && ...) ++ * if (prev_state) goto out; ++ * p->on_rq = 0; smp_acquire__after_ctrl_dep(); ++ * p->state = TASK_WAKING ++ * ++ * Where __schedule() and ttwu() have matching control dependencies. ++ * ++ * After this, schedule() must not care about p->state any more. ++ */ ++ sched_task_deactivate(prev, rq); + deactivate_task(prev, rq); ++ deactivated = 1; + + if (prev->in_iowait) { + atomic_inc(&rq->nr_iowait); @@ -3918,14 +5221,18 @@ index 000000000000..09ca47de425c + switch_count = &prev->nvcsw; + } + -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ + check_curr(prev, rq); + + next = choose_next_task(rq, cpu, prev); ++ clear_tsk_need_resched(prev); ++ clear_preempt_need_resched(); ++#ifdef CONFIG_SCHED_DEBUG ++ rq->last_seen_need_resched_ns = 0; ++#endif + + if (likely(prev != next)) { ++ if (deactivated) ++ update_sched_rq_watermark(rq); + next->last_ran = rq->clock_task; + rq->last_ts_switch = rq->clock; + @@ -3953,15 +5260,17 @@ index 000000000000..09ca47de425c + + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + -+ trace_sched_switch(preempt, prev, next); ++ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); + + /* Also unlocks the rq: */ + rq = context_switch(rq, prev, next); -+ } else ++ } else { ++ __balance_callbacks(rq); + raw_spin_unlock_irq(&rq->lock); ++ } + +#ifdef CONFIG_SCHED_SMT -+ sg_balance_check(rq); ++ sg_balance(rq); +#endif +} + @@ -3973,7 +5282,7 @@ index 000000000000..09ca47de425c + /* Tell freezer to ignore us: */ + current->flags |= PF_NOFREEZE; + -+ __schedule(false); ++ __schedule(SM_NONE); + BUG(); + + /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ @@ -3983,24 +5292,21 @@ index 000000000000..09ca47de425c + +static inline void sched_submit_work(struct task_struct *tsk) +{ -+ if (!tsk->state) ++ unsigned int task_flags; ++ ++ if (task_is_running(tsk)) + return; + ++ task_flags = tsk->flags; + /* -+ * If a worker went to sleep, notify and ask workqueue whether -+ * it wants to wake up a task to maintain concurrency. -+ * As this function is called inside the schedule() context, -+ * we disable preemption to avoid it calling schedule() again -+ * in the possible wakeup of a kworker and because wq_worker_sleeping() -+ * requires it. ++ * If a worker goes to sleep, notify and ask workqueue whether it ++ * wants to wake up a task to maintain concurrency. 
+ */ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ preempt_disable(); -+ if (tsk->flags & PF_WQ_WORKER) ++ if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) { ++ if (task_flags & PF_WQ_WORKER) + wq_worker_sleeping(tsk); + else + io_wq_worker_sleeping(tsk); -+ preempt_enable_no_resched(); + } + + if (tsk_is_pi_blocked(tsk)) @@ -4010,8 +5316,7 @@ index 000000000000..09ca47de425c + * If we are going to sleep and we have plugged IO queued, + * make sure to submit it to avoid deadlocks. + */ -+ if (blk_needs_flush_plug(tsk)) -+ blk_schedule_flush_plug(tsk); ++ blk_flush_plug(tsk->plug, true); +} + +static void sched_update_worker(struct task_struct *tsk) @@ -4031,7 +5336,7 @@ index 000000000000..09ca47de425c + sched_submit_work(tsk); + do { + preempt_disable(); -+ __schedule(false); ++ __schedule(SM_NONE); + sched_preempt_enable_no_resched(); + } while (need_resched()); + sched_update_worker(tsk); @@ -4057,13 +5362,13 @@ index 000000000000..09ca47de425c + * current task can be in any other state. Note, idle is always in the + * TASK_RUNNING state. + */ -+ WARN_ON_ONCE(current->state); ++ WARN_ON_ONCE(current->__state); + do { -+ __schedule(false); ++ __schedule(SM_NONE); + } while (need_resched()); +} + -+#ifdef CONFIG_CONTEXT_TRACKING ++#if defined(CONFIG_CONTEXT_TRACKING) && !defined(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) +asmlinkage __visible void __sched schedule_user(void) +{ + /* @@ -4094,6 +5399,18 @@ index 000000000000..09ca47de425c + preempt_disable(); +} + ++#ifdef CONFIG_PREEMPT_RT ++void __sched notrace schedule_rtlock(void) ++{ ++ do { ++ preempt_disable(); ++ __schedule(SM_RTLOCK_WAIT); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++} ++NOKPROBE_SYMBOL(schedule_rtlock); ++#endif ++ +static void __sched notrace preempt_schedule_common(void) +{ + do { @@ -4112,7 +5429,7 @@ index 000000000000..09ca47de425c + */ + preempt_disable_notrace(); + preempt_latency_start(1); -+ __schedule(true); ++ __schedule(SM_PREEMPT); + preempt_latency_stop(1); + preempt_enable_no_resched_notrace(); + @@ -4142,6 +5459,27 @@ index 000000000000..09ca47de425c +NOKPROBE_SYMBOL(preempt_schedule); +EXPORT_SYMBOL(preempt_schedule); + ++#ifdef CONFIG_PREEMPT_DYNAMIC ++#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) ++#ifndef preempt_schedule_dynamic_enabled ++#define preempt_schedule_dynamic_enabled preempt_schedule ++#define preempt_schedule_dynamic_disabled NULL ++#endif ++DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); ++EXPORT_STATIC_CALL_TRAMP(preempt_schedule); ++#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) ++static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule); ++void __sched notrace dynamic_preempt_schedule(void) ++{ ++ if (!static_branch_unlikely(&sk_dynamic_preempt_schedule)) ++ return; ++ preempt_schedule(); ++} ++NOKPROBE_SYMBOL(dynamic_preempt_schedule); ++EXPORT_SYMBOL(dynamic_preempt_schedule); ++#endif ++#endif ++ +/** + * preempt_schedule_notrace - preempt_schedule called by tracing + * @@ -4185,7 +5523,7 @@ index 000000000000..09ca47de425c + * an infinite recursion. 
+ */ + prev_ctx = exception_enter(); -+ __schedule(true); ++ __schedule(SM_PREEMPT); + exception_exit(prev_ctx); + + preempt_latency_stop(1); @@ -4194,6 +5532,27 @@ index 000000000000..09ca47de425c +} +EXPORT_SYMBOL_GPL(preempt_schedule_notrace); + ++#ifdef CONFIG_PREEMPT_DYNAMIC ++#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) ++#ifndef preempt_schedule_notrace_dynamic_enabled ++#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace ++#define preempt_schedule_notrace_dynamic_disabled NULL ++#endif ++DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); ++EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); ++#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) ++static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule_notrace); ++void __sched notrace dynamic_preempt_schedule_notrace(void) ++{ ++ if (!static_branch_unlikely(&sk_dynamic_preempt_schedule_notrace)) ++ return; ++ preempt_schedule_notrace(); ++} ++NOKPROBE_SYMBOL(dynamic_preempt_schedule_notrace); ++EXPORT_SYMBOL(dynamic_preempt_schedule_notrace); ++#endif ++#endif ++ +#endif /* CONFIG_PREEMPTION */ + +/* @@ -4214,7 +5573,7 @@ index 000000000000..09ca47de425c + do { + preempt_disable(); + local_irq_enable(); -+ __schedule(true); ++ __schedule(SM_PREEMPT); + local_irq_disable(); + sched_preempt_enable_no_resched(); + } while (need_resched()); @@ -4225,19 +5584,27 @@ index 000000000000..09ca47de425c +int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, + void *key) +{ ++ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); + return try_to_wake_up(curr->private, mode, wake_flags); +} +EXPORT_SYMBOL(default_wake_function); + -+static inline void check_task_changed(struct rq *rq, struct task_struct *p) ++static inline void check_task_changed(struct task_struct *p, struct rq *rq) +{ ++ int idx; ++ + /* Trigger resched if task sched_prio has been modified. */ -+ if (task_on_rq_queued(p) && sched_task_need_requeue(p)) { -+ requeue_task(p, rq); ++ if (task_on_rq_queued(p) && (idx = task_sched_prio_idx(p, rq)) != p->sq_idx) { ++ requeue_task(p, rq, idx); + check_preempt_curr(rq); + } +} + ++static void __setscheduler_prio(struct task_struct *p, int prio) ++{ ++ p->prio = prio; ++} ++ +#ifdef CONFIG_RT_MUTEXES + +static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) @@ -4290,7 +5657,7 @@ index 000000000000..09ca47de425c + * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to + * ensure a task is de-boosted (pi_task is set to NULL) before the + * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. ++ * points to a blocked task -- which guarantees the task is present. 
+ */ + p->pi_top_task = pi_task; + @@ -4319,11 +5686,18 @@ index 000000000000..09ca47de425c + } + + trace_sched_pi_setprio(p, pi_task); -+ p->prio = prio; + -+ check_task_changed(rq, p); ++ __setscheduler_prio(p, prio); ++ ++ check_task_changed(p, rq); +out_unlock: ++ /* Avoid rq from going away on us: */ ++ preempt_disable(); ++ ++ __balance_callbacks(rq); + __task_access_unlock(p, lock); ++ ++ preempt_enable(); +} +#else +static inline int rt_effective_prio(struct task_struct *p, int prio) @@ -4351,14 +5725,15 @@ index 000000000000..09ca47de425c + /* + * The RT priorities are set via sched_setscheduler(), but we still + * allow the 'normal' nice value to be set - but as expected -+ * it wont have any effect on scheduling until the task is ++ * it won't have any effect on scheduling until the task is + * not SCHED_NORMAL/SCHED_BATCH: + */ + if (task_has_rt_policy(p)) + goto out_unlock; + + p->prio = effective_prio(p); -+ check_task_changed(rq, p); ++ ++ check_task_changed(p, rq); +out_unlock: + __task_access_unlock(p, lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); @@ -4420,14 +5795,17 @@ index 000000000000..09ca47de425c + * @p: the task in question. + * + * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). ++ * ++ * sched policy return value kernel prio user prio/nice ++ * ++ * (BMQ)normal, batch, idle[0 ... 53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] ++ * (PDS)normal, batch, idle[0 ... 39] 100 0/[-20 ... 19] ++ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] + */ +int task_prio(const struct task_struct *p) +{ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ return (p->prio - MAX_RT_PRIO + p->boost_prio); ++ return (p->prio < MAX_RT_PRIO) ? p->prio - MAX_RT_PRIO : ++ task_sched_prio_normal(p, task_rq(p)); +} + +/** @@ -4438,7 +5816,20 @@ index 000000000000..09ca47de425c + */ +int idle_cpu(int cpu) +{ -+ return cpu_curr(cpu) == cpu_rq(cpu)->idle; ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (rq->curr != rq->idle) ++ return 0; ++ ++ if (rq->nr_running) ++ return 0; ++ ++#ifdef CONFIG_SMP ++ if (rq->ttwu_pending) ++ return 0; ++#endif ++ ++ return 1; +} + +/** @@ -4495,21 +5886,6 @@ index 000000000000..09ca47de425c + p->normal_prio = normal_prio(p); +} + -+/* Actually do priority change: must hold rq lock. */ -+static void __setscheduler(struct rq *rq, struct task_struct *p, -+ const struct sched_attr *attr, bool keep_boost) -+{ -+ __setscheduler_params(p, attr); -+ -+ /* -+ * Keep a potential priority boosting if called from -+ * sched_setscheduler(). 
-+ */ -+ p->prio = normal_prio(p); -+ if (keep_boost) -+ p->prio = rt_effective_prio(p, p->prio); -+} -+ +/* + * check the target process has a UID that matches the current process's + */ @@ -4536,9 +5912,9 @@ index 000000000000..09ca47de425c + .sched_nice = 0, + .sched_priority = 99, + }; -+ int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -+ int retval, oldpolicy = -1; -+ int policy = attr->sched_policy; ++ int oldpolicy = -1, policy = attr->sched_policy; ++ int retval, newprio; ++ struct callback_head *head; + unsigned long flags; + struct rq *rq; + int reset_on_fork; @@ -4553,7 +5929,6 @@ index 000000000000..09ca47de425c + if (unlikely(SCHED_DEADLINE == policy)) { + attr = &dl_squash_attr; + policy = attr->sched_policy; -+ newprio = MAX_RT_PRIO - 1 - attr->sched_priority; + } +recheck: + /* Double check policy once rq lock held */ @@ -4572,11 +5947,11 @@ index 000000000000..09ca47de425c + + /* + * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and ++ * 1..MAX_RT_PRIO-1, valid priority for SCHED_NORMAL and + * SCHED_BATCH and SCHED_IDLE is 0. + */ + if (attr->sched_priority < 0 || -+ (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || ++ (p->mm && attr->sched_priority > MAX_RT_PRIO - 1) || + (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) + return -EINVAL; + if ((SCHED_RR == policy || SCHED_FIFO == policy) != @@ -4671,6 +6046,7 @@ index 000000000000..09ca47de425c + + p->sched_reset_on_fork = reset_on_fork; + ++ newprio = __normal_prio(policy, attr->sched_priority, NICE_TO_PRIO(attr->sched_nice)); + if (pi) { + /* + * Take priority boosted tasks into account. If the new @@ -4679,19 +6055,19 @@ index 000000000000..09ca47de425c + * the runqueue. This will be done when the task deboost + * itself. + */ -+ if (rt_effective_prio(p, newprio) == p->prio) { -+ __setscheduler_params(p, attr); -+ retval = 0; -+ goto unlock; -+ } ++ newprio = rt_effective_prio(p, newprio); + } + -+ __setscheduler(rq, p, attr, pi); ++ if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { ++ __setscheduler_params(p, attr); ++ __setscheduler_prio(p, newprio); ++ } + -+ check_task_changed(rq, p); ++ check_task_changed(p, rq); + + /* Avoid rq from going away on us: */ + preempt_disable(); ++ head = splice_balance_callbacks(rq); + __task_access_unlock(p, lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + @@ -4700,6 +6076,8 @@ index 000000000000..09ca47de425c + rt_mutex_adjust_pi(p); + } + ++ /* Run balance callbacks after we've adjusted the PI chain: */ ++ balance_callbacks(rq, head); + preempt_enable(); + + return 0; @@ -4737,6 +6115,8 @@ index 000000000000..09ca47de425c + * @policy: new policy. + * @param: structure containing the new RT priority. + * ++ * Use sched_set_fifo(), read its comment. ++ * + * Return: 0 on success. An error code otherwise. + * + * NOTE that the task may be already dead. @@ -4747,18 +6127,16 @@ index 000000000000..09ca47de425c + return _sched_setscheduler(p, policy, param, true); +} + -+EXPORT_SYMBOL_GPL(sched_setscheduler); -+ +int sched_setattr(struct task_struct *p, const struct sched_attr *attr) +{ + return __sched_setscheduler(p, attr, true, true); +} -+EXPORT_SYMBOL_GPL(sched_setattr); + +int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) +{ + return __sched_setscheduler(p, attr, false, true); +} ++EXPORT_SYMBOL_GPL(sched_setattr_nocheck); + +/** + * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. 
@@ -4778,7 +6156,51 @@ index 000000000000..09ca47de425c +{ + return _sched_setscheduler(p, policy, param, false); +} -+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); ++ ++/* ++ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally ++ * incapable of resource management, which is the one thing an OS really should ++ * be doing. ++ * ++ * This is of course the reason it is limited to privileged users only. ++ * ++ * Worse still; it is fundamentally impossible to compose static priority ++ * workloads. You cannot take two correctly working static prio workloads ++ * and smash them together and still expect them to work. ++ * ++ * For this reason 'all' FIFO tasks the kernel creates are basically at: ++ * ++ * MAX_RT_PRIO / 2 ++ * ++ * The administrator _MUST_ configure the system, the kernel simply doesn't ++ * know enough information to make a sensible choice. ++ */ ++void sched_set_fifo(struct task_struct *p) ++{ ++ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; ++ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); ++} ++EXPORT_SYMBOL_GPL(sched_set_fifo); ++ ++/* ++ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. ++ */ ++void sched_set_fifo_low(struct task_struct *p) ++{ ++ struct sched_param sp = { .sched_priority = 1 }; ++ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); ++} ++EXPORT_SYMBOL_GPL(sched_set_fifo_low); ++ ++void sched_set_normal(struct task_struct *p, int nice) ++{ ++ struct sched_attr attr = { ++ .sched_policy = SCHED_NORMAL, ++ .sched_nice = nice, ++ }; ++ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); ++} ++EXPORT_SYMBOL_GPL(sched_set_normal); + +static int +do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) @@ -4903,10 +6325,15 @@ index 000000000000..09ca47de425c + rcu_read_lock(); + retval = -ESRCH; + p = find_process_by_pid(pid); -+ if (p != NULL) -+ retval = sched_setattr(p, &attr); ++ if (likely(p)) ++ get_task_struct(p); + rcu_read_unlock(); + ++ if (likely(p)) { ++ retval = sched_setattr(p, &attr); ++ put_task_struct(p); ++ } ++ + return retval; +} + @@ -5057,6 +6484,7 @@ index 000000000000..09ca47de425c + kattr.sched_priority = p->rt_priority; + else + kattr.sched_nice = task_nice(p); ++ kattr.sched_flags &= SCHED_FLAG_ALL; + +#ifdef CONFIG_UCLAMP_TASK + kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; @@ -5072,19 +6500,55 @@ index 000000000000..09ca47de425c + return retval; +} + ++static int ++__sched_setaffinity(struct task_struct *p, const struct cpumask *mask) ++{ ++ int retval; ++ cpumask_var_t cpus_allowed, new_mask; ++ ++ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_free_cpus_allowed; ++ } ++ ++ cpuset_cpus_allowed(p, cpus_allowed); ++ cpumask_and(new_mask, mask, cpus_allowed); ++again: ++ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER); ++ if (retval) ++ goto out_free_new_mask; ++ ++ cpuset_cpus_allowed(p, cpus_allowed); ++ if (!cpumask_subset(new_mask, cpus_allowed)) { ++ /* ++ * We must have raced with a concurrent cpuset ++ * update. 
Just reset the cpus_allowed to the ++ * cpuset's cpus_allowed ++ */ ++ cpumask_copy(new_mask, cpus_allowed); ++ goto again; ++ } ++ ++out_free_new_mask: ++ free_cpumask_var(new_mask); ++out_free_cpus_allowed: ++ free_cpumask_var(cpus_allowed); ++ return retval; ++} ++ +long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +{ -+ cpumask_var_t cpus_allowed, new_mask; + struct task_struct *p; + int retval; + -+ get_online_cpus(); + rcu_read_lock(); + + p = find_process_by_pid(pid); + if (!p) { + rcu_read_unlock(); -+ put_online_cpus(); + return -ESRCH; + } + @@ -5096,52 +6560,24 @@ index 000000000000..09ca47de425c + retval = -EINVAL; + goto out_put_task; + } -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_put_task; -+ } -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ retval = -EPERM; ++ + if (!check_same_owner(p)) { + rcu_read_lock(); + if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { + rcu_read_unlock(); -+ goto out_unlock; ++ retval = -EPERM; ++ goto out_put_task; + } + rcu_read_unlock(); + } + + retval = security_task_setscheduler(p); + if (retval) -+ goto out_unlock; ++ goto out_put_task; + -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, in_mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ -+ if (!retval) { -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ } -+out_unlock: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); ++ retval = __sched_setaffinity(p, in_mask); +out_put_task: + put_task_struct(p); -+ put_online_cpus(); + return retval; +} + @@ -5245,15 +6681,6 @@ index 000000000000..09ca47de425c + return ret; +} + -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. It does this by -+ * scheduling away the current task. If it still has the earliest deadline -+ * it will be scheduled again as the next task. -+ * -+ * Return: 0. -+ */ +static void do_sched_yield(void) +{ + struct rq *rq; @@ -5267,43 +6694,90 @@ index 000000000000..09ca47de425c + schedstat_inc(rq->yld_count); + + if (1 == sched_yield_type) { -+ if (!rt_task(current)) { -+ current->boost_prio = MAX_PRIORITY_ADJ; -+ requeue_task(current, rq); -+ } ++ if (!rt_task(current)) ++ do_sched_yield_type_1(current, rq); + } else if (2 == sched_yield_type) { + if (rq->nr_running > 1) + rq->skip = current; + } + -+ /* -+ * Since we are going to call schedule() anyway, there's -+ * no need to preempt or enable interrupts: -+ */ + preempt_disable(); -+ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irq(&rq->lock); + sched_preempt_enable_no_resched(); + + schedule(); +} + ++/** ++ * sys_sched_yield - yield the current processor to other threads. ++ * ++ * This function yields the current CPU to other tasks. If there are no ++ * other threads running on this CPU then this function will return. ++ * ++ * Return: 0. 
++ */ +SYSCALL_DEFINE0(sched_yield) +{ + do_sched_yield(); + return 0; +} + -+#ifndef CONFIG_PREEMPTION -+int __sched _cond_resched(void) ++#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) ++int __sched __cond_resched(void) +{ + if (should_resched(0)) { + preempt_schedule_common(); + return 1; + } ++ /* ++ * In preemptible kernels, ->rcu_read_lock_nesting tells the tick ++ * whether the current CPU is in an RCU read-side critical section, ++ * so the tick can report quiescent states even for CPUs looping ++ * in kernel context. In contrast, in non-preemptible kernels, ++ * RCU readers leave no in-memory hints, which means that CPU-bound ++ * processes executing in kernel context might never report an ++ * RCU quiescent state. Therefore, the following code causes ++ * cond_resched() to report a quiescent state, but only when RCU ++ * is in urgent need of one. ++ */ ++#ifndef CONFIG_PREEMPT_RCU + rcu_all_qs(); ++#endif + return 0; +} -+EXPORT_SYMBOL(_cond_resched); ++EXPORT_SYMBOL(__cond_resched); ++#endif ++ ++#ifdef CONFIG_PREEMPT_DYNAMIC ++#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) ++#define cond_resched_dynamic_enabled __cond_resched ++#define cond_resched_dynamic_disabled ((void *)&__static_call_return0) ++DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched); ++EXPORT_STATIC_CALL_TRAMP(cond_resched); ++ ++#define might_resched_dynamic_enabled __cond_resched ++#define might_resched_dynamic_disabled ((void *)&__static_call_return0) ++DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched); ++EXPORT_STATIC_CALL_TRAMP(might_resched); ++#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) ++static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched); ++int __sched dynamic_cond_resched(void) ++{ ++ if (!static_branch_unlikely(&sk_dynamic_cond_resched)) ++ return 0; ++ return __cond_resched(); ++} ++EXPORT_SYMBOL(dynamic_cond_resched); ++ ++static DEFINE_STATIC_KEY_FALSE(sk_dynamic_might_resched); ++int __sched dynamic_might_resched(void) ++{ ++ if (!static_branch_unlikely(&sk_dynamic_might_resched)) ++ return 0; ++ return __cond_resched(); ++} ++EXPORT_SYMBOL(dynamic_might_resched); ++#endif +#endif + +/* @@ -5323,9 +6797,7 @@ index 000000000000..09ca47de425c + + if (spin_needbreak(lock) || resched) { + spin_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else ++ if (!_cond_resched()) + cpu_relax(); + ret = 1; + spin_lock(lock); @@ -5334,6 +6806,202 @@ index 000000000000..09ca47de425c +} +EXPORT_SYMBOL(__cond_resched_lock); + ++int __cond_resched_rwlock_read(rwlock_t *lock) ++{ ++ int resched = should_resched(PREEMPT_LOCK_OFFSET); ++ int ret = 0; ++ ++ lockdep_assert_held_read(lock); ++ ++ if (rwlock_needbreak(lock) || resched) { ++ read_unlock(lock); ++ if (!_cond_resched()) ++ cpu_relax(); ++ ret = 1; ++ read_lock(lock); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(__cond_resched_rwlock_read); ++ ++int __cond_resched_rwlock_write(rwlock_t *lock) ++{ ++ int resched = should_resched(PREEMPT_LOCK_OFFSET); ++ int ret = 0; ++ ++ lockdep_assert_held_write(lock); ++ ++ if (rwlock_needbreak(lock) || resched) { ++ write_unlock(lock); ++ if (!_cond_resched()) ++ cpu_relax(); ++ ret = 1; ++ write_lock(lock); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(__cond_resched_rwlock_write); ++ ++#ifdef CONFIG_PREEMPT_DYNAMIC ++ ++#ifdef CONFIG_GENERIC_ENTRY ++#include ++#endif ++ ++/* ++ * SC:cond_resched ++ * SC:might_resched ++ * SC:preempt_schedule ++ * SC:preempt_schedule_notrace ++ * SC:irqentry_exit_cond_resched ++ * ++ * ++ * NONE: ++ * cond_resched <- __cond_resched ++ * might_resched 
<- RET0 ++ * preempt_schedule <- NOP ++ * preempt_schedule_notrace <- NOP ++ * irqentry_exit_cond_resched <- NOP ++ * ++ * VOLUNTARY: ++ * cond_resched <- __cond_resched ++ * might_resched <- __cond_resched ++ * preempt_schedule <- NOP ++ * preempt_schedule_notrace <- NOP ++ * irqentry_exit_cond_resched <- NOP ++ * ++ * FULL: ++ * cond_resched <- RET0 ++ * might_resched <- RET0 ++ * preempt_schedule <- preempt_schedule ++ * preempt_schedule_notrace <- preempt_schedule_notrace ++ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched ++ */ ++ ++enum { ++ preempt_dynamic_undefined = -1, ++ preempt_dynamic_none, ++ preempt_dynamic_voluntary, ++ preempt_dynamic_full, ++}; ++ ++int preempt_dynamic_mode = preempt_dynamic_undefined; ++ ++int sched_dynamic_mode(const char *str) ++{ ++ if (!strcmp(str, "none")) ++ return preempt_dynamic_none; ++ ++ if (!strcmp(str, "voluntary")) ++ return preempt_dynamic_voluntary; ++ ++ if (!strcmp(str, "full")) ++ return preempt_dynamic_full; ++ ++ return -EINVAL; ++} ++ ++#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) ++#define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled) ++#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled) ++#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) ++#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key) ++#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key) ++#else ++#error "Unsupported PREEMPT_DYNAMIC mechanism" ++#endif ++ ++void sched_dynamic_update(int mode) ++{ ++ /* ++ * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in ++ * the ZERO state, which is invalid. ++ */ ++ preempt_dynamic_enable(cond_resched); ++ preempt_dynamic_enable(might_resched); ++ preempt_dynamic_enable(preempt_schedule); ++ preempt_dynamic_enable(preempt_schedule_notrace); ++ preempt_dynamic_enable(irqentry_exit_cond_resched); ++ ++ switch (mode) { ++ case preempt_dynamic_none: ++ preempt_dynamic_enable(cond_resched); ++ preempt_dynamic_disable(might_resched); ++ preempt_dynamic_disable(preempt_schedule); ++ preempt_dynamic_disable(preempt_schedule_notrace); ++ preempt_dynamic_disable(irqentry_exit_cond_resched); ++ pr_info("Dynamic Preempt: none\n"); ++ break; ++ ++ case preempt_dynamic_voluntary: ++ preempt_dynamic_enable(cond_resched); ++ preempt_dynamic_enable(might_resched); ++ preempt_dynamic_disable(preempt_schedule); ++ preempt_dynamic_disable(preempt_schedule_notrace); ++ preempt_dynamic_disable(irqentry_exit_cond_resched); ++ pr_info("Dynamic Preempt: voluntary\n"); ++ break; ++ ++ case preempt_dynamic_full: ++ preempt_dynamic_disable(cond_resched); ++ preempt_dynamic_disable(might_resched); ++ preempt_dynamic_enable(preempt_schedule); ++ preempt_dynamic_enable(preempt_schedule_notrace); ++ preempt_dynamic_enable(irqentry_exit_cond_resched); ++ pr_info("Dynamic Preempt: full\n"); ++ break; ++ } ++ ++ preempt_dynamic_mode = mode; ++} ++ ++static int __init setup_preempt_mode(char *str) ++{ ++ int mode = sched_dynamic_mode(str); ++ if (mode < 0) { ++ pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); ++ return 0; ++ } ++ ++ sched_dynamic_update(mode); ++ return 1; ++} ++__setup("preempt=", setup_preempt_mode); ++ ++static void __init preempt_dynamic_init(void) ++{ ++ if (preempt_dynamic_mode == preempt_dynamic_undefined) { ++ if (IS_ENABLED(CONFIG_PREEMPT_NONE)) { ++ sched_dynamic_update(preempt_dynamic_none); ++ } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) { ++ sched_dynamic_update(preempt_dynamic_voluntary); ++ } else { ++ /* Default 
static call setting, nothing to do */ ++ WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); ++ preempt_dynamic_mode = preempt_dynamic_full; ++ pr_info("Dynamic Preempt: full\n"); ++ } ++ } ++} ++ ++#define PREEMPT_MODEL_ACCESSOR(mode) \ ++ bool preempt_model_##mode(void) \ ++ { \ ++ WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined); \ ++ return preempt_dynamic_mode == preempt_dynamic_##mode; \ ++ } \ ++ EXPORT_SYMBOL_GPL(preempt_model_##mode) ++ ++PREEMPT_MODEL_ACCESSOR(none); ++PREEMPT_MODEL_ACCESSOR(voluntary); ++PREEMPT_MODEL_ACCESSOR(full); ++ ++#else /* !CONFIG_PREEMPT_DYNAMIC */ ++ ++static inline void preempt_dynamic_init(void) { } ++ ++#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */ ++ +/** + * yield - yield the current processor to other threads. + * @@ -5341,7 +7009,7 @@ index 000000000000..09ca47de425c + * + * The scheduler is at all times free to pick the calling task as the most + * eligible task to run, if removing the yield() call from your code breaks -+ * it, its already broken. ++ * it, it's already broken. + * + * Typical broken usage is: + * @@ -5391,8 +7059,7 @@ index 000000000000..09ca47de425c + int old_iowait = current->in_iowait; + + current->in_iowait = 1; -+ blk_schedule_flush_plug(current); -+ ++ blk_flush_plug(current->plug, true); + return old_iowait; +} + @@ -5447,7 +7114,7 @@ index 000000000000..09ca47de425c + switch (policy) { + case SCHED_FIFO: + case SCHED_RR: -+ ret = MAX_USER_RT_PRIO-1; ++ ret = MAX_RT_PRIO - 1; + break; + case SCHED_NORMAL: + case SCHED_BATCH: @@ -5489,6 +7156,8 @@ index 000000000000..09ca47de425c + struct task_struct *p; + int retval; + ++ alt_sched_debug(); ++ + if (pid < 0) + return -EINVAL; + @@ -5553,10 +7222,10 @@ index 000000000000..09ca47de425c + if (!try_get_task_stack(p)) + return; + -+ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); ++ pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); + -+ if (p->state == TASK_RUNNING) -+ printk(KERN_CONT " running task "); ++ if (task_is_running(p)) ++ pr_cont(" running task "); +#ifdef CONFIG_DEBUG_STACK_USAGE + free = stack_not_used(p); +#endif @@ -5565,12 +7234,13 @@ index 000000000000..09ca47de425c + if (pid_alive(p)) + ppid = task_pid_nr(rcu_dereference(p->real_parent)); + rcu_read_unlock(); -+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -+ task_pid_nr(p), ppid, -+ (unsigned long)task_thread_info(p)->flags); ++ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", ++ free, task_pid_nr(p), ppid, ++ read_task_thread_flags(p)); + + print_worker_info(KERN_INFO, p); -+ show_stack(p, NULL); ++ print_stop_info(KERN_INFO, p); ++ show_stack(p, NULL, KERN_INFO); + put_task_stack(p); +} +EXPORT_SYMBOL_GPL(sched_show_task); @@ -5578,36 +7248,31 @@ index 000000000000..09ca47de425c +static inline bool +state_filter_match(unsigned long state_filter, struct task_struct *p) +{ ++ unsigned int state = READ_ONCE(p->__state); ++ + /* no filter, everything matches */ + if (!state_filter) + return true; + + /* filter, but doesn't match */ -+ if (!(p->state & state_filter)) ++ if (!(state & state_filter)) + return false; + + /* + * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows + * TASK_KILLABLE). 
+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) ++ if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE) + return false; + + return true; +} + + -+void show_state_filter(unsigned long state_filter) ++void show_state_filter(unsigned int state_filter) +{ + struct task_struct *g, *p; + -+#if BITS_PER_LONG == 32 -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#else -+ printk(KERN_INFO -+ " task PC stack pid father\n"); -+#endif + rcu_read_lock(); + for_each_process_thread(g, p) { + /* @@ -5651,7 +7316,7 @@ index 000000000000..09ca47de425c + * NOTE: this function does not set the idle thread's NEED_RESCHED + * flag, to make booting more robust. + */ -+void init_idle(struct task_struct *idle, int cpu) ++void __init init_idle(struct task_struct *idle, int cpu) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags; @@ -5663,11 +7328,15 @@ index 000000000000..09ca47de425c + update_rq_clock(rq); + + idle->last_ran = rq->clock_task; -+ idle->state = TASK_RUNNING; -+ idle->flags |= PF_IDLE; -+ sched_queue_init_idle(rq, idle); ++ idle->__state = TASK_RUNNING; ++ /* ++ * PF_KTHREAD should already be set at this point; regardless, make it ++ * look like a proper per-CPU kthread. ++ */ ++ idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; ++ kthread_set_per_cpu(idle, cpu); + -+ kasan_unpoison_task_stack(idle); ++ sched_queue_init_idle(&rq->queue, idle); + +#ifdef CONFIG_SMP + /* @@ -5750,87 +7419,148 @@ index 000000000000..09ca47de425c + /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ +} + -+/* -+ * Migrate all tasks from the rq, sleeping tasks will be migrated by -+ * try_to_wake_up()->select_task_rq(). -+ * -+ * Called with rq->lock held even though we'er in stop_machine() and -+ * there's no concurrency possible, we hold the required locks anyway -+ * because of lock validation efforts. -+ */ -+static void migrate_tasks(struct rq *dead_rq) ++static int __balance_push_cpu_stop(void *arg) +{ -+ struct rq *rq = dead_rq; -+ struct task_struct *p, *stop = rq->stop; -+ int count = 0; ++ struct task_struct *p = arg; ++ struct rq *rq = this_rq(); ++ struct rq_flags rf; ++ int cpu; + -+ /* -+ * Fudge the rq selection such that the below task selection loop -+ * doesn't get stuck on the currently eligible stop task. -+ * -+ * We're currently inside stop_machine() and the rq is either stuck -+ * in the stop_machine_cpu_stop() loop, or we're executing this code, -+ * either way we should never end up calling schedule() until we're -+ * done here. -+ */ -+ rq->stop = NULL; ++ raw_spin_lock_irq(&p->pi_lock); ++ rq_lock(rq, &rf); + -+ p = sched_rq_first_task(rq); -+ while (p != rq->idle) { -+ int dest_cpu; ++ update_rq_clock(rq); + -+ /* skip the running task */ -+ if (task_running(p) || 1 == p->nr_cpus_allowed) { -+ p = sched_rq_next_task(p, rq); -+ continue; -+ } -+ -+ /* -+ * Rules for changing task_struct::cpus_allowed are holding -+ * both pi_lock and rq->lock, such that holding either -+ * stabilizes the mask. -+ * -+ * Drop rq->lock is not quite as disastrous as it usually is -+ * because !cpu_active at this point, which means load-balance -+ * will not interfere. Also, stop-machine. -+ */ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ /* -+ * Since we're inside stop-machine, _nothing_ should have -+ * changed the task, WARN if weird stuff happened, because in -+ * that case the above rq->lock drop is a fail too. 
-+ */ -+ if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { -+ raw_spin_unlock(&p->pi_lock); -+ p = sched_rq_next_task(p, rq); -+ continue; -+ } -+ -+ count++; -+ /* Find suitable destination for @next, with force if needed. */ -+ dest_cpu = select_fallback_rq(dead_rq->cpu, p); -+ rq = __migrate_task(rq, p, dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ rq = dead_rq; -+ raw_spin_lock(&rq->lock); -+ /* Check queued task all over from the header again */ -+ p = sched_rq_first_task(rq); ++ if (task_rq(p) == rq && task_on_rq_queued(p)) { ++ cpu = select_fallback_rq(rq->cpu, p); ++ rq = __migrate_task(rq, p, cpu); + } + -+ rq->stop = stop; ++ rq_unlock(rq, &rf); ++ raw_spin_unlock_irq(&p->pi_lock); ++ ++ put_task_struct(p); ++ ++ return 0; +} + ++static DEFINE_PER_CPU(struct cpu_stop_work, push_work); ++ ++/* ++ * This is enabled below SCHED_AP_ACTIVE; when !cpu_active(), but only ++ * effective when the hotplug motion is down. ++ */ ++static void balance_push(struct rq *rq) ++{ ++ struct task_struct *push_task = rq->curr; ++ ++ lockdep_assert_held(&rq->lock); ++ ++ /* ++ * Ensure the thing is persistent until balance_push_set(.on = false); ++ */ ++ rq->balance_callback = &balance_push_callback; ++ ++ /* ++ * Only active while going offline and when invoked on the outgoing ++ * CPU. ++ */ ++ if (!cpu_dying(rq->cpu) || rq != this_rq()) ++ return; ++ ++ /* ++ * Both the cpu-hotplug and stop task are in this case and are ++ * required to complete the hotplug process. ++ */ ++ if (kthread_is_per_cpu(push_task) || ++ is_migration_disabled(push_task)) { ++ ++ /* ++ * If this is the idle task on the outgoing CPU try to wake ++ * up the hotplug control thread which might wait for the ++ * last task to vanish. The rcuwait_active() check is ++ * accurate here because the waiter is pinned on this CPU ++ * and can't obviously be running in parallel. ++ * ++ * On RT kernels this also has to check whether there are ++ * pinned and scheduled out tasks on the runqueue. They ++ * need to leave the migrate disabled section first. ++ */ ++ if (!rq->nr_running && !rq_has_pinned_tasks(rq) && ++ rcuwait_active(&rq->hotplug_wait)) { ++ raw_spin_unlock(&rq->lock); ++ rcuwait_wake_up(&rq->hotplug_wait); ++ raw_spin_lock(&rq->lock); ++ } ++ return; ++ } ++ ++ get_task_struct(push_task); ++ /* ++ * Temporarily drop rq->lock such that we can wake-up the stop task. ++ * Both preemption and IRQs are still disabled. ++ */ ++ raw_spin_unlock(&rq->lock); ++ stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, ++ this_cpu_ptr(&push_work)); ++ /* ++ * At this point need_resched() is true and we'll take the loop in ++ * schedule(). The next pick is obviously going to be the stop task ++ * which kthread_is_per_cpu() and will push this task away. ++ */ ++ raw_spin_lock(&rq->lock); ++} ++ ++static void balance_push_set(int cpu, bool on) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++ rq_lock_irqsave(rq, &rf); ++ if (on) { ++ WARN_ON_ONCE(rq->balance_callback); ++ rq->balance_callback = &balance_push_callback; ++ } else if (rq->balance_callback == &balance_push_callback) { ++ rq->balance_callback = NULL; ++ } ++ rq_unlock_irqrestore(rq, &rf); ++} ++ ++/* ++ * Invoked from a CPUs hotplug control thread after the CPU has been marked ++ * inactive. All tasks which are not per CPU kernel threads are either ++ * pushed off this CPU now via balance_push() or placed on a different CPU ++ * during wakeup. Wait until the CPU is quiescent. 
++ */ ++static void balance_hotplug_wait(void) ++{ ++ struct rq *rq = this_rq(); ++ ++ rcuwait_wait_event(&rq->hotplug_wait, ++ rq->nr_running == 1 && !rq_has_pinned_tasks(rq), ++ TASK_UNINTERRUPTIBLE); ++} ++ ++#else ++ ++static void balance_push(struct rq *rq) ++{ ++} ++ ++static void balance_push_set(int cpu, bool on) ++{ ++} ++ ++static inline void balance_hotplug_wait(void) ++{ ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ +static void set_rq_offline(struct rq *rq) +{ + if (rq->online) + rq->online = false; +} -+#endif /* CONFIG_HOTPLUG_CPU */ + +static void set_rq_online(struct rq *rq) +{ @@ -5890,6 +7620,12 @@ index 000000000000..09ca47de425c + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + ++ /* ++ * Clear the balance_push callback and prepare to schedule ++ * regular tasks. ++ */ ++ balance_push_set(cpu, false); ++ +#ifdef CONFIG_SCHED_SMT + /* + * When going up, increment the number of cores with SMT present. @@ -5920,18 +7656,37 @@ index 000000000000..09ca47de425c + +int sched_cpu_deactivate(unsigned int cpu) +{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; + int ret; + + set_cpu_active(cpu, false); ++ ++ /* ++ * From this point forward, this CPU will refuse to run any task that ++ * is not: migrate_disable() or KTHREAD_IS_PER_CPU, and will actively ++ * push those tasks away until this gets cleared, see ++ * sched_cpu_dying(). ++ */ ++ balance_push_set(cpu, true); ++ + /* + * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU + * users of this state to go away such that all new such users will + * observe it. + * ++ * Specifically, we rely on ttwu to no longer target this CPU, see ++ * ttwu_queue_cond() and is_cpu_allowed(). ++ * + * Do sync before park smpboot threads to take care the rcu boost case. + */ + synchronize_rcu(); + ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ update_rq_clock(rq); ++ set_rq_offline(rq); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++ +#ifdef CONFIG_SCHED_SMT + /* + * When going down, decrement the number of cores with SMT present. @@ -5948,9 +7703,11 @@ index 000000000000..09ca47de425c + + ret = cpuset_cpu_inactive(cpu); + if (ret) { ++ balance_push_set(cpu, false); + set_cpu_active(cpu, true); + return ret; + } ++ + return 0; +} + @@ -5969,17 +7726,76 @@ index 000000000000..09ca47de425c +} + +#ifdef CONFIG_HOTPLUG_CPU ++ ++/* ++ * Invoked immediately before the stopper thread is invoked to bring the ++ * CPU down completely. At this point all per CPU kthreads except the ++ * hotplug thread (current) and the stopper thread (inactive) have been ++ * either parked or have been unbound from the outgoing CPU. Ensure that ++ * any of those which might be on the way out are gone. ++ * ++ * If after this point a bound task is being woken on this CPU then the ++ * responsible hotplug callback has failed to do it's job. ++ * sched_cpu_dying() will catch it with the appropriate fireworks. ++ */ ++int sched_cpu_wait_empty(unsigned int cpu) ++{ ++ balance_hotplug_wait(); ++ return 0; ++} ++ ++/* ++ * Since this CPU is going 'away' for a while, fold any nr_active delta we ++ * might have. Called from the CPU stopper task after ensuring that the ++ * stopper is the last running task on the CPU, so nr_active count is ++ * stable. We need to take the teardown thread which is calling this into ++ * account, so we hand in adjust = 1 to the load calculation. ++ * ++ * Also see the comment "Global load-average calculations". 
++ */ ++static void calc_load_migrate(struct rq *rq) ++{ ++ long delta = calc_load_fold_active(rq, 1); ++ ++ if (delta) ++ atomic_long_add(delta, &calc_load_tasks); ++} ++ ++static void dump_rq_tasks(struct rq *rq, const char *loglvl) ++{ ++ struct task_struct *g, *p; ++ int cpu = cpu_of(rq); ++ ++ lockdep_assert_held(&rq->lock); ++ ++ printk("%sCPU%d enqueued tasks (%u total):\n", loglvl, cpu, rq->nr_running); ++ for_each_process_thread(g, p) { ++ if (task_cpu(p) != cpu) ++ continue; ++ ++ if (!task_on_rq_queued(p)) ++ continue; ++ ++ printk("%s\tpid: %d, name: %s\n", loglvl, p->pid, p->comm); ++ } ++} ++ +int sched_cpu_dying(unsigned int cpu) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + ++ /* Handle pending wakeups and then migrate everything off */ + sched_tick_stop(cpu); ++ + raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_offline(rq); -+ migrate_tasks(rq); ++ if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) { ++ WARN(true, "Dying CPU not properly vacated!"); ++ dump_rq_tasks(rq, KERN_WARNING); ++ } + raw_spin_unlock_irqrestore(&rq->lock, flags); + ++ calc_load_migrate(rq); + hrtick_clear(rq); + return 0; +} @@ -5988,58 +7804,59 @@ index 000000000000..09ca47de425c +#ifdef CONFIG_SMP +static void sched_init_topology_cpumask_early(void) +{ -+ int cpu, level; ++ int cpu; + cpumask_t *tmp; + + for_each_possible_cpu(cpu) { -+ for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); -+ cpumask_copy(tmp, cpu_possible_mask); -+ cpumask_clear_cpu(cpu, tmp); -+ } -+ per_cpu(sched_cpu_llc_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); -+ per_cpu(sd_llc_id, cpu) = cpu; ++ /* init topo masks */ ++ tmp = per_cpu(sched_cpu_topo_masks, cpu); ++ ++ cpumask_copy(tmp, cpumask_of(cpu)); ++ tmp++; ++ cpumask_copy(tmp, cpu_possible_mask); ++ per_cpu(sched_cpu_llc_mask, cpu) = tmp; ++ per_cpu(sched_cpu_topo_end_mask, cpu) = ++tmp; ++ /*per_cpu(sd_llc_id, cpu) = cpu;*/ + } +} + -+#define TOPOLOGY_CPUMASK(name, mask, last) \ -+ if (cpumask_and(chk, chk, mask)) \ -+ printk(KERN_INFO "sched: cpu#%02d affinity mask: 0x%08lx - "#name,\ -+ cpu, (chk++)->bits[0]); \ ++#define TOPOLOGY_CPUMASK(name, mask, last)\ ++ if (cpumask_and(topo, topo, mask)) { \ ++ cpumask_copy(topo, mask); \ ++ printk(KERN_INFO "sched: cpu#%02d topo: 0x%08lx - "#name, \ ++ cpu, (topo++)->bits[0]); \ ++ } \ + if (!last) \ -+ cpumask_complement(chk, mask) ++ cpumask_complement(topo, mask) + +static void sched_init_topology_cpumask(void) +{ + int cpu; -+ cpumask_t *chk; ++ cpumask_t *topo; + + for_each_online_cpu(cpu) { + /* take chance to reset time slice for idle tasks */ + cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; + -+ chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); ++ topo = per_cpu(sched_cpu_topo_masks, cpu) + 1; + -+ cpumask_complement(chk, cpumask_of(cpu)); ++ cpumask_complement(topo, cpumask_of(cpu)); +#ifdef CONFIG_SCHED_SMT + TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); +#endif + per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); -+ per_cpu(sched_cpu_llc_mask, cpu) = chk; ++ per_cpu(sched_cpu_llc_mask, cpu) = topo; + TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); + + TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); + + TOPOLOGY_CPUMASK(others, cpu_online_mask, true); + -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; ++ per_cpu(sched_cpu_topo_end_mask, cpu) = topo; + 
printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", + cpu, per_cpu(sd_llc_id, cpu), + (int) (per_cpu(sched_cpu_llc_mask, cpu) - -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); ++ per_cpu(sched_cpu_topo_masks, cpu))); + } +} +#endif @@ -6047,8 +7864,9 @@ index 000000000000..09ca47de425c +void __init sched_init_smp(void) +{ + /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) ++ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_DOMAIN)) < 0) + BUG(); ++ current->flags &= ~PF_NO_SETAFFINITY; + + sched_init_topology_cpumask(); + @@ -6079,6 +7897,9 @@ index 000000000000..09ca47de425c + struct task_group *parent; + struct list_head siblings; + struct list_head children; ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ unsigned long shares; ++#endif +}; + +/* @@ -6102,8 +7923,8 @@ index 000000000000..09ca47de425c + wait_bit_init(); + +#ifdef CONFIG_SMP -+ for (i = 0; i < SCHED_BITS; i++) -+ cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); ++ for (i = 0; i < SCHED_QUEUE_BITS; i++) ++ cpumask_copy(sched_rq_watermark + i, cpu_present_mask); +#endif + +#ifdef CONFIG_CGROUP_SCHED @@ -6116,8 +7937,8 @@ index 000000000000..09ca47de425c + for_each_possible_cpu(i) { + rq = cpu_rq(i); + -+ sched_queue_init(rq); -+ rq->watermark = IDLE_WM; ++ sched_queue_init(&rq->queue); ++ rq->watermark = IDLE_TASK_SCHED_PRIO; + rq->skip = NULL; + + raw_spin_lock_init(&rq->lock); @@ -6131,10 +7952,19 @@ index 000000000000..09ca47de425c +#ifdef CONFIG_SCHED_SMT + rq->active_balance = 0; +#endif ++ ++#ifdef CONFIG_NO_HZ_COMMON ++ INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); +#endif ++ rq->balance_callback = &balance_push_callback; ++#ifdef CONFIG_HOTPLUG_CPU ++ rcuwait_init(&rq->hotplug_wait); ++#endif ++#endif /* CONFIG_SMP */ + rq->nr_switches = 0; -+ atomic_set(&rq->nr_iowait, 0); ++ + hrtick_rq_init(rq); ++ atomic_set(&rq->nr_iowait, 0); + } +#ifdef CONFIG_SMP + /* Set rq->online for cpu 0 */ @@ -6147,6 +7977,14 @@ index 000000000000..09ca47de425c + enter_lazy_tlb(&init_mm, current); + + /* ++ * The idle task doesn't need the kthread struct to function, but it ++ * is dressed up as a per-CPU kthread and thus needs to play the part ++ * if we want to avoid special-casing it in code that deals with per-CPU ++ * kthreads. ++ */ ++ WARN_ON(!set_kthread_struct(current)); ++ ++ /* + * Make us the idle thread. Technically, schedule() should not be + * called from this thread, however somewhere below it might be, + * but because we are the idle thread, we just pick up running again @@ -6158,42 +7996,58 @@ index 000000000000..09ca47de425c + +#ifdef CONFIG_SMP + idle_thread_set_boot_cpu(); ++ balance_push_set(smp_processor_id(), false); + + sched_init_topology_cpumask_early(); +#endif /* SMP */ + -+ init_schedstats(); -+ + psi_init(); ++ ++ preempt_dynamic_init(); +} + +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+static inline int preempt_count_equals(int preempt_offset) -+{ -+ int nested = preempt_count() + rcu_preempt_depth(); + -+ return (nested == preempt_offset); -+} -+ -+void __might_sleep(const char *file, int line, int preempt_offset) ++void __might_sleep(const char *file, int line) +{ ++ unsigned int state = get_current_state(); + /* + * Blocking primitives will set (and therefore destroy) current->state, + * since we will exit with TASK_RUNNING make sure we enter with it, + * otherwise we will destroy state. 
+ */ -+ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, ++ WARN_ONCE(state != TASK_RUNNING && current->task_state_change, + "do not call blocking ops when !TASK_RUNNING; " -+ "state=%lx set at [<%p>] %pS\n", -+ current->state, ++ "state=%x set at [<%p>] %pS\n", state, + (void *)current->task_state_change, + (void *)current->task_state_change); + -+ ___might_sleep(file, line, preempt_offset); ++ __might_resched(file, line, 0); +} +EXPORT_SYMBOL(__might_sleep); + -+void ___might_sleep(const char *file, int line, int preempt_offset) ++static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) ++{ ++ if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT)) ++ return; ++ ++ if (preempt_count() == preempt_offset) ++ return; ++ ++ pr_err("Preemption disabled at:"); ++ print_ip_sym(KERN_ERR, ip); ++} ++ ++static inline bool resched_offsets_ok(unsigned int offsets) ++{ ++ unsigned int nested = preempt_count(); ++ ++ nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT; ++ ++ return nested == offsets; ++} ++ ++void __might_resched(const char *file, int line, unsigned int offsets) +{ + /* Ratelimiting timestamp: */ + static unsigned long prev_jiffy; @@ -6203,7 +8057,7 @@ index 000000000000..09ca47de425c + /* WARN_ON_ONCE() by default, no rate limit required: */ + rcu_sleep_check(); + -+ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && ++ if ((resched_offsets_ok(offsets) && !irqs_disabled() && + !is_idle_task(current) && !current->non_block_count) || + system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || + oops_in_progress) @@ -6215,31 +8069,33 @@ index 000000000000..09ca47de425c + /* Save this before calling printk(), since that will clobber it: */ + preempt_disable_ip = get_preempt_disable_ip(current); + -+ printk(KERN_ERR -+ "BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ printk(KERN_ERR -+ "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); ++ pr_err("BUG: sleeping function called from invalid context at %s:%d\n", ++ file, line); ++ pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), current->non_block_count, ++ current->pid, current->comm); ++ pr_err("preempt_count: %x, expected: %x\n", preempt_count(), ++ offsets & MIGHT_RESCHED_PREEMPT_MASK); ++ ++ if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { ++ pr_err("RCU nest depth: %d, expected: %u\n", ++ rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT); ++ } + + if (task_stack_end_corrupted(current)) -+ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); ++ pr_emerg("Thread overran stack, or stack corrupted\n"); + + debug_show_held_locks(current); + if (irqs_disabled()) + print_irqtrace_events(current); -+#ifdef CONFIG_DEBUG_PREEMPT -+ if (!preempt_count_equals(preempt_offset)) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(preempt_disable_ip); -+ pr_cont("\n"); -+ } -+#endif ++ ++ print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK, ++ preempt_disable_ip); ++ + dump_stack(); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +} -+EXPORT_SYMBOL(___might_sleep); ++EXPORT_SYMBOL(__might_resched); + +void __cant_sleep(const char *file, int line, int preempt_offset) +{ @@ -6268,6 +8124,42 @@ index 000000000000..09ca47de425c + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +} +EXPORT_SYMBOL_GPL(__cant_sleep); ++ ++#ifdef CONFIG_SMP ++void __cant_migrate(const char *file, int 
line) ++{ ++ static unsigned long prev_jiffy; ++ ++ if (irqs_disabled()) ++ return; ++ ++ if (is_migration_disabled(current)) ++ return; ++ ++ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) ++ return; ++ ++ if (preempt_count() > 0) ++ return; ++ ++ if (current->migration_flags & MDF_FORCE_ENABLED) ++ return; ++ ++ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) ++ return; ++ prev_jiffy = jiffies; ++ ++ pr_err("BUG: assuming non migratable context at %s:%d\n", file, line); ++ pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), is_migration_disabled(current), ++ current->pid, current->comm); ++ ++ debug_show_held_locks(current); ++ dump_stack(); ++ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); ++} ++EXPORT_SYMBOL_GPL(__cant_migrate); ++#endif +#endif + +#ifdef CONFIG_MAGIC_SYSRQ @@ -6286,6 +8178,10 @@ index 000000000000..09ca47de425c + if (p->flags & PF_KTHREAD) + continue; + ++ schedstat_set(p->stats.wait_start, 0); ++ schedstat_set(p->stats.sleep_start, 0); ++ schedstat_set(p->stats.block_start, 0); ++ + if (!rt_task(p)) { + /* + * Renice negative nice level userspace @@ -6357,6 +8253,20 @@ index 000000000000..09ca47de425c + kmem_cache_free(task_group_cache, tg); +} + ++static void sched_free_group_rcu(struct rcu_head *rhp) ++{ ++ sched_free_group(container_of(rhp, struct task_group, rcu)); ++} ++ ++static void sched_unregister_group(struct task_group *tg) ++{ ++ /* ++ * We have to wait for yet another RCU grace period to expire, as ++ * print_cfs_stats() might run concurrently. ++ */ ++ call_rcu(&tg->rcu, sched_free_group_rcu); ++} ++ +/* allocate runqueue etc for a new task group */ +struct task_group *sched_create_group(struct task_group *parent) +{ @@ -6374,19 +8284,19 @@ index 000000000000..09ca47de425c +} + +/* rcu callback to free various structures associated with a task group */ -+static void sched_free_group_rcu(struct rcu_head *rhp) ++static void sched_unregister_group_rcu(struct rcu_head *rhp) +{ -+ /* Now it should be safe to free those cfs_rqs */ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); ++ /* Now it should be safe to free those cfs_rqs: */ ++ sched_unregister_group(container_of(rhp, struct task_group, rcu)); +} + +void sched_destroy_group(struct task_group *tg) +{ -+ /* Wait for possible concurrent references to cfs_rqs complete */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); ++ /* Wait for possible concurrent references to cfs_rqs complete: */ ++ call_rcu(&tg->rcu, sched_unregister_group_rcu); +} + -+void sched_offline_group(struct task_group *tg) ++void sched_release_group(struct task_group *tg) +{ +} + @@ -6427,7 +8337,7 @@ index 000000000000..09ca47de425c +{ + struct task_group *tg = css_tg(css); + -+ sched_offline_group(tg); ++ sched_release_group(tg); +} + +static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) @@ -6437,7 +8347,7 @@ index 000000000000..09ca47de425c + /* + * Relies on the RCU grace period between css_released() and this. + */ -+ sched_free_group(tg); ++ sched_unregister_group(tg); +} + +static void cpu_cgroup_fork(struct task_struct *task) @@ -6453,7 +8363,54 @@ index 000000000000..09ca47de425c +{ +} + ++#ifdef CONFIG_FAIR_GROUP_SCHED ++static DEFINE_MUTEX(shares_mutex); ++ ++int sched_group_set_shares(struct task_group *tg, unsigned long shares) ++{ ++ /* ++ * We can't change the weight of the root cgroup. 
++ */ ++ if (&root_task_group == tg) ++ return -EINVAL; ++ ++ shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES)); ++ ++ mutex_lock(&shares_mutex); ++ if (tg->shares == shares) ++ goto done; ++ ++ tg->shares = shares; ++done: ++ mutex_unlock(&shares_mutex); ++ return 0; ++} ++ ++static int cpu_shares_write_u64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, u64 shareval) ++{ ++ if (shareval > scale_load_down(ULONG_MAX)) ++ shareval = MAX_SHARES; ++ return sched_group_set_shares(css_tg(css), scale_load(shareval)); ++} ++ ++static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ struct task_group *tg = css_tg(css); ++ ++ return (u64) scale_load_down(tg->shares); ++} ++#endif ++ +static struct cftype cpu_legacy_files[] = { ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ { ++ .name = "shares", ++ .read_u64 = cpu_shares_read_u64, ++ .write_u64 = cpu_shares_write_u64, ++ }, ++#endif + { } /* Terminate */ +}; + @@ -6488,14 +8445,14 @@ index 000000000000..09ca47de425c +#undef CREATE_TRACE_POINTS diff --git a/kernel/sched/alt_debug.c b/kernel/sched/alt_debug.c new file mode 100644 -index 000000000000..835e6bb98dda +index 000000000000..1212a031700e --- /dev/null +++ b/kernel/sched/alt_debug.c @@ -0,0 +1,31 @@ +/* + * kernel/sched/alt_debug.c + * -+ * Print the BMQ debugging details ++ * Print the alt scheduler debugging details + * + * Author: Alfred Chen + * Date : 2020 @@ -6525,60 +8482,89 @@ index 000000000000..835e6bb98dda +{} diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h new file mode 100644 -index 000000000000..0936cf766514 +index 000000000000..a181bf9ce57d --- /dev/null +++ b/kernel/sched/alt_sched.h -@@ -0,0 +1,521 @@ +@@ -0,0 +1,645 @@ +#ifndef ALT_SCHED_H +#define ALT_SCHED_H + -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include +#include -+#include +#include -+#include -+#include +#include -+#include ++#include + -+#include ++#include ++#include + -+#ifdef CONFIG_PARAVIRT -+# include -+#endif ++#include "../workqueue_internal.h" + +#include "cpupri.h" + +#ifdef CONFIG_SCHED_BMQ -+#include "bmq.h" ++/* bits: ++ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ ++#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) ++#endif ++ ++#ifdef CONFIG_SCHED_PDS ++/* bits: RT(0-99), reserved(100-127), NORMAL_PRIO_NUM, cpu idle task */ ++#define SCHED_BITS (MIN_NORMAL_PRIO + NORMAL_PRIO_NUM + 1) ++#endif /* CONFIG_SCHED_PDS */ ++ ++#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) ++ ++#ifdef CONFIG_SCHED_DEBUG ++# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) ++extern void resched_latency_warn(int cpu, u64 latency); ++#else ++# define SCHED_WARN_ON(x) ({ (void)(x), 0; }) ++static inline void resched_latency_warn(int cpu, u64 latency) {} ++#endif ++ ++/* ++ * Increase resolution of nice-level calculations for 64-bit architectures. ++ * The extra resolution improves shares distribution and load balancing of ++ * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup ++ * hierarchies, especially on larger systems. This is not a user-visible change ++ * and does not change the user-interface for setting shares/weights. ++ * ++ * We increase resolution only if we have enough bits to allow this increased ++ * resolution (i.e. 
64-bit). The costs for increasing resolution when 32-bit ++ * are pretty high and the returns do not justify the increased costs. ++ * ++ * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to ++ * increase coverage and consistency always enable it on 64-bit platforms. ++ */ ++#ifdef CONFIG_64BIT ++# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) ++# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT) ++# define scale_load_down(w) \ ++({ \ ++ unsigned long __w = (w); \ ++ if (__w) \ ++ __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \ ++ __w; \ ++}) ++#else ++# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT) ++# define scale_load(w) (w) ++# define scale_load_down(w) (w) ++#endif ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD ++ ++/* ++ * A weight of 0 or 1 can cause arithmetics problems. ++ * A weight of a cfs_rq is the sum of weights of which entities ++ * are queued on this cfs_rq, so a weight of a entity should not be ++ * too large, so as the shares value of a task group. ++ * (The default weight is 1024 - so there's no practical ++ * limitation from this.) ++ */ ++#define MIN_SHARES (1UL << 1) ++#define MAX_SHARES (1UL << 18) +#endif + +/* task_struct::on_rq states: */ @@ -6601,6 +8587,14 @@ index 000000000000..0936cf766514 +#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ +#define WF_FORK 0x02 /* child wakeup after fork */ +#define WF_MIGRATED 0x04 /* internal use, task got migrated */ ++#define WF_ON_CPU 0x08 /* Wakee is on_rq */ ++ ++#define SCHED_QUEUE_BITS (SCHED_BITS - 1) ++ ++struct sched_queue { ++ DECLARE_BITMAP(bitmap, SCHED_QUEUE_BITS); ++ struct list_head heads[SCHED_BITS]; ++}; + +/* + * This is the main, per-CPU runqueue data structure. @@ -6614,8 +8608,9 @@ index 000000000000..0936cf766514 + struct task_struct *idle, *stop, *skip; + struct mm_struct *prev_mm; + -+#ifdef CONFIG_SCHED_BMQ -+ struct bmq queue; ++ struct sched_queue queue; ++#ifdef CONFIG_SCHED_PDS ++ u64 time_edge; +#endif + unsigned long watermark; + @@ -6624,6 +8619,11 @@ index 000000000000..0936cf766514 + + atomic_t nr_iowait; + ++#ifdef CONFIG_SCHED_DEBUG ++ u64 last_seen_need_resched_ns; ++ int ticks_without_resched; ++#endif ++ +#ifdef CONFIG_MEMBARRIER + int membarrier_state; +#endif @@ -6632,14 +8632,24 @@ index 000000000000..0936cf766514 + int cpu; /* cpu of this runqueue */ + bool online; + ++ unsigned int ttwu_pending; ++ unsigned char nohz_idle_balance; ++ unsigned char idle_balance; ++ +#ifdef CONFIG_HAVE_SCHED_AVG_IRQ + struct sched_avg avg_irq; +#endif + +#ifdef CONFIG_SCHED_SMT + int active_balance; -+ struct cpu_stop_work active_balance_work; ++ struct cpu_stop_work active_balance_work; +#endif ++ struct callback_head *balance_callback; ++#ifdef CONFIG_HOTPLUG_CPU ++ struct rcuwait hotplug_wait; ++#endif ++ unsigned int nr_pinned; ++ +#endif /* CONFIG_SMP */ +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + u64 prev_irq_time; @@ -6651,6 +8661,11 @@ index 000000000000..0936cf766514 + u64 prev_steal_time_rq; +#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ + ++ /* For genenal cpu load util */ ++ s32 load_history; ++ u64 load_block; ++ u64 load_stamp; ++ + /* calc_load related fields */ + unsigned long calc_load_update; + long calc_load_active; @@ -6659,14 +8674,15 @@ index 000000000000..0936cf766514 + u64 last_ts_switch; + u64 clock_task; + -+ unsigned long nr_running; ++ unsigned int nr_running; + unsigned long nr_uninterruptible; + +#ifdef CONFIG_SCHED_HRTICK +#ifdef CONFIG_SMP + call_single_data_t hrtick_csd; 
+#endif -+ struct hrtimer hrtick_timer; ++ struct hrtimer hrtick_timer; ++ ktime_t hrtick_time; +#endif + +#ifdef CONFIG_SCHEDSTATS @@ -6688,12 +8704,22 @@ index 000000000000..0936cf766514 + unsigned int ttwu_count; + unsigned int ttwu_local; +#endif /* CONFIG_SCHEDSTATS */ ++ +#ifdef CONFIG_CPU_IDLE + /* Must be inspected within a rcu lock section */ + struct cpuidle_state *idle_state; +#endif ++ ++#ifdef CONFIG_NO_HZ_COMMON ++#ifdef CONFIG_SMP ++ call_single_data_t nohz_csd; ++#endif ++ atomic_t nohz_flags; ++#endif /* CONFIG_NO_HZ_COMMON */ +}; + ++extern unsigned long rq_load_util(struct rq *rq, unsigned long max); ++ +extern unsigned long calc_load_update; +extern atomic_long_t calc_load_tasks; + @@ -6723,33 +8749,40 @@ index 000000000000..0936cf766514 +extern bool sched_smp_initialized; + +enum { -+ BASE_CPU_AFFINITY_CHK_LEVEL = 1, ++ ITSELF_LEVEL_SPACE_HOLDER, +#ifdef CONFIG_SCHED_SMT -+ SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, ++ SMT_LEVEL_SPACE_HOLDER, +#endif -+#ifdef CONFIG_SCHED_MC -+ MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+#endif -+ NR_CPU_AFFINITY_CHK_LEVEL ++ COREGROUP_LEVEL_SPACE_HOLDER, ++ CORE_LEVEL_SPACE_HOLDER, ++ OTHER_LEVEL_SPACE_HOLDER, ++ NR_CPU_AFFINITY_LEVELS +}; + -+DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); ++DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); ++DECLARE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); + -+static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, -+ const cpumask_t *mask) ++static inline int ++__best_mask_cpu(const cpumask_t *cpumask, const cpumask_t *mask) +{ ++ int cpu; ++ + while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) + mask++; ++ + return cpu; +} + -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) ++static inline int best_mask_cpu(int cpu, const cpumask_t *mask) +{ -+ return cpumask_test_cpu(cpu, cpumask)? 
cpu : -+ __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); ++ return __best_mask_cpu(mask, per_cpu(sched_cpu_topo_masks, cpu)); +} + -+#endif /* CONFIG_SMP */ ++extern void flush_smp_call_function_queue(void); ++ ++#else /* !CONFIG_SMP */ ++static inline void flush_smp_call_function_queue(void) { } ++#endif + +#ifndef arch_scale_freq_tick +static __always_inline @@ -6837,12 +8870,26 @@ index 000000000000..0936cf766514 +} + +static inline void ++rq_lock(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock(&rq->lock); ++} ++ ++static inline void +rq_unlock_irq(struct rq *rq, struct rq_flags *rf) + __releases(rq->lock) +{ + raw_spin_unlock_irq(&rq->lock); +} + ++static inline void ++rq_unlock(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ raw_spin_unlock(&rq->lock); ++} ++ +static inline struct rq * +this_rq_lock_irq(struct rq_flags *rf) + __acquires(rq->lock) @@ -6856,6 +8903,41 @@ index 000000000000..0936cf766514 + return rq; +} + ++static inline raw_spinlock_t *__rq_lockp(struct rq *rq) ++{ ++ return &rq->lock; ++} ++ ++static inline raw_spinlock_t *rq_lockp(struct rq *rq) ++{ ++ return __rq_lockp(rq); ++} ++ ++static inline void lockdep_assert_rq_held(struct rq *rq) ++{ ++ lockdep_assert_held(__rq_lockp(rq)); ++} ++ ++extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); ++extern void raw_spin_rq_unlock(struct rq *rq); ++ ++static inline void raw_spin_rq_lock(struct rq *rq) ++{ ++ raw_spin_rq_lock_nested(rq, 0); ++} ++ ++static inline void raw_spin_rq_lock_irq(struct rq *rq) ++{ ++ local_irq_disable(); ++ raw_spin_rq_lock(rq); ++} ++ ++static inline void raw_spin_rq_unlock_irq(struct rq *rq) ++{ ++ raw_spin_rq_unlock(rq); ++ local_irq_enable(); ++} ++ +static inline int task_current(struct rq *rq, struct task_struct *p) +{ + return rq->curr == p; @@ -6866,9 +8948,9 @@ index 000000000000..0936cf766514 + return p->on_cpu; +} + -+extern struct static_key_false sched_schedstats; ++extern int task_running_nice(struct task_struct *p); + -+static inline void sched_ttwu_pending(void) { } ++extern struct static_key_false sched_schedstats; + +#ifdef CONFIG_CPU_IDLE +static inline void idle_set_state(struct rq *rq, @@ -6905,6 +8987,24 @@ index 000000000000..0936cf766514 + +#include "stats.h" + ++#ifdef CONFIG_NO_HZ_COMMON ++#define NOHZ_BALANCE_KICK_BIT 0 ++#define NOHZ_STATS_KICK_BIT 1 ++ ++#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) ++#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) ++ ++#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) ++ ++#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) ++ ++/* TODO: needed? ++extern void nohz_balance_exit_idle(struct rq *rq); ++#else ++static inline void nohz_balance_exit_idle(struct rq *rq) { } ++*/ ++#endif ++ +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +struct irqtime { + u64 total; @@ -6937,39 +9037,6 @@ index 000000000000..0936cf766514 + +#ifdef CONFIG_CPU_FREQ +DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -+ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. -+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. 
-+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. -+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} +#endif /* CONFIG_CPU_FREQ */ + +#ifdef CONFIG_NO_HZ_FULL @@ -6988,6 +9055,8 @@ index 000000000000..0936cf766514 + +extern void schedule_idle(void); + ++#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) ++ +/* + * !! For sched_setattr_nocheck() (kernel) only !! + * @@ -7032,11 +9101,6 @@ index 000000000000..0936cf766514 +} +#endif + -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -+} -+ +#ifdef CONFIG_NUMA +extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); +#else @@ -7046,236 +9110,230 @@ index 000000000000..0936cf766514 +} +#endif + -+void swake_up_all_locked(struct swait_queue_head *q); -+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); ++extern void swake_up_all_locked(struct swait_queue_head *q); ++extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); ++ ++#ifdef CONFIG_PREEMPT_DYNAMIC ++extern int preempt_dynamic_mode; ++extern int sched_dynamic_mode(const char *str); ++extern void sched_dynamic_update(int mode); ++#endif ++ ++static inline void nohz_run_idle_balance(int cpu) { } ++ ++static inline ++unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, ++ struct task_struct *p) ++{ ++ return util; ++} ++ ++static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; } + +#endif /* ALT_SCHED_H */ diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h new file mode 100644 -index 000000000000..4ce30c30bd3e +index 000000000000..66b77291b9d0 --- /dev/null +++ b/kernel/sched/bmq.h -@@ -0,0 +1,14 @@ -+#ifndef BMQ_H -+#define BMQ_H +@@ -0,0 +1,110 @@ ++#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" + -+/* bits: -+ * RT(0-99), Low prio adj range, nice width, high prio adj range, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH + 2 * MAX_PRIORITY_ADJ + 1) -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) ++/* ++ * BMQ only routines ++ */ ++#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) ++#define boost_threshold(p) (sched_timeslice_ns >>\ ++ (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) + -+struct bmq { -+ DECLARE_BITMAP(bitmap, SCHED_BITS); -+ struct list_head heads[SCHED_BITS]; -+}; -+ -+#endif -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -new file mode 100644 -index 000000000000..68313e01356d ---- /dev/null -+++ 
b/kernel/sched/bmq_imp.h -@@ -0,0 +1,86 @@ -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.7-r2 by Alfred Chen.\n" -+ -+static inline void sched_queue_init(struct rq *rq) ++static inline void boost_task(struct task_struct *p) +{ -+ struct bmq *q = &rq->queue; -+ int i; ++ int limit; + -+ bitmap_zero(q->bitmap, SCHED_BITS); -+ for(i = 0; i < SCHED_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); ++ switch (p->policy) { ++ case SCHED_NORMAL: ++ limit = -MAX_PRIORITY_ADJ; ++ break; ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ limit = 0; ++ break; ++ default: ++ return; ++ } ++ ++ if (p->boost_prio > limit) ++ p->boost_prio--; +} + -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) ++static inline void deboost_task(struct task_struct *p) +{ -+ struct bmq *q = &rq->queue; -+ -+ idle->bmq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->bmq_idx]); -+ list_add(&idle->bmq_node, &q->heads[idle->bmq_idx]); -+ set_bit(idle->bmq_idx, q->bitmap); ++ if (p->boost_prio < MAX_PRIORITY_ADJ) ++ p->boost_prio++; +} + +/* -+ * This routine used in bmq scheduler only which assume the idle task in the bmq ++ * Common interfaces + */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ const struct list_head *head = &rq->queue.heads[idx]; ++static inline void sched_timeslice_imp(const int timeslice_ms) {} + -+ return list_first_entry(head, struct task_struct, bmq_node); ++static inline int ++task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) ++{ ++ return p->prio + p->boost_prio - MAX_RT_PRIO; +} + -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) ++static inline int task_sched_prio(const struct task_struct *p) +{ -+ unsigned long idx = p->bmq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->bmq_node, head)) { -+ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); -+ head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, bmq_node); -+ } -+ -+ return list_next_entry(p, bmq_node); ++ return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; +} + -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ list_del(&p->bmq_node); \ -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) { \ -+ clear_bit(p->bmq_idx, rq->queue.bitmap);\ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->bmq_idx = task_sched_prio(p); \ -+ list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ -+ set_bit(p->bmq_idx, rq->queue.bitmap) -+ -+static inline void __requeue_task(struct task_struct *p, struct rq *rq) ++static inline int ++task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) +{ -+ int idx = task_sched_prio(p); ++ return task_sched_prio(p); ++} + -+ list_del(&p->bmq_node); -+ list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); -+ if (idx != p->bmq_idx) { -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) -+ clear_bit(p->bmq_idx, rq->queue.bitmap); -+ p->bmq_idx = idx; -+ set_bit(p->bmq_idx, rq->queue.bitmap); -+ update_sched_rq_watermark(rq); ++static inline int sched_prio2idx(int prio, struct rq *rq) ++{ ++ return prio; ++} ++ ++static inline int sched_idx2prio(int idx, struct rq *rq) ++{ ++ return idx; ++} ++ ++static inline void time_slice_expired(struct task_struct *p, struct rq *rq) ++{ ++ p->time_slice = sched_timeslice_ns; ++ ++ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { ++ if (SCHED_RR != p->policy) ++ deboost_task(p); ++ requeue_task(p, rq, task_sched_prio_idx(p, rq)); + } +} + -+static inline bool sched_task_need_requeue(struct task_struct *p) ++static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) {} ++ ++inline int task_running_nice(struct task_struct *p) +{ -+ return (task_sched_prio(p) != p->bmq_idx); ++ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); +} ++ ++static void sched_task_fork(struct task_struct *p, struct rq *rq) ++{ ++ p->boost_prio = MAX_PRIORITY_ADJ; ++} ++ ++static inline void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) ++{ ++ p->boost_prio = MAX_PRIORITY_ADJ; ++} ++ ++#ifdef CONFIG_SMP ++static inline void sched_task_ttwu(struct task_struct *p) ++{ ++ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) ++ boost_task(p); ++} ++#endif ++ ++static inline void sched_task_deactivate(struct task_struct *p, struct rq *rq) ++{ ++ if (rq_switch_time(rq) < boost_threshold(p)) ++ boost_task(p); ++} ++ ++static inline void update_rq_time_edge(struct rq *rq) {} diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c index d9dc9ab3773f..71a25540d65e 100644 --- a/kernel/sched/build_policy.c +++ b/kernel/sched/build_policy.c @@ -42,13 +42,19 @@ - + #include "idle.c" - + +#ifndef CONFIG_SCHED_ALT #include "rt.c" +#endif - + #ifdef CONFIG_SMP +#ifndef CONFIG_SCHED_ALT # include "cpudeadline.c" +#endif # include "pelt.c" #endif - + #include "cputime.c" -#include "deadline.c" - + +#ifndef CONFIG_SCHED_ALT +#include "deadline.c" +#endif diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c -index 99bdd96f454f..128a283332f4 100644 +index 99bdd96f454f..23f80a86d2d7 100644 --- a/kernel/sched/build_utility.c +++ b/kernel/sched/build_utility.c -@@ -69,9 +69,11 @@ - # include "cpufreq_schedutil.c" - #endif - -+#ifdef CONFIG_SCHED_ALT - #ifdef CONFIG_SCHED_DEBUG - # include "debug.c" - #endif -+#endif - - #ifdef CONFIG_SCHEDSTATS - # include "stats.c" -@@ -85,7 +87,9 @@ - +@@ -85,7 +85,9 @@ + 
#ifdef CONFIG_SMP # include "cpupri.c" -+#ifdef CONFIG_SCHED_ALT ++#ifndef CONFIG_SCHED_ALT # include "stop_task.c" +#endif # include "topology.c" #endif - + diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 3dbf351d12d5..424b1e360af5 100644 +index 3dbf351d12d5..b2590f961139 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c -@@ -154,6 +154,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - +@@ -160,9 +160,14 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) + unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); + + sg_cpu->max = max; +#ifndef CONFIG_SCHED_ALT - static void sugov_get_util(struct sugov_cpu *sg_cpu) - { - struct rq *rq = cpu_rq(sg_cpu->cpu); -@@ -164,6 +165,13 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) + sg_cpu->bw_dl = cpu_bw_dl(rq); sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max, FREQUENCY_UTIL, NULL); ++#else ++ sg_cpu->bw_dl = 0; ++ sg_cpu->util = rq_load_util(rq, max); ++#endif /* CONFIG_SCHED_ALT */ } -+#else /* CONFIG_SCHED_ALT */ -+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+{ -+ sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ return sg_cpu->max; -+} -+#endif - + /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. -@@ -306,7 +314,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } +@@ -306,8 +311,10 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } */ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) { +#ifndef CONFIG_SCHED_ALT if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) -+#endif sg_cpu->sg_policy->limits_changed = true; ++#endif } - -@@ -607,6 +617,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) + + static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, +@@ -607,6 +614,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) } - + ret = sched_setattr_nocheck(thread, &attr); + if (ret) { kthread_stop(thread); pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -837,6 +848,7 @@ struct cpufreq_governor *cpufreq_default_governor(void) - cpufreq_governor_init(schedutil_gov); - +@@ -839,7 +847,9 @@ cpufreq_governor_init(schedutil_gov); #ifdef CONFIG_ENERGY_MODEL -+#ifndef CONFIG_SCHED_ALT static void rebuild_sd_workfn(struct work_struct *work) { ++#ifndef CONFIG_SCHED_ALT rebuild_sched_domains_energy(); -@@ -860,4 +872,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - ++#endif /* CONFIG_SCHED_ALT */ } -+#else /* CONFIG_SCHED_ALT */ -+void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -+ struct cpufreq_governor *old_gov) -+{ -+} -+#endif - #endif + static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); + diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 78a233d43757..b3bbc87d4352 100644 --- a/kernel/sched/cputime.c @@ -7283,15 +9341,15 @@ index 78a233d43757..b3bbc87d4352 100644 @@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) p->utime += cputime; account_group_user_time(p, cputime); - + - index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; + index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; - + /* Add user time to cpustat. 
*/ task_group_account_field(p, index, cputime); @@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) p->gtime += cputime; - + /* Add guest time to cpustat. */ - if (task_nice(p) > 0) { + if (task_running_nice(p)) { @@ -7309,12 +9367,12 @@ index 78a233d43757..b3bbc87d4352 100644 static u64 read_sum_exec_runtime(struct task_struct *t) @@ -279,7 +279,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) struct rq *rq; - + rq = task_rq_lock(t, &rf); - ns = t->se.sum_exec_runtime; + ns = tsk_seruntime(t); task_rq_unlock(rq, t, &rf); - + return ns; @@ -611,7 +611,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) @@ -7323,8 +9381,82 @@ index 78a233d43757..b3bbc87d4352 100644 - .sum_exec_runtime = p->se.sum_exec_runtime, + .sum_exec_runtime = tsk_seruntime(p), }; - + if (task_cputime(p, &cputime.utime, &cputime.stime)) +diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c +index bb3d63bdf4ae..4e1680785704 100644 +--- a/kernel/sched/debug.c ++++ b/kernel/sched/debug.c +@@ -7,6 +7,7 @@ + * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar + */ + ++#ifndef CONFIG_SCHED_ALT + /* + * This allows printing both to /proc/sched_debug and + * to the console +@@ -215,6 +216,7 @@ static const struct file_operations sched_scaling_fops = { + }; + + #endif /* SMP */ ++#endif /* !CONFIG_SCHED_ALT */ + + #ifdef CONFIG_PREEMPT_DYNAMIC + +@@ -278,6 +280,7 @@ static const struct file_operations sched_dynamic_fops = { + + #endif /* CONFIG_PREEMPT_DYNAMIC */ + ++#ifndef CONFIG_SCHED_ALT + __read_mostly bool sched_debug_verbose; + + static const struct seq_operations sched_debug_sops; +@@ -293,6 +296,7 @@ static const struct file_operations sched_debug_fops = { + .llseek = seq_lseek, + .release = seq_release, + }; ++#endif /* !CONFIG_SCHED_ALT */ + + static struct dentry *debugfs_sched; + +@@ -302,12 +306,15 @@ static __init int sched_init_debug(void) + + debugfs_sched = debugfs_create_dir("sched", NULL); + ++#ifndef CONFIG_SCHED_ALT + debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); + debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose); ++#endif /* !CONFIG_SCHED_ALT */ + #ifdef CONFIG_PREEMPT_DYNAMIC + debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); + #endif + ++#ifndef CONFIG_SCHED_ALT + debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency); + debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity); + debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity); +@@ -336,11 +343,13 @@ static __init int sched_init_debug(void) + #endif + + debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); ++#endif /* !CONFIG_SCHED_ALT */ + + return 0; + } + late_initcall(sched_init_debug); + ++#ifndef CONFIG_SCHED_ALT + #ifdef CONFIG_SMP + + static cpumask_var_t sd_sysctl_cpus; +@@ -1067,6 +1076,7 @@ void proc_sched_set_task(struct task_struct *p) + memset(&p->stats, 0, sizeof(p->stats)); + #endif + } ++#endif /* !CONFIG_SCHED_ALT */ + + void resched_latency_warn(int cpu, u64 latency) + { diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 328cccbee444..aef991facc79 100644 --- a/kernel/sched/idle.c @@ -7332,7 +9464,7 @@ index 328cccbee444..aef991facc79 100644 @@ -400,6 +400,7 @@ void cpu_startup_entry(enum cpuhp_state state) do_idle(); } - + +#ifndef CONFIG_SCHED_ALT /* * idle-task scheduling 
class. @@ -7342,34 +9474,170 @@ index 328cccbee444..aef991facc79 100644 .update_curr = update_curr_idle, }; +#endif +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +new file mode 100644 +index 000000000000..56a649d02e49 +--- /dev/null ++++ b/kernel/sched/pds.h +@@ -0,0 +1,127 @@ ++#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" ++ ++static int sched_timeslice_shift = 22; ++ ++#define NORMAL_PRIO_MOD(x) ((x) & (NORMAL_PRIO_NUM - 1)) ++ ++/* ++ * Common interfaces ++ */ ++static inline void sched_timeslice_imp(const int timeslice_ms) ++{ ++ if (2 == timeslice_ms) ++ sched_timeslice_shift = 21; ++} ++ ++static inline int ++task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) ++{ ++ s64 delta = p->deadline - rq->time_edge + NORMAL_PRIO_NUM - NICE_WIDTH; ++ ++ if (WARN_ONCE(delta > NORMAL_PRIO_NUM - 1, ++ "pds: task_sched_prio_normal() delta %lld\n", delta)) ++ return NORMAL_PRIO_NUM - 1; ++ ++ return (delta < 0) ? 0 : delta; ++} ++ ++static inline int task_sched_prio(const struct task_struct *p) ++{ ++ return (p->prio < MAX_RT_PRIO) ? p->prio : ++ MIN_NORMAL_PRIO + task_sched_prio_normal(p, task_rq(p)); ++} ++ ++static inline int ++task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) ++{ ++ return (p->prio < MAX_RT_PRIO) ? p->prio : MIN_NORMAL_PRIO + ++ NORMAL_PRIO_MOD(task_sched_prio_normal(p, rq) + rq->time_edge); ++} ++ ++static inline int sched_prio2idx(int prio, struct rq *rq) ++{ ++ return (IDLE_TASK_SCHED_PRIO == prio || prio < MAX_RT_PRIO) ? prio : ++ MIN_NORMAL_PRIO + NORMAL_PRIO_MOD((prio - MIN_NORMAL_PRIO) + ++ rq->time_edge); ++} ++ ++static inline int sched_idx2prio(int idx, struct rq *rq) ++{ ++ return (idx < MAX_RT_PRIO) ? idx : MIN_NORMAL_PRIO + ++ NORMAL_PRIO_MOD((idx - MIN_NORMAL_PRIO) + NORMAL_PRIO_NUM - ++ NORMAL_PRIO_MOD(rq->time_edge)); ++} ++ ++static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) ++{ ++ if (p->prio >= MAX_RT_PRIO) ++ p->deadline = (rq->clock >> sched_timeslice_shift) + ++ p->static_prio - (MAX_PRIO - NICE_WIDTH); ++} ++ ++int task_running_nice(struct task_struct *p) ++{ ++ return (p->prio > DEFAULT_PRIO); ++} ++ ++static inline void update_rq_time_edge(struct rq *rq) ++{ ++ struct list_head head; ++ u64 old = rq->time_edge; ++ u64 now = rq->clock >> sched_timeslice_shift; ++ u64 prio, delta; ++ ++ if (now == old) ++ return; ++ ++ delta = min_t(u64, NORMAL_PRIO_NUM, now - old); ++ INIT_LIST_HEAD(&head); ++ ++ for_each_set_bit(prio, &rq->queue.bitmap[2], delta) ++ list_splice_tail_init(rq->queue.heads + MIN_NORMAL_PRIO + ++ NORMAL_PRIO_MOD(prio + old), &head); ++ ++ rq->queue.bitmap[2] = (NORMAL_PRIO_NUM == delta) ? 
0UL : ++ rq->queue.bitmap[2] >> delta; ++ rq->time_edge = now; ++ if (!list_empty(&head)) { ++ u64 idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); ++ struct task_struct *p; ++ ++ list_for_each_entry(p, &head, sq_node) ++ p->sq_idx = idx; ++ ++ list_splice(&head, rq->queue.heads + idx); ++ rq->queue.bitmap[2] |= 1UL; ++ } ++} ++ ++static inline void time_slice_expired(struct task_struct *p, struct rq *rq) ++{ ++ p->time_slice = sched_timeslice_ns; ++ sched_renew_deadline(p, rq); ++ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) ++ requeue_task(p, rq, task_sched_prio_idx(p, rq)); ++} ++ ++static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) ++{ ++ u64 max_dl = rq->time_edge + NICE_WIDTH - 1; ++ if (unlikely(p->deadline > max_dl)) ++ p->deadline = max_dl; ++} ++ ++static void sched_task_fork(struct task_struct *p, struct rq *rq) ++{ ++ sched_renew_deadline(p, rq); ++} ++ ++static inline void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) ++{ ++ time_slice_expired(p, rq); ++} ++ ++#ifdef CONFIG_SMP ++static inline void sched_task_ttwu(struct task_struct *p) {} ++#endif ++static inline void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index 0f310768260c..184918afc89c 100644 +index 0f310768260c..bd38bf738fe9 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -266,6 +266,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) WRITE_ONCE(sa->util_avg, sa->util_sum / divider); } - + +#ifndef CONFIG_SCHED_ALT /* * sched_entity: * -@@ -383,6 +384,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - +@@ -383,8 +384,9 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) + return 0; } +#endif - - #ifdef CONFIG_SCHED_THERMAL_PRESSURE + +-#ifdef CONFIG_SCHED_THERMAL_PRESSURE ++#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) /* + * thermal: + * diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index 4ff2ed4f8fa1..b95f7db44dd8 100644 +index 4ff2ed4f8fa1..226eeed61318 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h -@@ -1,11 +1,13 @@ +@@ -1,13 +1,15 @@ #ifdef CONFIG_SMP #include "sched-pelt.h" - + +#ifndef CONFIG_SCHED_ALT int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); @@ -7377,13 +9645,16 @@ index 4ff2ed4f8fa1..b95f7db44dd8 100644 int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); +#endif - - #ifdef CONFIG_SCHED_THERMAL_PRESSURE + +-#ifdef CONFIG_SCHED_THERMAL_PRESSURE ++#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); + + static inline u64 thermal_load_avg(struct rq *rq) @@ -44,6 +46,7 @@ static inline u32 get_pelt_divider(struct sched_avg *avg) return PELT_MIN_DIVIDER + avg->period_contrib; } - + +#ifndef CONFIG_SCHED_ALT static inline void cfs_se_util_change(struct sched_avg *avg) { @@ -7393,9 +9664,9 @@ index 4ff2ed4f8fa1..b95f7db44dd8 100644 } #endif +#endif /* CONFIG_SCHED_ALT */ - + #else - + +#ifndef CONFIG_SCHED_ALT static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) @@ -7405,7 +9676,7 @@ index 4ff2ed4f8fa1..b95f7db44dd8 100644 return 0; } +#endif - + static inline int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h @@ -7415,7 
+9686,7 @@ index 47b89a0fc6e5..de2641a32c22 100644 @@ -5,6 +5,10 @@ #ifndef _KERNEL_SCHED_SCHED_H #define _KERNEL_SCHED_SCHED_H - + +#ifdef CONFIG_SCHED_ALT +#include "alt_sched.h" +#else @@ -7426,7 +9697,7 @@ index 47b89a0fc6e5..de2641a32c22 100644 @@ -3116,4 +3120,9 @@ extern int sched_dynamic_mode(const char *str); extern void sched_dynamic_update(int mode); #endif - + +static inline int task_running_nice(struct task_struct *p) +{ + return (task_nice(p) > 0); @@ -7450,7 +9721,7 @@ index 857f837f52cb..5486c63e4790 100644 rq = cpu_rq(cpu); @@ -143,6 +145,7 @@ static int show_schedstat(struct seq_file *seq, void *v) seq_printf(seq, "\n"); - + #ifdef CONFIG_SMP +#ifndef CONFIG_SCHED_ALT /* domain-specific stats */ @@ -7464,46 +9735,66 @@ index 857f837f52cb..5486c63e4790 100644 #endif } return 0; +diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h +index baa839c1ba96..15238be0581b 100644 +--- a/kernel/sched/stats.h ++++ b/kernel/sched/stats.h +@@ -89,6 +89,7 @@ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delt + + #endif /* CONFIG_SCHEDSTATS */ + ++#ifndef CONFIG_SCHED_ALT + #ifdef CONFIG_FAIR_GROUP_SCHED + struct sched_entity_stats { + struct sched_entity se; +@@ -105,6 +106,7 @@ __schedstats_from_se(struct sched_entity *se) + #endif + return &task_of(se)->stats; + } ++#endif /* CONFIG_SCHED_ALT */ + + #ifdef CONFIG_PSI + /* diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 05b6c2ad90b9..2c9daf90398f 100644 +index 05b6c2ad90b9..480ef393b3c9 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -3,6 +3,7 @@ * Scheduler topology setup/handling methods */ - + +#ifndef CONFIG_SCHED_ALT DEFINE_MUTEX(sched_domains_mutex); - + /* Protected by sched_domains_mutex: */ @@ -1413,8 +1414,10 @@ static void asym_cpu_capacity_scan(void) */ - + static int default_relax_domain_level = -1; +#endif /* CONFIG_SCHED_ALT */ int sched_domain_level_max; - + +#ifndef CONFIG_SCHED_ALT static int __init setup_relax_domain_level(char *str) { if (kstrtoint(str, 0, &default_relax_domain_level)) @@ -1647,6 +1650,7 @@ sd_init(struct sched_domain_topology_level *tl, - + return sd; } +#endif /* CONFIG_SCHED_ALT */ - + /* * Topology list, bottom-up. 
@@ -1683,6 +1687,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl) sched_domain_topology_saved = NULL; } - + +#ifndef CONFIG_SCHED_ALT #ifdef CONFIG_NUMA - + static const struct cpumask *sd_numa_mask(int cpu) -@@ -2638,3 +2643,17 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], +@@ -2638,3 +2643,15 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); mutex_unlock(&sched_domains_mutex); } @@ -7513,8 +9804,6 @@ index 05b6c2ad90b9..2c9daf90398f 100644 +{} + +#ifdef CONFIG_NUMA -+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -+ +int sched_numa_find_closest(const struct cpumask *cpus, int cpu) +{ + return best_mask_cpu(cpu, cpus); @@ -7522,13 +9811,13 @@ index 05b6c2ad90b9..2c9daf90398f 100644 +#endif /* CONFIG_NUMA */ +#endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index e52b6e372c60..124713423ebc 100644 +index 35d034219513..23719c728677 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -86,6 +86,10 @@ - + /* Constants used for minimum and maximum */ - + +#ifdef CONFIG_SCHED_ALT +extern int sched_yield_type; +#endif @@ -7536,7 +9825,23 @@ index e52b6e372c60..124713423ebc 100644 #ifdef CONFIG_PERF_EVENTS static const int six_hundred_forty_kb = 640 * 1024; #endif -@@ -1899,6 +1903,17 @@ static struct ctl_table kern_table[] = { +@@ -1590,6 +1594,7 @@ int proc_do_static_key(struct ctl_table *table, int write, + } + + static struct ctl_table kern_table[] = { ++#ifndef CONFIG_SCHED_ALT + #ifdef CONFIG_NUMA_BALANCING + { + .procname = "numa_balancing", +@@ -1601,6 +1606,7 @@ static struct ctl_table kern_table[] = { + .extra2 = SYSCTL_FOUR, + }, + #endif /* CONFIG_NUMA_BALANCING */ ++#endif /* !CONFIG_SCHED_ALT */ + { + .procname = "panic", + .data = &panic_timeout, +@@ -1902,6 +1908,17 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif @@ -7561,13 +9866,13 @@ index 0ea8702eb516..a27a0f3a654d 100644 @@ -2088,8 +2088,10 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, int ret = 0; u64 slack; - + +#ifndef CONFIG_SCHED_ALT slack = current->timer_slack_ns; if (dl_task(current) || rt_task(current)) +#endif slack = 0; - + hrtimer_init_sleeper_on_stack(&t, clockid, mode); diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index cb925e8ef9a8..67d823510f5c 100644 @@ -7575,17 +9880,17 @@ index cb925e8ef9a8..67d823510f5c 100644 +++ b/kernel/time/posix-cpu-timers.c @@ -223,7 +223,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) u64 stime, utime; - + task_cputime(p, &utime, &stime); - store_samples(samples, stime, utime, p->se.sum_exec_runtime); + store_samples(samples, stime, utime, tsk_seruntime(p)); } - + static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, @@ -866,6 +866,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, } } - + +#ifndef CONFIG_SCHED_ALT static inline void check_dl_overrun(struct task_struct *tsk) { @@ -7595,18 +9900,18 @@ index cb925e8ef9a8..67d823510f5c 100644 } } +#endif - + static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) { @@ -900,8 +902,10 @@ static void check_thread_timers(struct task_struct *tsk, u64 samples[CPUCLOCK_MAX]; unsigned long soft; - + +#ifndef CONFIG_SCHED_ALT if (dl_task(tsk)) check_dl_overrun(tsk); +#endif - + if (expiry_cache_is_inactive(pct)) return; @@ -915,7 +919,7 @@ static void check_thread_timers(struct task_struct *tsk, @@ -7616,21 
+9921,21 @@ index cb925e8ef9a8..67d823510f5c 100644 - unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); + unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - + /* At the hard limit, send SIGKILL. No further action. */ @@ -1151,8 +1155,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) return true; } - + +#ifndef CONFIG_SCHED_ALT if (dl_task(tsk) && tsk->dl.dl_overrun) return true; +#endif - + return false; } diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index a2d301f58ced..bc131c6d1299 100644 +index a2d301f58ced..2ccdede8585c 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1143,10 +1143,15 @@ static int trace_wakeup_test_thread(void *data) @@ -7638,7 +9943,7 @@ index a2d301f58ced..bc131c6d1299 100644 /* Make this a -deadline thread */ static const struct sched_attr attr = { +#ifdef CONFIG_SCHED_ALT -+ /* No deadline on BMQ, use RR */ ++ /* No deadline on BMQ/PDS, use RR */ + .sched_policy = SCHED_RR, +#else .sched_policy = SCHED_DEADLINE, @@ -7648,23632 +9953,4 @@ index a2d301f58ced..bc131c6d1299 100644 +#endif }; struct wakeup_test_data *x = data; - --- -2.37.0 - - -From d98eea85062a3b613ec15b7ceba280c0549d6257 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 1 Jul 2020 21:56:45 +0800 -Subject: [PATCH 002/297] sched/alt: Fix compilation issue when - CONFIG_SCHED_TRACER - ---- - include/linux/sched/deadline.h | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 529e1b2ebd19..da7834c45673 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -4,6 +4,11 @@ - - #ifdef CONFIG_SCHED_BMQ - #define __tsk_deadline(p) (0UL) -+ -+static inline int dl_task(struct task_struct *p) -+{ -+ return 0; -+} - #endif - - #else --- -2.37.0 - - -From 2545c2e765185dfa44556e917aae8d99e5cc81b2 Mon Sep 17 00:00:00 2001 -From: Piotr Gorski -Date: Fri, 10 Jul 2020 16:18:57 +0200 -Subject: [PATCH 003/297] sched/alt: Backport update to - "__set_cpus_allowed_ptr()" in sched/core.c - -Signed-off-by: Piotr Gorski ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 09ca47de425c..ab331d814e4e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1473,7 +1473,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - goto out; - } - -- if (cpumask_equal(p->cpus_ptr, new_mask)) -+ if (cpumask_equal(&p->cpus_mask, new_mask)) - goto out; - - dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); --- -2.37.0 - - -From 0936f2bb4e9964b32c7a6e94ccf75d137a36dfdc Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 29 Jul 2020 06:54:45 +0800 -Subject: [PATCH 004/297] sched/alt: Instroduce rq wake_list. 
- ---- - include/linux/sched.h | 3 + - kernel/sched/alt_core.c | 123 ++++++++++++++++++++++++++++++++++++++- - kernel/sched/alt_sched.h | 10 +++- - 3 files changed, 131 insertions(+), 5 deletions(-) - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 6907241224a5..af63be15fb5e 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -750,6 +750,9 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - -+#if defined(CONFIG_SMP) -+ struct llist_node wake_entry; -+#endif - #if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) - int on_cpu; - #endif -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index ab331d814e4e..48e5fac710bc 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -336,6 +336,20 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) - } - } - -+static inline void -+rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(&rq->lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); -+} -+ - /* - * RQ-clock updating methods: - */ -@@ -770,6 +784,34 @@ void wake_up_nohz_cpu(int cpu) - wake_up_idle_cpu(cpu); - } - -+static inline bool got_nohz_idle_kick(void) -+{ -+ int cpu = smp_processor_id(); -+ -+ /* TODO: need to support nohz_flag -+ if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK)) -+ return false; -+ */ -+ -+ if (idle_cpu(cpu) && !need_resched()) -+ return true; -+ -+ /* -+ * We can't run Idle Load Balance on this CPU for this time so we -+ * cancel it and clear NOHZ_BALANCE_KICK -+ */ -+ /* TODO: need to support nohz_flag -+ atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); -+ */ -+ return false; -+} -+ -+#else /* CONFIG_NO_HZ_COMMON */ -+ -+static inline bool got_nohz_idle_kick(void) -+{ -+ return false; -+} - #endif /* CONFIG_NO_HZ_COMMON */ - #endif /* CONFIG_SMP */ - -@@ -1127,6 +1169,12 @@ static int migration_cpu_stop(void *data) - * be on another CPU but it doesn't matter. - */ - local_irq_disable(); -+ /* -+ * We need to explicitly wake pending tasks before running -+ * __migrate_task() such that we will not miss enforcing cpus_ptr -+ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. -+ */ -+ sched_ttwu_pending(); - - raw_spin_lock(&p->pi_lock); - raw_spin_lock(&rq->lock); -@@ -1608,6 +1656,26 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) - } - - #ifdef CONFIG_SMP -+void sched_ttwu_pending(void) -+{ -+ struct rq *rq = this_rq(); -+ struct llist_node *llist = llist_del_all(&rq->wake_list); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ rq_lock_irqsave(rq, &rf); -+ update_rq_clock(rq); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry) -+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); -+ check_preempt_curr(rq); -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ - void scheduler_ipi(void) - { - /* -@@ -1617,13 +1685,38 @@ void scheduler_ipi(void) - */ - preempt_fold_need_resched(); - -- if (!idle_cpu(smp_processor_id()) || need_resched()) -+ if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) - return; - - irq_enter(); -+ sched_ttwu_pending(); -+ -+ /* -+ * Check if someone kicked us for doing the nohz idle load balance. 
-+ */ -+ if (unlikely(got_nohz_idle_kick())) { -+ /* TODO need to kick off balance -+ this_rq()->idle_balance = 1; -+ raise_softirq_irqoff(SCHED_SOFTIRQ); -+ */ -+ } - irq_exit(); - } - -+static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); -+ -+ if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { -+ if (!set_nr_if_polling(rq->idle)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+ } -+} -+ - void wake_up_if_idle(int cpu) - { - struct rq *rq = cpu_rq(cpu); -@@ -1658,6 +1751,14 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) - { - struct rq *rq = cpu_rq(cpu); - -+#if defined(CONFIG_SMP) -+ if (!cpus_share_cache(smp_processor_id(), cpu)) { -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ ttwu_queue_remote(p, cpu, wake_flags); -+ return; -+ } -+#endif -+ - raw_spin_lock(&rq->lock); - update_rq_clock(rq); - ttwu_do_activate(rq, p, wake_flags); -@@ -3890,7 +3991,20 @@ int task_prio(const struct task_struct *p) - */ - int idle_cpu(int cpu) - { -- return cpu_curr(cpu) == cpu_rq(cpu)->idle; -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (rq->curr != rq->idle) -+ return 0; -+ -+ if (rq->nr_running) -+ return 0; -+ -+#ifdef CONFIG_SMP -+ if (!llist_empty(&rq->wake_list)) -+ return 0; -+#endif -+ -+ return 1; - } - - /** -@@ -5426,6 +5540,9 @@ int sched_cpu_dying(unsigned int cpu) - struct rq *rq = cpu_rq(cpu); - unsigned long flags; - -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_ttwu_pending(); -+ - sched_tick_stop(cpu); - raw_spin_lock_irqsave(&rq->lock, flags); - set_rq_offline(rq); -@@ -5453,7 +5570,7 @@ static void sched_init_topology_cpumask_early(void) - &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); - per_cpu(sched_cpu_affinity_end_mask, cpu) = - &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); -- per_cpu(sd_llc_id, cpu) = cpu; -+ /*per_cpu(sd_llc_id, cpu) = cpu;*/ - } - } - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 0936cf766514..2b66983cce42 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -157,6 +157,11 @@ struct rq { - unsigned int ttwu_count; - unsigned int ttwu_local; - #endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_SMP -+ struct llist_head wake_list; -+#endif -+ - #ifdef CONFIG_CPU_IDLE - /* Must be inspected within a rcu lock section */ - struct cpuidle_state *idle_state; -@@ -218,6 +223,9 @@ static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) - __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); - } - -+extern void sched_ttwu_pending(void); -+#else /* !CONFIG_SMP */ -+static inline void sched_ttwu_pending(void) { } - #endif /* CONFIG_SMP */ - - #ifndef arch_scale_freq_tick -@@ -337,8 +345,6 @@ static inline bool task_running(struct task_struct *p) - - extern struct static_key_false sched_schedstats; - --static inline void sched_ttwu_pending(void) { } -- - #ifdef CONFIG_CPU_IDLE - static inline void idle_set_state(struct rq *rq, - struct cpuidle_state *idle_state) --- -2.37.0 - - -From c7a96fabb29e5b688949e369f0bff15d46fbeefe Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 29 Jul 2020 07:39:05 +0800 -Subject: [PATCH 005/297] Project-C v5.7-r3 - ---- - kernel/sched/bmq_imp.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index 68313e01356d..cb0fc0688a89 100644 ---- a/kernel/sched/bmq_imp.h 
-+++ b/kernel/sched/bmq_imp.h -@@ -1,4 +1,4 @@ --#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.7-r2 by Alfred Chen.\n" -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.7-r3 by Alfred Chen.\n" - - static inline void sched_queue_init(struct rq *rq) - { --- -2.37.0 - - -From 2f12ca6d33d2f9c2a71214b5d59f9e32d548a37e Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 16 Jul 2020 16:48:01 +0800 -Subject: [PATCH 006/297] sched/alt: [Sync] 2beaf3280e57 sched/core: Add - function to sample state of locked-down task - ---- - kernel/sched/alt_core.c | 48 +++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 48 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 48e5fac710bc..51c2b8d0e89b 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1938,6 +1938,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * - * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in - * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). - */ - smp_rmb(); - if (p->on_rq && ttwu_remote(p, wake_flags)) -@@ -2012,6 +2014,52 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - return success; - } - -+/** -+ * try_invoke_on_locked_down_task - Invoke a function on task in fixed state -+ * @p: Process for which the function is to be invoked. -+ * @func: Function to invoke. -+ * @arg: Argument to function. -+ * -+ * If the specified task can be quickly locked into a definite state -+ * (either sleeping or on a given runqueue), arrange to keep it in that -+ * state while invoking @func(@arg). This function can use ->on_rq and -+ * task_curr() to work out what the state is, if required. Given that -+ * @func can be invoked with a runqueue lock held, it had better be quite -+ * lightweight. -+ * -+ * Returns: -+ * @false if the task slipped out from under the locks. -+ * @true if the task was locked onto a runqueue or is sleeping. -+ * However, @func can override this by returning @false. -+ */ -+bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) -+{ -+ bool ret = false; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ lockdep_assert_irqs_enabled(); -+ raw_spin_lock_irq(&p->pi_lock); -+ if (p->on_rq) { -+ rq = __task_rq_lock(p, &rf); -+ if (task_rq(p) == rq) -+ ret = func(p, arg); -+ __task_rq_unlock(rq, &rf); -+ } else { -+ switch (p->state) { -+ case TASK_RUNNING: -+ case TASK_WAKING: -+ break; -+ default: -+ smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). -+ if (!p->on_rq) -+ ret = func(p, arg); -+ } -+ } -+ raw_spin_unlock_irq(&p->pi_lock); -+ return ret; -+} -+ - /** - * wake_up_process - Wake up a specific process - * @p: The process to be woken up. 
--- -2.37.0 - - -From e416d9ae39871ba06d034f1ad70674384d255dac Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 16 Jul 2020 16:51:17 +0800 -Subject: [PATCH 007/297] sched/alt: [Sync] d08b9f0ca660 scs: Add support for - Clang's Shadow Call Stack (SCS) - ---- - kernel/sched/alt_core.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 51c2b8d0e89b..c85e72b38320 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -31,6 +31,7 @@ - #include - - #include -+#include - - #include - -@@ -5281,6 +5282,7 @@ void init_idle(struct task_struct *idle, int cpu) - idle->flags |= PF_IDLE; - sched_queue_init_idle(rq, idle); - -+ scs_task_reset(idle); - kasan_unpoison_task_stack(idle); - - #ifdef CONFIG_SMP --- -2.37.0 - - -From 9a34c52be25591f34a4fdc74d4ce1f90cb343566 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 16 Jul 2020 16:53:06 +0800 -Subject: [PATCH 008/297] sched/alt: [Sync] 88485be531f4 scs: Move - scs_overflow_check() out of architecture code - ---- - kernel/sched/alt_core.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c85e72b38320..03020c9154fe 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3262,6 +3262,9 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) - #ifdef CONFIG_SCHED_STACK_END_CHECK - if (task_stack_end_corrupted(prev)) - panic("corrupted stack end detected inside scheduler\n"); -+ -+ if (task_scs_end_corrupted(prev)) -+ panic("corrupted shadow stack detected inside scheduler\n"); - #endif - - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP --- -2.37.0 - - -From a954cc7c6ed77402e00f1a4421d0bafb1573f6a3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 31 Jul 2020 10:14:20 +0800 -Subject: [PATCH 009/297] sched/alt: [Sync] 90b5363acd47 sched: Clean up - scheduler_ipi() - ---- - kernel/sched/alt_core.c | 75 ++++++++++++++++++++++++++++------------ - kernel/sched/alt_sched.h | 7 ++++ - 2 files changed, 59 insertions(+), 23 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 03020c9154fe..97a857fd0fb3 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -807,6 +807,34 @@ static inline bool got_nohz_idle_kick(void) - return false; - } - -+static void nohz_csd_func(void *info) -+{ -+ struct rq *rq = info; -+ -+ if (got_nohz_idle_kick()) { -+ /* TODO need to kick off balance -+ rq->idle_balance = 1; -+ raise_softirq_irqoff(SCHED_SOFTIRQ); -+ */ -+ } -+ /* -+ int cpu = cpu_of(rq); -+ unsigned int flags; -+ */ -+ -+ /* -+ * Release the rq::nohz_csd. -+ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); -+ WARN_ON(!(flags & NOHZ_KICK_MASK)); -+ -+ rq->idle_balance = idle_cpu(cpu); -+ if (rq->idle_balance && !need_resched()) { -+ rq->nohz_idle_balance = flags; -+ raise_softirq_irqoff(SCHED_SOFTIRQ); -+ } -+ */ -+} -+ - #else /* CONFIG_NO_HZ_COMMON */ - - static inline bool got_nohz_idle_kick(void) -@@ -822,6 +850,14 @@ static inline void check_preempt_curr(struct rq *rq) - resched_curr(rq); - } - -+static inline void -+rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func) -+{ -+ csd->flags = 0; -+ csd->func = func; -+ csd->info = rq; -+} -+ - #ifdef CONFIG_SCHED_HRTICK - /* - * Use HR-timers to deliver accurate preemption points. 
-@@ -938,9 +974,7 @@ void hrtick_start(struct rq *rq, u64 delay) - static void hrtick_rq_init(struct rq *rq) - { - #ifdef CONFIG_SMP -- rq->hrtick_csd.flags = 0; -- rq->hrtick_csd.func = __hrtick_start; -- rq->hrtick_csd.info = rq; -+ rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); - #endif - - hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -@@ -1677,6 +1711,11 @@ void sched_ttwu_pending(void) - rq_unlock_irqrestore(rq, &rf); - } - -+static void wake_csd_func(void *info) -+{ -+ sched_ttwu_pending(); -+} -+ - void scheduler_ipi(void) - { - /* -@@ -1685,23 +1724,6 @@ void scheduler_ipi(void) - * this IPI. - */ - preempt_fold_need_resched(); -- -- if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) -- return; -- -- irq_enter(); -- sched_ttwu_pending(); -- -- /* -- * Check if someone kicked us for doing the nohz idle load balance. -- */ -- if (unlikely(got_nohz_idle_kick())) { -- /* TODO need to kick off balance -- this_rq()->idle_balance = 1; -- raise_softirq_irqoff(SCHED_SOFTIRQ); -- */ -- } -- irq_exit(); - } - - static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) -@@ -1710,9 +1732,9 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) - - p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); - -- if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { -+ if (llist_add(&p->wake_entry, &rq->wake_list)) { - if (!set_nr_if_polling(rq->idle)) -- smp_send_reschedule(cpu); -+ smp_call_function_single_async(cpu, &rq->wake_csd); - else - trace_sched_wake_idle_without_ipi(cpu); - } -@@ -5755,8 +5777,15 @@ void __init sched_init(void) - #endif - #endif - rq->nr_switches = 0; -- atomic_set(&rq->nr_iowait, 0); -+ -+#ifdef CONFIG_NO_HZ_COMMON -+ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); -+#endif -+ - hrtick_rq_init(rq); -+ atomic_set(&rq->nr_iowait, 0); -+ -+ rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); - } - #ifdef CONFIG_SMP - /* Set rq->online for cpu 0 */ -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 2b66983cce42..b4148dd822b2 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -166,6 +166,13 @@ struct rq { - /* Must be inspected within a rcu lock section */ - struct cpuidle_state *idle_state; - #endif -+ -+#ifdef CONFIG_NO_HZ_COMMON -+#ifdef CONFIG_SMP -+ call_single_data_t nohz_csd; -+#endif -+ atomic_t nohz_flags; -+#endif /* CONFIG_NO_HZ_COMMON */ - }; - - extern unsigned long calc_load_update; --- -2.37.0 - - -From de95aaaaf7777cb7bf10ac96db43350607242654 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 31 Jul 2020 10:17:21 +0800 -Subject: [PATCH 010/297] sched/alt: [Sync] 2a0a24ebb499 sched: Make - scheduler_ipi inline - ---- - kernel/sched/alt_core.c | 10 ---------- - 1 file changed, 10 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 97a857fd0fb3..c2301b9f4f55 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1716,16 +1716,6 @@ static void wake_csd_func(void *info) - sched_ttwu_pending(); - } - --void scheduler_ipi(void) --{ -- /* -- * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting -- * TIF_NEED_RESCHED remotely (for the first time) will also send -- * this IPI. 
-- */ -- preempt_fold_need_resched(); --} -- - static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) - { - struct rq *rq = cpu_rq(cpu); --- -2.37.0 - - -From 0bdea6f1069003f6446267162abe2f94b96ec18b Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 31 Jul 2020 14:50:39 +0800 -Subject: [PATCH 011/297] sched/alt: [Sync] c6e7bd7afaeb sched/core: Optimize - ttwu() spinning on p->on_cpu - ---- - kernel/sched/alt_core.c | 49 ++++++++++++++++++++++++++--------------- - 1 file changed, 31 insertions(+), 18 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c2301b9f4f55..d387ae45f5e8 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1716,7 +1716,7 @@ static void wake_csd_func(void *info) - sched_ttwu_pending(); - } - --static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) -+static void __ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) - { - struct rq *rq = cpu_rq(cpu); - -@@ -1730,6 +1730,17 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) - } - } - -+static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) -+{ -+ if (!cpus_share_cache(smp_processor_id(), cpu)) { -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ __ttwu_queue_remote(p, cpu, wake_flags); -+ return true; -+ } -+ -+ return false; -+} -+ - void wake_up_if_idle(int cpu) - { - struct rq *rq = cpu_rq(cpu); -@@ -1765,11 +1776,8 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) - struct rq *rq = cpu_rq(cpu); - - #if defined(CONFIG_SMP) -- if (!cpus_share_cache(smp_processor_id(), cpu)) { -- sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -- ttwu_queue_remote(p, cpu, wake_flags); -+ if (ttwu_queue_remote(p, cpu, wake_flags)) - return; -- } - #endif - - raw_spin_lock(&rq->lock); -@@ -1958,7 +1966,15 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - if (p->on_rq && ttwu_remote(p, wake_flags)) - goto unlock; - -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ - #ifdef CONFIG_SMP -+ p->sched_contributes_to_load = !!task_contributes_to_load(p); -+ p->state = TASK_WAKING; -+ - /* - * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be - * possible to, falsely, observe p->on_cpu == 0. -@@ -1980,6 +1996,16 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - */ - smp_rmb(); - -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, considering queueing p on the remote CPUs wake_list -+ * which potentially sends an IPI instead of spinning on p->on_cpu to -+ * let the waker make forward progress. This is safe because IRQs are -+ * disabled and the IPI will deliver after on_cpu is cleared. -+ */ -+ if (READ_ONCE(p->on_cpu) && ttwu_queue_remote(p, cpu, wake_flags)) -+ goto unlock; -+ - /* - * If the owning (remote) CPU is still in the middle of schedule() with - * this task as prev, wait until its done referencing the task. 
-@@ -1991,14 +2017,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - */ - smp_cond_load_acquire(&p->on_cpu, !VAL); - -- p->sched_contributes_to_load = !!task_contributes_to_load(p); -- p->state = TASK_WAKING; -- -- if (p->in_iowait) { -- delayacct_blkio_end(p); -- atomic_dec(&task_rq(p)->nr_iowait); -- } -- - if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) - boost_task(p); - -@@ -2009,11 +2027,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - psi_ttwu_dequeue(p); - set_task_cpu(p, cpu); - } --#else /* CONFIG_SMP */ -- if (p->in_iowait) { -- delayacct_blkio_end(p); -- atomic_dec(&task_rq(p)->nr_iowait); -- } - #endif /* CONFIG_SMP */ - - ttwu_queue(p, cpu, wake_flags); --- -2.37.0 - - -From 751986f9d045ee7d1277a5351ec0b09f60548dc1 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 08:42:49 +0800 -Subject: [PATCH 012/297] sched/alt: [Sync] 2ebb17717550 sched/core: Offload - wakee task activation if it the wakee is descheduling - ---- - kernel/sched/alt_core.c | 39 +++++++++++++++++++++++++++++++++------ - kernel/sched/alt_sched.h | 1 + - 2 files changed, 34 insertions(+), 6 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index d387ae45f5e8..84cde78423c6 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1716,7 +1716,13 @@ static void wake_csd_func(void *info) - sched_ttwu_pending(); - } - --static void __ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) -+/* -+ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if -+ * necessary. The wakee CPU on receipt of the IPI will queue the task -+ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost -+ * of the wakeup instead of the waker. -+ */ -+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) - { - struct rq *rq = cpu_rq(cpu); - -@@ -1730,11 +1736,32 @@ static void __ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) - } - } - --static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) -+static inline bool ttwu_queue_cond(int cpu, int wake_flags) -+{ -+ /* -+ * If the CPU does not share cache, then queue the task on the -+ * remote rqs wakelist to avoid accessing remote data. -+ */ -+ if (!cpus_share_cache(smp_processor_id(), cpu)) -+ return true; -+ -+ /* -+ * If the task is descheduling and the only running task on the -+ * CPU then use the wakelist to offload the task activation to -+ * the soon-to-be-idle CPU as the current CPU is likely busy. -+ * nr_running is checked to avoid unnecessary task stacking. -+ */ -+ if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1) -+ return true; -+ -+ return false; -+} -+ -+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) - { -- if (!cpus_share_cache(smp_processor_id(), cpu)) { -+ if (ttwu_queue_cond(cpu, wake_flags)) { - sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -- __ttwu_queue_remote(p, cpu, wake_flags); -+ __ttwu_queue_wakelist(p, cpu, wake_flags); - return true; - } - -@@ -1776,7 +1803,7 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) - struct rq *rq = cpu_rq(cpu); - - #if defined(CONFIG_SMP) -- if (ttwu_queue_remote(p, cpu, wake_flags)) -+ if (ttwu_queue_wakelist(p, cpu, wake_flags)) - return; - #endif - -@@ -2003,7 +2030,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * let the waker make forward progress. 
This is safe because IRQs are - * disabled and the IPI will deliver after on_cpu is cleared. - */ -- if (READ_ONCE(p->on_cpu) && ttwu_queue_remote(p, cpu, wake_flags)) -+ if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ)) - goto unlock; - - /* -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index b4148dd822b2..4684f2d27c7c 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -70,6 +70,7 @@ static inline int task_on_rq_migrating(struct task_struct *p) - #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ - #define WF_FORK 0x02 /* child wakeup after fork */ - #define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+#define WF_ON_RQ 0x08 /* Wakee is on_rq */ - - /* - * This is the main, per-CPU runqueue data structure. --- -2.37.0 - - -From 4410bc19de7ae9b89b5e76d540547fc1da4ec1f7 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 09:11:04 +0800 -Subject: [PATCH 013/297] sched/alt: [Sync] 19a1f5ec6999 sched: Fix - smp_call_function_single_async() usage for ILB - ---- - kernel/sched/alt_core.c | 38 +------------------------------------- - kernel/sched/alt_sched.h | 7 +++---- - 2 files changed, 4 insertions(+), 41 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 84cde78423c6..929a2654355e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -785,45 +785,15 @@ void wake_up_nohz_cpu(int cpu) - wake_up_idle_cpu(cpu); - } - --static inline bool got_nohz_idle_kick(void) --{ -- int cpu = smp_processor_id(); -- -- /* TODO: need to support nohz_flag -- if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK)) -- return false; -- */ -- -- if (idle_cpu(cpu) && !need_resched()) -- return true; -- -- /* -- * We can't run Idle Load Balance on this CPU for this time so we -- * cancel it and clear NOHZ_BALANCE_KICK -- */ -- /* TODO: need to support nohz_flag -- atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); -- */ -- return false; --} -- - static void nohz_csd_func(void *info) - { - struct rq *rq = info; -- -- if (got_nohz_idle_kick()) { -- /* TODO need to kick off balance -- rq->idle_balance = 1; -- raise_softirq_irqoff(SCHED_SOFTIRQ); -- */ -- } -- /* - int cpu = cpu_of(rq); - unsigned int flags; -- */ - - /* - * Release the rq::nohz_csd. 
-+ */ - flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); - WARN_ON(!(flags & NOHZ_KICK_MASK)); - -@@ -835,12 +805,6 @@ static void nohz_csd_func(void *info) - */ - } - --#else /* CONFIG_NO_HZ_COMMON */ -- --static inline bool got_nohz_idle_kick(void) --{ -- return false; --} - #endif /* CONFIG_NO_HZ_COMMON */ - #endif /* CONFIG_SMP */ - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 4684f2d27c7c..a6e618494b52 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -102,6 +102,9 @@ struct rq { - int cpu; /* cpu of this runqueue */ - bool online; - -+ struct llist_head wake_list; -+ unsigned char nohz_idle_balance; -+ - #ifdef CONFIG_HAVE_SCHED_AVG_IRQ - struct sched_avg avg_irq; - #endif -@@ -159,10 +162,6 @@ struct rq { - unsigned int ttwu_local; - #endif /* CONFIG_SCHEDSTATS */ - --#ifdef CONFIG_SMP -- struct llist_head wake_list; --#endif -- - #ifdef CONFIG_CPU_IDLE - /* Must be inspected within a rcu lock section */ - struct cpuidle_state *idle_state; --- -2.37.0 - - -From 4103b95205040c136c99efc9830d150cfb1500ac Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 09:52:48 +0800 -Subject: [PATCH 014/297] sched/alt: [Sync] b2a02fc43a1f smp: Optimize - send_call_function_single_ipi() - ---- - kernel/sched/alt_core.c | 11 ++++++++++- - kernel/sched/alt_sched.h | 5 ++++- - 2 files changed, 14 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 929a2654355e..5b27f920de0b 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -793,7 +793,6 @@ static void nohz_csd_func(void *info) - - /* - * Release the rq::nohz_csd. -- */ - flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); - WARN_ON(!(flags & NOHZ_KICK_MASK)); - -@@ -1680,6 +1679,16 @@ static void wake_csd_func(void *info) - sched_ttwu_pending(); - } - -+void send_call_function_single_ipi(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (!set_nr_if_polling(rq->idle)) -+ arch_send_call_function_single_ipi(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ - /* - * Queue a task on the target CPUs wake_list and wake the CPU via IPI if - * necessary. 
The wakee CPU on receipt of the IPI will queue the task -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index a6e618494b52..d996bc91e1b2 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -231,9 +231,12 @@ static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) - } - - extern void sched_ttwu_pending(void); -+extern void flush_smp_call_function_from_idle(void); -+ - #else /* !CONFIG_SMP */ -+static inline void flush_smp_call_function_from_idle(void) { } - static inline void sched_ttwu_pending(void) { } --#endif /* CONFIG_SMP */ -+#endif - - #ifndef arch_scale_freq_tick - static __always_inline --- -2.37.0 - - -From 1c29c11a7f640df8b8eedb958a8fce35338d1236 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 10:04:51 +0800 -Subject: [PATCH 015/297] sched/alt: [Sync] 126c2092e5c8 sched: Add - rq::ttwu_pending - ---- - kernel/sched/alt_core.c | 13 +++++++++++-- - kernel/sched/alt_sched.h | 1 + - 2 files changed, 12 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 5b27f920de0b..9165ad79b363 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1657,13 +1657,21 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) - void sched_ttwu_pending(void) - { - struct rq *rq = this_rq(); -- struct llist_node *llist = llist_del_all(&rq->wake_list); -+ struct llist_node *llist; - struct task_struct *p, *t; - struct rq_flags rf; - -+ llist = llist_del_all(&rq->wake_list); - if (!llist) - return; - -+ /* -+ * rq::ttwu_pending racy indication of out-standing wakeups. -+ * Races such that false-negatives are possible, since they -+ * are shorter lived that false-positives would be. -+ */ -+ WRITE_ONCE(rq->ttwu_pending, 0); -+ - rq_lock_irqsave(rq, &rf); - update_rq_clock(rq); - -@@ -1701,6 +1709,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags - - p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); - -+ WRITE_ONCE(rq->ttwu_pending, 1); - if (llist_add(&p->wake_entry, &rq->wake_list)) { - if (!set_nr_if_polling(rq->idle)) - smp_call_function_single_async(cpu, &rq->wake_csd); -@@ -4077,7 +4086,7 @@ int idle_cpu(int cpu) - return 0; - - #ifdef CONFIG_SMP -- if (!llist_empty(&rq->wake_list)) -+ if (rq->ttwu_pending) - return 0; - #endif - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index d996bc91e1b2..194aae423056 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -102,6 +102,7 @@ struct rq { - int cpu; /* cpu of this runqueue */ - bool online; - -+ unsigned int ttwu_pending; - struct llist_head wake_list; - unsigned char nohz_idle_balance; - --- -2.37.0 - - -From 994e7926ae4a5806586086c33d43d89d7a93d799 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 11:17:52 +0800 -Subject: [PATCH 016/297] sched/alt: [Sync] a148866489fb sched: Replace - rq::wake_list - ---- - include/linux/sched.h | 3 ++- - kernel/sched/alt_core.c | 25 ++++++++----------------- - kernel/sched/alt_sched.h | 3 --- - 3 files changed, 10 insertions(+), 21 deletions(-) - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index af63be15fb5e..4d74a6f6c66d 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -750,8 +750,9 @@ struct task_struct { - unsigned int flags; - unsigned int ptrace; - --#if defined(CONFIG_SMP) -+#ifdef CONFIG_SMP - struct llist_node wake_entry; -+ unsigned int wake_entry_type; - #endif - #if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) - int 
on_cpu; -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 9165ad79b363..e412ca697c38 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1172,7 +1172,7 @@ static int migration_cpu_stop(void *data) - * __migrate_task() such that we will not miss enforcing cpus_ptr - * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. - */ -- sched_ttwu_pending(); -+ flush_smp_call_function_from_idle(); - - raw_spin_lock(&p->pi_lock); - raw_spin_lock(&rq->lock); -@@ -1654,14 +1654,13 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) - } - - #ifdef CONFIG_SMP --void sched_ttwu_pending(void) -+void sched_ttwu_pending(void *arg) - { -+ struct llist_node *llist = arg; - struct rq *rq = this_rq(); -- struct llist_node *llist; - struct task_struct *p, *t; - struct rq_flags rf; - -- llist = llist_del_all(&rq->wake_list); - if (!llist) - return; - -@@ -1682,11 +1681,6 @@ void sched_ttwu_pending(void) - rq_unlock_irqrestore(rq, &rf); - } - --static void wake_csd_func(void *info) --{ -- sched_ttwu_pending(); --} -- - void send_call_function_single_ipi(int cpu) - { - struct rq *rq = cpu_rq(cpu); -@@ -1710,12 +1704,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags - p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); - - WRITE_ONCE(rq->ttwu_pending, 1); -- if (llist_add(&p->wake_entry, &rq->wake_list)) { -- if (!set_nr_if_polling(rq->idle)) -- smp_call_function_single_async(cpu, &rq->wake_csd); -- else -- trace_sched_wake_idle_without_ipi(cpu); -- } -+ __smp_call_single_queue(cpu, &p->wake_entry); - } - - static inline bool ttwu_queue_cond(int cpu, int wake_flags) -@@ -2138,6 +2127,9 @@ static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p - #ifdef CONFIG_COMPACTION - p->capture_control = NULL; - #endif -+#ifdef CONFIG_SMP -+ p->wake_entry_type = CSD_TYPE_TTWU; -+#endif - } - - /* -@@ -5628,9 +5620,8 @@ int sched_cpu_dying(unsigned int cpu) - unsigned long flags; - - /* Handle pending wakeups and then migrate everything off */ -- sched_ttwu_pending(); -- - sched_tick_stop(cpu); -+ - raw_spin_lock_irqsave(&rq->lock, flags); - set_rq_offline(rq); - migrate_tasks(rq); -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 194aae423056..2e7fa0fe74fc 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -103,7 +103,6 @@ struct rq { - bool online; - - unsigned int ttwu_pending; -- struct llist_head wake_list; - unsigned char nohz_idle_balance; - - #ifdef CONFIG_HAVE_SCHED_AVG_IRQ -@@ -231,12 +230,10 @@ static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) - __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); - } - --extern void sched_ttwu_pending(void); - extern void flush_smp_call_function_from_idle(void); - - #else /* !CONFIG_SMP */ - static inline void flush_smp_call_function_from_idle(void) { } --static inline void sched_ttwu_pending(void) { } - #endif - - #ifndef arch_scale_freq_tick --- -2.37.0 - - -From 843332026ebdd2baab8e23ff0e21ff97579ba3bf Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 11:43:42 +0800 -Subject: [PATCH 017/297] sched/alt: [Sync] 1f8db4150536 sched/headers: Split - out open-coded prototypes into kernel/sched/smp.h - ---- - kernel/sched/alt_core.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index e412ca697c38..4a06d8f6d356 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -40,6 +40,7 @@ - 
#include "../smpboot.h" - - #include "pelt.h" -+#include "smp.h" - - #define CREATE_TRACE_POINTS - #include --- -2.37.0 - - -From 2482e5722a64bc24893a2388dee534e34503b3fe Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 11:47:14 +0800 -Subject: [PATCH 018/297] sched/alt: [Sync] 2062a4e8ae9f kallsyms/printk: add - loglvl to print_ip_sym() - ---- - kernel/sched/alt_core.c | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4a06d8f6d356..963bb5d4ab3f 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3271,8 +3271,7 @@ static noinline void __schedule_bug(struct task_struct *prev) - if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) - && in_atomic_preempt_off()) { - pr_err("Preemption disabled at:"); -- print_ip_sym(preempt_disable_ip); -- pr_cont("\n"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); - } - if (panic_on_warn) - panic("scheduling while atomic\n"); -@@ -5887,8 +5886,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset) - #ifdef CONFIG_DEBUG_PREEMPT - if (!preempt_count_equals(preempt_offset)) { - pr_err("Preemption disabled at:"); -- print_ip_sym(preempt_disable_ip); -- pr_cont("\n"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); - } - #endif - dump_stack(); --- -2.37.0 - - -From 77916abb42d024c2f852afa55ca034e8db840a8f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 11:59:38 +0800 -Subject: [PATCH 019/297] sched/alt: [Sync] 8ba09b1dc131 sched: print stack - trace with KERN_INFO - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 963bb5d4ab3f..9e0feae7aa60 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5214,7 +5214,7 @@ void sched_show_task(struct task_struct *p) - (unsigned long)task_thread_info(p)->flags); - - print_worker_info(KERN_INFO, p); -- show_stack(p, NULL); -+ show_stack_loglvl(p, NULL, KERN_INFO); - put_task_stack(p); - } - EXPORT_SYMBOL_GPL(sched_show_task); --- -2.37.0 - - -From 0113ee7acf2f8d4aea16c6d10ba2cd2755120801 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 18:48:11 +0800 -Subject: [PATCH 020/297] sched/alt: [Sync] 9cb8f069deee kernel: rename - show_stack_loglvl() => show_stack() - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 9e0feae7aa60..c47934a24d70 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5214,7 +5214,7 @@ void sched_show_task(struct task_struct *p) - (unsigned long)task_thread_info(p)->flags); - - print_worker_info(KERN_INFO, p); -- show_stack_loglvl(p, NULL, KERN_INFO); -+ show_stack(p, NULL, KERN_INFO); - put_task_stack(p); - } - EXPORT_SYMBOL_GPL(sched_show_task); --- -2.37.0 - - -From cae34e897581b021f12285da75e46d03c36df448 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 20:22:57 +0800 -Subject: [PATCH 021/297] sched/alt: [Sync] b6e13e85829f sched/core: Fix ttwu() - race - ---- - kernel/sched/alt_core.c | 34 +++++++++++++++++++++++++++++----- - 1 file changed, 29 insertions(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c47934a24d70..d5d3111850a8 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1675,8 +1675,16 @@ void sched_ttwu_pending(void *arg) - rq_lock_irqsave(rq, &rf); - update_rq_clock(rq); - -- 
llist_for_each_entry_safe(p, t, llist, wake_entry) -+ llist_for_each_entry_safe(p, t, llist, wake_entry) { -+ if (WARN_ON_ONCE(p->on_cpu)) -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) -+ set_task_cpu(p, cpu_of(rq)); -+ - ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); -+ } -+ - check_preempt_curr(rq); - - rq_unlock_irqrestore(rq, &rf); -@@ -1732,6 +1740,9 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) - static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) - { - if (ttwu_queue_cond(cpu, wake_flags)) { -+ if (WARN_ON_ONCE(cpu == smp_processor_id())) -+ return false; -+ - sched_clock_cpu(cpu); /* Sync clocks across CPUs */ - __ttwu_queue_wakelist(p, cpu, wake_flags); - return true; -@@ -1915,7 +1926,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - goto out; - - success = 1; -- cpu = task_cpu(p); - trace_sched_waking(p); - p->state = TASK_RUNNING; - trace_sched_wakeup(p); -@@ -1937,7 +1947,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - - /* We're going to change ->state: */ - success = 1; -- cpu = task_cpu(p); - - /* - * Ensure we load p->on_rq _after_ p->state, otherwise it would -@@ -2001,8 +2010,21 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * which potentially sends an IPI instead of spinning on p->on_cpu to - * let the waker make forward progress. This is safe because IRQs are - * disabled and the IPI will deliver after on_cpu is cleared. -+ * -+ * Ensure we load task_cpu(p) after p->on_cpu: -+ * -+ * set_task_cpu(p, cpu); -+ * STORE p->cpu = @cpu -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock -+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) -+ * STORE p->on_cpu = 1 LOAD p->cpu -+ * -+ * to ensure we observe the correct CPU on which the task is currently -+ * scheduling. - */ -- if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ)) -+ if (smp_load_acquire(&p->on_cpu) && -+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_RQ)) - goto unlock; - - /* -@@ -2026,6 +2048,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - psi_ttwu_dequeue(p); - set_task_cpu(p, cpu); - } -+#else -+ cpu = task_cpu(p); - #endif /* CONFIG_SMP */ - - ttwu_queue(p, cpu, wake_flags); -@@ -2033,7 +2057,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - raw_spin_unlock_irqrestore(&p->pi_lock, flags); - out: - if (success) -- ttwu_stat(p, cpu, wake_flags); -+ ttwu_stat(p, task_cpu(p), wake_flags); - preempt_enable(); - - return success; --- -2.37.0 - - -From 24c842c7c342175b20b86aae5059a97029a0c5e8 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 20:27:29 +0800 -Subject: [PATCH 022/297] sched/alt: [Sync] 739f70b476cf sched/core: - s/WF_ON_RQ/WQ_ON_CPU/ - ---- - kernel/sched/alt_core.c | 4 ++-- - kernel/sched/alt_sched.h | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index d5d3111850a8..c1664c64222b 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1731,7 +1731,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) - * the soon-to-be-idle CPU as the current CPU is likely busy. - * nr_running is checked to avoid unnecessary task stacking. 
- */ -- if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1) -+ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) - return true; - - return false; -@@ -2024,7 +2024,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * scheduling. - */ - if (smp_load_acquire(&p->on_cpu) && -- ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_RQ)) -+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) - goto unlock; - - /* -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 2e7fa0fe74fc..cf4df89f6b50 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -70,7 +70,7 @@ static inline int task_on_rq_migrating(struct task_struct *p) - #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ - #define WF_FORK 0x02 /* child wakeup after fork */ - #define WF_MIGRATED 0x04 /* internal use, task got migrated */ --#define WF_ON_RQ 0x08 /* Wakee is on_rq */ -+#define WF_ON_CPU 0x08 /* Wakee is on_rq */ - - /* - * This is the main, per-CPU runqueue data structure. --- -2.37.0 - - -From 51b020d97aa97327a38bcfc6a8b510017c6e27a6 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 2 Aug 2020 23:58:37 +0800 -Subject: [PATCH 023/297] sched/alt: [Sync] 8c4890d1c335 smp, irq_work: - Continue smp_call_function*() and irq_work*() integration - ---- - include/linux/sched.h | 3 +-- - kernel/sched/alt_core.c | 6 +++--- - 2 files changed, 4 insertions(+), 5 deletions(-) - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 4d74a6f6c66d..20afd23b94d8 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -751,8 +751,7 @@ struct task_struct { - unsigned int ptrace; - - #ifdef CONFIG_SMP -- struct llist_node wake_entry; -- unsigned int wake_entry_type; -+ struct __call_single_node wake_entry; - #endif - #if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) - int on_cpu; -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c1664c64222b..399f9025fe59 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1675,7 +1675,7 @@ void sched_ttwu_pending(void *arg) - rq_lock_irqsave(rq, &rf); - update_rq_clock(rq); - -- llist_for_each_entry_safe(p, t, llist, wake_entry) { -+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { - if (WARN_ON_ONCE(p->on_cpu)) - smp_cond_load_acquire(&p->on_cpu, !VAL); - -@@ -1713,7 +1713,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags - p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); - - WRITE_ONCE(rq->ttwu_pending, 1); -- __smp_call_single_queue(cpu, &p->wake_entry); -+ __smp_call_single_queue(cpu, &p->wake_entry.llist); - } - - static inline bool ttwu_queue_cond(int cpu, int wake_flags) -@@ -2153,7 +2153,7 @@ static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p - p->capture_control = NULL; - #endif - #ifdef CONFIG_SMP -- p->wake_entry_type = CSD_TYPE_TTWU; -+ p->wake_entry.u_flags = CSD_TYPE_TTWU; - #endif - } - --- -2.37.0 - - -From 853a5db6560ee53bec0dbdeee92c520a1cd9b397 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 3 Aug 2020 21:41:20 +0800 -Subject: [PATCH 024/297] sched/alt: [Sync] dbfb089d360b sched: Fix loadavg - accounting race - ---- - kernel/sched/alt_core.c | 71 ++++++++++++++++++++++++++++++++--------- - 1 file changed, 56 insertions(+), 15 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 399f9025fe59..d6011a8c4c76 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -993,8 +993,6 @@ 
static int effective_prio(struct task_struct *p) - */ - static void activate_task(struct task_struct *p, struct rq *rq) - { -- if (task_contributes_to_load(p)) -- rq->nr_uninterruptible--; - enqueue_task(p, rq, ENQUEUE_WAKEUP); - p->on_rq = TASK_ON_RQ_QUEUED; - cpufreq_update_util(rq, 0); -@@ -1007,8 +1005,6 @@ static void activate_task(struct task_struct *p, struct rq *rq) - */ - static inline void deactivate_task(struct task_struct *p, struct rq *rq) - { -- if (task_contributes_to_load(p)) -- rq->nr_uninterruptible++; - dequeue_task(p, rq, DEQUEUE_SLEEP); - p->on_rq = 0; - cpufreq_update_util(rq, 0); -@@ -1629,10 +1625,8 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) - static inline void - ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) - { --#ifdef CONFIG_SMP - if (p->sched_contributes_to_load) - rq->nr_uninterruptible--; --#endif - - activate_task(p, rq); - ttwu_do_wakeup(rq, p, 0); -@@ -1971,7 +1965,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). - */ - smp_rmb(); -- if (p->on_rq && ttwu_remote(p, wake_flags)) -+ if (READ_ONCE(p->on_rq) && ttwu_remote(p, wake_flags)) - goto unlock; - - if (p->in_iowait) { -@@ -1980,9 +1974,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - } - - #ifdef CONFIG_SMP -- p->sched_contributes_to_load = !!task_contributes_to_load(p); -- p->state = TASK_WAKING; -- - /* - * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be - * possible to, falsely, observe p->on_cpu == 0. -@@ -2001,8 +1992,20 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * - * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in - * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure -+ * schedule()'s deactivate_task() has 'happened' and p will no longer -+ * care about it's own p->state. See the comment in __schedule(). - */ -- smp_rmb(); -+ smp_acquire__after_ctrl_dep(); -+ -+ /* -+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq -+ * == 0), which means we need to do an enqueue, change p->state to -+ * TASK_WAKING such that we can unlock p->pi_lock before doing the -+ * enqueue, such as ttwu_queue_wakelist(). -+ */ -+ p->state = TASK_WAKING; - - /* - * If the owning (remote) CPU is still in the middle of schedule() with -@@ -3528,6 +3531,7 @@ static void __sched notrace __schedule(bool preempt) - { - struct task_struct *prev, *next; - unsigned long *switch_count; -+ unsigned long prev_state; - struct rq *rq; - int cpu; - -@@ -3543,12 +3547,22 @@ static void __sched notrace __schedule(bool preempt) - local_irq_disable(); - rcu_note_context_switch(preempt); - -+ /* See deactivate_task() below. */ -+ prev_state = prev->state; -+ - /* - * Make sure that signal_pending_state()->signal_pending() below - * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -- * done by the caller to avoid the race with signal_wake_up(). 
-+ * done by the caller to avoid the race with signal_wake_up(): -+ * -+ * __set_current_state(@state) signal_wake_up() -+ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) -+ * wake_up_state(p, state) -+ * LOCK rq->lock LOCK p->pi_state -+ * smp_mb__after_spinlock() smp_mb__after_spinlock() -+ * if (signal_pending_state()) if (p->state & @state) - * -- * The membarrier system call requires a full memory barrier -+ * Also, the membarrier system call requires a full memory barrier - * after coming from user-space, before storing to rq->curr. - */ - raw_spin_lock(&rq->lock); -@@ -3557,10 +3571,37 @@ static void __sched notrace __schedule(bool preempt) - update_rq_clock(rq); - - switch_count = &prev->nivcsw; -- if (!preempt && prev->state) { -- if (signal_pending_state(prev->state, prev)) { -+ /* -+ * We must load prev->state once (task_struct::state is volatile), such -+ * that: -+ * -+ * - we form a control dependency vs deactivate_task() below. -+ * - ptrace_{,un}freeze_traced() can change ->state underneath us. -+ */ -+ prev_state = prev->state; -+ if (!preempt && prev_state && prev_state == prev->state) { -+ if (signal_pending_state(prev_state, prev)) { - prev->state = TASK_RUNNING; - } else { -+ prev->sched_contributes_to_load = -+ (prev_state & TASK_UNINTERRUPTIBLE) && -+ !(prev_state & TASK_NOLOAD) && -+ !(prev->flags & PF_FROZEN); -+ -+ if (prev->sched_contributes_to_load) -+ rq->nr_uninterruptible++; -+ -+ /* -+ * __schedule() ttwu() -+ * prev_state = prev->state; if (p->on_rq && ...) -+ * if (prev_state) goto out; -+ * p->on_rq = 0; smp_acquire__after_ctrl_dep(); -+ * p->state = TASK_WAKING -+ * -+ * Where __schedule() and ttwu() have matching control dependencies. -+ * -+ * After this, schedule() must not care about p->state any more. -+ */ - if (rq_switch_time(rq) < boost_threshold(prev)) - boost_task(prev); - deactivate_task(prev, rq); --- -2.37.0 - - -From f751c772c3c27dbfce66568a0f23fda6eb8daeaa Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 3 Aug 2020 21:49:38 +0800 -Subject: [PATCH 025/297] sched/alt: [Sync] ce3614daabea sched: Fix unreliable - rseq cpu_id for new tasks - ---- - kernel/sched/alt_core.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index d6011a8c4c76..ef73e640af62 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2230,6 +2230,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - } - raw_spin_unlock(&rq->lock); - -+ rseq_migrate(p); - /* - * We're setting the CPU for the first time, we don't migrate, - * so use __set_task_cpu(). -@@ -2341,6 +2342,7 @@ void wake_up_new_task(struct task_struct *p) - - rq = cpu_rq(select_task_rq(p)); - #ifdef CONFIG_SMP -+ rseq_migrate(p); - /* - * Fork balancing, do it here and not earlier because: - * - cpus_ptr can change in the fork path --- -2.37.0 - - -From 0ee63988b12700f261aa28a588806f4c91ba1949 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 3 Aug 2020 21:52:54 +0800 -Subject: [PATCH 026/297] sched/alt: [Sync] d136122f5845 sched: Fix race - against ptrace_freeze_trace() - ---- - kernel/sched/alt_core.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index ef73e640af62..abe5321fa665 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3549,9 +3549,6 @@ static void __sched notrace __schedule(bool preempt) - local_irq_disable(); - rcu_note_context_switch(preempt); - -- /* See deactivate_task() below. 
*/ -- prev_state = prev->state; -- - /* - * Make sure that signal_pending_state()->signal_pending() below - * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) --- -2.37.0 - - -From 611fc53384a98f1b89b59fe928c6693ed7d724f0 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 3 Aug 2020 21:58:15 +0800 -Subject: [PATCH 027/297] sched/alt: [Sync] 062d3f95b630 sched: Warn if garbage - is passed to default_wake_function() - ---- - kernel/sched/alt_core.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index abe5321fa665..a6b71eba16ab 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3920,6 +3920,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) - int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, - void *key) - { -+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); - return try_to_wake_up(curr->private, mode, wake_flags); - } - EXPORT_SYMBOL(default_wake_function); --- -2.37.0 - - -From 8562c5bfbd8fdc4fd796a0260fb26003a66a1a10 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 7 Aug 2020 10:28:07 +0800 -Subject: [PATCH 028/297] sched/alt: Fix/Enabled nohz_csd_func(), but not used. - ---- - kernel/sched/alt_core.c | 12 +++--------- - kernel/sched/alt_sched.h | 19 +++++++++++++++++++ - 2 files changed, 22 insertions(+), 9 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index a6b71eba16ab..2f67a87b4b35 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -701,13 +701,9 @@ void resched_cpu(int cpu) - - #ifdef CONFIG_SMP - #ifdef CONFIG_NO_HZ_COMMON --void nohz_balance_enter_idle(int cpu) --{ --} -+void nohz_balance_enter_idle(int cpu) {} - --void select_nohz_load_balancer(int stop_tick) --{ --} -+void select_nohz_load_balancer(int stop_tick) {} - - void set_cpu_sd_state_idle(void) {} - -@@ -794,6 +790,7 @@ static void nohz_csd_func(void *info) - - /* - * Release the rq::nohz_csd. -+ */ - flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); - WARN_ON(!(flags & NOHZ_KICK_MASK)); - -@@ -802,7 +799,6 @@ static void nohz_csd_func(void *info) - rq->nohz_idle_balance = flags; - raise_softirq_irqoff(SCHED_SOFTIRQ); - } -- */ - } - - #endif /* CONFIG_NO_HZ_COMMON */ -@@ -5852,8 +5848,6 @@ void __init sched_init(void) - - hrtick_rq_init(rq); - atomic_set(&rq->nr_iowait, 0); -- -- rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); - } - #ifdef CONFIG_SMP - /* Set rq->online for cpu 0 */ -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index cf4df89f6b50..d8887f377455 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -104,6 +104,7 @@ struct rq { - - unsigned int ttwu_pending; - unsigned char nohz_idle_balance; -+ unsigned char idle_balance; - - #ifdef CONFIG_HAVE_SCHED_AVG_IRQ - struct sched_avg avg_irq; -@@ -388,6 +389,24 @@ static inline int cpu_of(const struct rq *rq) - - #include "stats.h" - -+#ifdef CONFIG_NO_HZ_COMMON -+#define NOHZ_BALANCE_KICK_BIT 0 -+#define NOHZ_STATS_KICK_BIT 1 -+ -+#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) -+#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) -+ -+#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) -+ -+#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) -+ -+/* TODO: needed? 
-+extern void nohz_balance_exit_idle(struct rq *rq); -+#else -+static inline void nohz_balance_exit_idle(struct rq *rq) { } -+*/ -+#endif -+ - #ifdef CONFIG_IRQ_TIME_ACCOUNTING - struct irqtime { - u64 total; --- -2.37.0 - - -From 0107ae666f6d3b8a02885300ad77051b147cb69f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 7 Aug 2020 23:09:43 +0800 -Subject: [PATCH 029/297] Project-C v5.8-r0 - ---- - kernel/sched/bmq_imp.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index cb0fc0688a89..e581fbf3528b 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -1,4 +1,4 @@ --#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.7-r3 by Alfred Chen.\n" -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.8-r0 by Alfred Chen.\n" - - static inline void sched_queue_init(struct rq *rq) - { --- -2.37.0 - - -From 1a97ac217cf27a9b059f49ac83890a1b4449fd9c Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 11 Aug 2020 21:58:03 +0800 -Subject: [PATCH 030/297] sched/alt: Fix UP compilation issue. - ---- - kernel/sched/alt_core.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 2f67a87b4b35..5580b600db67 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -141,7 +141,9 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); - - #define IDLE_WM (IDLE_TASK_SCHED_PRIO) - -+#ifdef CONFIG_SCHED_SMT - static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; -+#endif - static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; - - static inline void update_sched_rq_watermark(struct rq *rq) -@@ -5839,12 +5841,12 @@ void __init sched_init(void) - #ifdef CONFIG_SCHED_SMT - rq->active_balance = 0; - #endif --#endif -- rq->nr_switches = 0; - - #ifdef CONFIG_NO_HZ_COMMON - rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); - #endif -+#endif /* CONFIG_SMP */ -+ rq->nr_switches = 0; - - hrtick_rq_init(rq); - atomic_set(&rq->nr_iowait, 0); --- -2.37.0 - - -From 257d6f0f7f6e42d1dd6eefb4d2e0382ab7c6635a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 17 Aug 2020 12:31:46 +0800 -Subject: [PATCH 031/297] sched/alt: Fix compilation issue when - CONFIG_SCHED_THERMAL_PRESSURE enabled. 
- ---- - kernel/sched/pelt.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index b95f7db44dd8..226eeed61318 100644 ---- a/kernel/sched/pelt.h -+++ b/kernel/sched/pelt.h -@@ -9,7 +9,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); - int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); - #endif - --#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) - int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); - - static inline u64 thermal_load_avg(struct rq *rq) --- -2.37.0 - - -From 1772a32431c6955530b2c93c79170f4f94a7601c Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 17 Aug 2020 13:55:00 +0800 -Subject: [PATCH 032/297] Project-C v5.8-r1 - ---- - kernel/sched/bmq_imp.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index e581fbf3528b..86d496ec23b3 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -1,4 +1,4 @@ --#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.8-r0 by Alfred Chen.\n" -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.8-r1 by Alfred Chen.\n" - - static inline void sched_queue_init(struct rq *rq) - { --- -2.37.0 - - -From e57a7b15a79162df169b9f5caf3fea94a697d568 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 30 Aug 2020 08:36:33 +0800 -Subject: [PATCH 033/297] sched/alt: Fix compilation issue when - CONFIG_SCHED_THERMAL_PRESSURE enabled, cont. - ---- - kernel/sched/pelt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index 184918afc89c..bd38bf738fe9 100644 ---- a/kernel/sched/pelt.c -+++ b/kernel/sched/pelt.c -@@ -386,7 +386,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - } - #endif - --#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) - /* - * thermal: - * --- -2.37.0 - - -From 9bff06ecd831861235d8de28895d952a65fdc520 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 9 Aug 2020 08:49:45 +0800 -Subject: [PATCH 034/297] sched/pds: Port of PDS - -Port PDS from 5.0 to current Project C. 
---- - include/linux/sched.h | 11 +- - include/linux/sched/deadline.h | 4 + - include/linux/sched/prio.h | 7 +- - include/linux/skip_list.h | 177 ++++++++++++++++++++++++++++ - init/Kconfig | 6 + - init/init_task.c | 7 +- - kernel/locking/rtmutex.c | 8 ++ - kernel/sched/alt_core.c | 110 ++++++------------ - kernel/sched/alt_sched.h | 11 +- - kernel/sched/bmq.h | 6 + - kernel/sched/bmq_imp.h | 127 +++++++++++++++++--- - kernel/sched/pds.h | 14 +++ - kernel/sched/pds_imp.h | 205 +++++++++++++++++++++++++++++++++ - 13 files changed, 594 insertions(+), 99 deletions(-) - create mode 100644 include/linux/skip_list.h - create mode 100644 kernel/sched/pds.h - create mode 100644 kernel/sched/pds_imp.h - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 20afd23b94d8..2bbe9f377c6e 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -34,6 +34,7 @@ - #include - #include - #include -+#include - #include - - /* task_struct member predeclarations (sorted alphabetically): */ -@@ -782,11 +783,19 @@ struct task_struct { - #ifdef CONFIG_SCHED_ALT - u64 last_ran; - s64 time_slice; -- int boost_prio; - #ifdef CONFIG_SCHED_BMQ -+ int boost_prio; - int bmq_idx; - struct list_head bmq_node; - #endif /* CONFIG_SCHED_BMQ */ -+#ifdef CONFIG_SCHED_PDS -+ u64 deadline; -+ u64 priodl; -+ /* skip list level */ -+ int sl_level; -+ /* skip list node */ -+ struct skiplist_node sl_node; -+#endif /* CONFIG_SCHED_PDS */ - /* sched_clock time spent running */ - u64 sched_time; - #else /* !CONFIG_SCHED_ALT */ -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index da7834c45673..0aa37b0a1676 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -11,6 +11,10 @@ static inline int dl_task(struct task_struct *p) - } - #endif - -+#ifdef CONFIG_SCHED_PDS -+#define __tsk_deadline(p) ((p)->priodl) -+#endif -+ - #else - - #define __tsk_deadline(p) ((p)->dl.deadline) -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index c28676e431be..4d4f92bffeea 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -18,9 +18,12 @@ - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - --#ifdef CONFIG_SCHED_ALT - /* +/- priority levels from the base priority */ --#define MAX_PRIORITY_ADJ 4 -+#ifdef CONFIG_SCHED_BMQ -+#define MAX_PRIORITY_ADJ 7 -+#endif -+#ifdef CONFIG_SCHED_PDS -+#define MAX_PRIORITY_ADJ 0 - #endif - - /* -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -new file mode 100644 -index 000000000000..47ca955a451d ---- /dev/null -+++ b/include/linux/skip_list.h -@@ -0,0 +1,177 @@ -+/* -+ * Copyright (C) 2016 Alfred Chen. -+ * -+ * Code based on Con Kolivas's skip list implementation for BFS, and -+ * which is based on example originally by William Pugh. -+ * -+ * Skip Lists are a probabilistic alternative to balanced trees, as -+ * described in the June 1990 issue of CACM and were invented by -+ * William Pugh in 1987. -+ * -+ * A couple of comments about this implementation: -+ * -+ * This file only provides a infrastructure of skip list. -+ * -+ * skiplist_node is embedded into container data structure, to get rid -+ * the dependency of kmalloc/kfree operation in scheduler code. -+ * -+ * A customized search function should be defined using DEFINE_SKIPLIST_INSERT -+ * macro and be used for skip list insert operation. 
-+ * -+ * Random Level is also not defined in this file, instead, it should be -+ * customized implemented and set to node->level then pass to the customized -+ * skiplist_insert function. -+ * -+ * Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) -+ * -+ * NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, -+ * considering that there will be 256 entries to enable the top level when using -+ * random level p=0.5, and that number is more than enough for a run queue usage -+ * in a scheduler usage. And it also help to reduce the memory usage of the -+ * embedded skip list node in task_struct to about 50%. -+ * -+ * The insertion routine has been implemented so as to use the -+ * dirty hack described in the CACM paper: if a random level is -+ * generated that is more than the current maximum level, the -+ * current maximum level plus one is used instead. -+ * -+ * BFS Notes: In this implementation of skiplists, there are bidirectional -+ * next/prev pointers and the insert function returns a pointer to the actual -+ * node the value is stored. The key here is chosen by the scheduler so as to -+ * sort tasks according to the priority list requirements and is no longer used -+ * by the scheduler after insertion. The scheduler lookup, however, occurs in -+ * O(1) time because it is always the first item in the level 0 linked list. -+ * Since the task struct stores a copy of the node pointer upon skiplist_insert, -+ * it can also remove it much faster than the original implementation with the -+ * aid of prev<->next pointer manipulation and no searching. -+ */ -+#ifndef _LINUX_SKIP_LIST_H -+#define _LINUX_SKIP_LIST_H -+ -+#include -+ -+#define NUM_SKIPLIST_LEVEL (8) -+ -+struct skiplist_node { -+ int level; /* Levels in this node */ -+ struct skiplist_node *next[NUM_SKIPLIST_LEVEL]; -+ struct skiplist_node *prev[NUM_SKIPLIST_LEVEL]; -+}; -+ -+#define SKIPLIST_NODE_INIT(name) { 0,\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ {&name, &name, &name, &name,\ -+ &name, &name, &name, &name},\ -+ } -+ -+static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ /* only level 0 ->next matters in skiplist_empty() */ -+ WRITE_ONCE(node->next[0], node); -+} -+ -+/** -+ * FULL_INIT_SKIPLIST_NODE -- fully init a skiplist_node, expecially for header -+ * @node: the skip list node to be inited. -+ */ -+static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) -+{ -+ int i; -+ -+ node->level = 0; -+ for (i = 0; i < NUM_SKIPLIST_LEVEL; i++) { -+ WRITE_ONCE(node->next[i], node); -+ node->prev[i] = node; -+ } -+} -+ -+/** -+ * skiplist_empty - test whether a skip list is empty -+ * @head: the skip list to test. -+ */ -+static inline int skiplist_empty(const struct skiplist_node *head) -+{ -+ return READ_ONCE(head->next[0]) == head; -+} -+ -+/** -+ * skiplist_entry - get the struct for this entry -+ * @ptr: the &struct skiplist_node pointer. -+ * @type: the type of the struct this is embedded in. -+ * @member: the name of the skiplist_node within the struct. 
-+ */ -+#define skiplist_entry(ptr, type, member) \ -+ container_of(ptr, type, member) -+ -+/** -+ * DEFINE_SKIPLIST_INSERT_FUNC -- macro to define a customized skip list insert -+ * function, which takes two parameters, first one is the header node of the -+ * skip list, second one is the skip list node to be inserted -+ * @func_name: the customized skip list insert function name -+ * @search_func: the search function to be used, which takes two parameters, -+ * 1st one is the itrator of skiplist_node in the list, the 2nd is the skip list -+ * node to be inserted, the function should return true if search should be -+ * continued, otherwise return false. -+ * Returns 1 if @node is inserted as the first item of skip list at level zero, -+ * otherwise 0 -+ */ -+#define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ -+static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ -+{\ -+ struct skiplist_node *update[NUM_SKIPLIST_LEVEL];\ -+ struct skiplist_node *p, *q;\ -+ int k = head->level;\ -+\ -+ p = head;\ -+ do {\ -+ while (q = p->next[k], q != head && search_func(q, node))\ -+ p = q;\ -+ update[k] = p;\ -+ } while (--k >= 0);\ -+\ -+ k = node->level;\ -+ if (unlikely(k > head->level)) {\ -+ node->level = k = ++head->level;\ -+ update[k] = head;\ -+ }\ -+\ -+ do {\ -+ p = update[k];\ -+ q = p->next[k];\ -+ node->next[k] = q;\ -+ p->next[k] = node;\ -+ node->prev[k] = p;\ -+ q->prev[k] = node;\ -+ } while (--k >= 0);\ -+\ -+ return (p == head);\ -+} -+ -+/** -+ * skiplist_del_init -- delete skip list node from a skip list and reset it's -+ * init state -+ * @head: the header node of the skip list to be deleted from. -+ * @node: the skip list node to be deleted, the caller need to ensure @node is -+ * in skip list which @head represent. -+ * Returns 1 if @node is the first item of skip level at level zero, otherwise 0 -+ */ -+static inline int -+skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) -+{ -+ int l, m = node->level; -+ -+ for (l = 0; l <= m; l++) { -+ node->prev[l]->next[l] = node->next[l]; -+ node->next[l]->prev[l] = node->prev[l]; -+ } -+ if (m == head->level && m > 0) { -+ while (head->next[m] == head && m > 0) -+ m--; -+ head->level = m; -+ } -+ INIT_SKIPLIST_NODE(node); -+ -+ return (node->prev[0] == head); -+} -+#endif /* _LINUX_SKIP_LIST_H */ -diff --git a/init/Kconfig b/init/Kconfig -index 0a817c86c966..8059f9d860db 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -828,6 +828,12 @@ config SCHED_BMQ - responsiveness on the desktop and solid scalability on normal - hardware and commodity servers. - -+config SCHED_PDS -+ bool "PDS CPU scheduler" -+ help -+ The Priority and Deadline based Skip list multiple queue CPU -+ Scheduler. 
-+ - endchoice - - endif -diff --git a/init/init_task.c b/init/init_task.c -index 9017276b1a80..fc8fcdbbd07a 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -95,10 +95,15 @@ struct task_struct init_task - .fn = do_no_restart_syscall, - }, - #ifdef CONFIG_SCHED_ALT -- .boost_prio = 0, - #ifdef CONFIG_SCHED_BMQ -+ .boost_prio = 0, - .bmq_idx = 15, - .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), -+#endif -+#ifdef CONFIG_SCHED_PDS -+ .deadline = 0, -+ .sl_level = 0, -+ .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), - #endif - .time_slice = HZ, - #else -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index 4258d1c08c71..5b9893cdfb1b 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -312,6 +312,9 @@ waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) - static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) - { -+#ifdef CONFIG_SCHED_PDS -+ return (left->deadline < right->deadline); -+#else - if (left->prio < right->prio) - return 1; - -@@ -327,11 +330,15 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, - #endif - - return 0; -+#endif - } - - static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) - { -+#ifdef CONFIG_SCHED_PDS -+ return (left->deadline == right->deadline); -+#else - if (left->prio != right->prio) - return 0; - -@@ -347,6 +354,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - #endif - - return 1; -+#endif - } - - static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 5580b600db67..407bc46de451 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -78,36 +78,6 @@ early_param("sched_timeslice", sched_timeslice); - */ - int sched_yield_type __read_mostly = 1; - --#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) --#define boost_threshold(p) (sched_timeslice_ns >>\ -- (10 - MAX_PRIORITY_ADJ - (p)->boost_prio)) -- --static inline void boost_task(struct task_struct *p) --{ -- int limit; -- -- switch (p->policy) { -- case SCHED_NORMAL: -- limit = -MAX_PRIORITY_ADJ; -- break; -- case SCHED_BATCH: -- case SCHED_IDLE: -- limit = 0; -- break; -- default: -- return; -- } -- -- if (p->boost_prio > limit) -- p->boost_prio--; --} -- --static inline void deboost_task(struct task_struct *p) --{ -- if (p->boost_prio < MAX_PRIORITY_ADJ) -- p->boost_prio++; --} -- - #ifdef CONFIG_SMP - static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; - -@@ -146,13 +116,22 @@ static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; - #endif - static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; - -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq_imp.h" -+#endif -+#ifdef CONFIG_SCHED_PDS -+#include "pds_imp.h" -+#endif -+ - static inline void update_sched_rq_watermark(struct rq *rq) - { -- unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ unsigned long watermark = sched_queue_watermark(rq); - unsigned long last_wm = rq->watermark; - unsigned long i; - int cpu; - -+ /*printk(KERN_INFO "sched: watermark(%d) %d, last %d\n", -+ cpu_of(rq), watermark, last_wm);*/ - if (watermark == last_wm) - return; - -@@ -187,13 +166,6 @@ static inline void update_sched_rq_watermark(struct rq *rq) - #endif - } - --static inline int task_sched_prio(struct task_struct *p) --{ -- return (p->prio < 
MAX_RT_PRIO)? p->prio : p->prio + p->boost_prio; --} -- --#include "bmq_imp.h" -- - static inline struct task_struct *rq_runnable_task(struct rq *rq) - { - struct task_struct *next = sched_rq_first_task(rq); -@@ -456,6 +428,7 @@ static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) - { - lockdep_assert_held(&rq->lock); - -+ /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ - WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", - task_cpu(p), cpu_of(rq)); - -@@ -473,6 +446,7 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) - { - lockdep_assert_held(&rq->lock); - -+ /*printk(KERN_INFO "sched: enqueue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ - WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", - task_cpu(p), cpu_of(rq)); - -@@ -498,10 +472,11 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) - static inline void requeue_task(struct task_struct *p, struct rq *rq) - { - lockdep_assert_held(&rq->lock); -+ /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ - WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", - cpu_of(rq), task_cpu(p)); - -- __requeue_task(p, rq); -+ __SCHED_REQUEUE_TASK(p, rq, update_sched_rq_watermark(rq)); - } - - /* -@@ -1428,7 +1403,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) - return dest_cpu; - } - --static inline int select_task_rq(struct task_struct *p) -+static inline int select_task_rq(struct task_struct *p, struct rq *rq) - { - cpumask_t chk_mask, tmp; - -@@ -1441,7 +1416,7 @@ static inline int select_task_rq(struct task_struct *p) - #endif - cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || - cpumask_and(&tmp, &chk_mask, -- &sched_rq_watermark[task_sched_prio(p) + 1])) -+ &sched_rq_watermark[task_sched_prio(p, rq) + 1])) - return best_mask_cpu(task_cpu(p), &tmp); - - return best_mask_cpu(task_cpu(p), &chk_mask); -@@ -1573,7 +1548,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); - - #else /* CONFIG_SMP */ - --static inline int select_task_rq(struct task_struct *p) -+static inline int select_task_rq(struct task_struct *p, struct rq *rq) - { - return 0; - } -@@ -2039,10 +2014,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - */ - smp_cond_load_acquire(&p->on_cpu, !VAL); - -- if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) -- boost_task(p); -+ sched_task_ttwu(p); - -- cpu = select_task_rq(p); -+ cpu = select_task_rq(p, this_rq()); - - if (cpu != task_cpu(p)) { - wake_flags |= WF_MIGRATED; -@@ -2198,9 +2172,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - */ - p->sched_reset_on_fork = 0; - } -+ update_task_priodl(p); - -- p->boost_prio = (p->boost_prio < 0) ? 
-- p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -+ sched_task_fork(p); - /* - * The child is not yet in the pid-hash so no cgroup attach races, - * and the cgroup is pinned to this child due to cgroup_fork() -@@ -2224,6 +2198,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - - if (p->time_slice < RESCHED_NS) { - p->time_slice = sched_timeslice_ns; -+ time_slice_expired(p, rq); - resched_curr(rq); - } - raw_spin_unlock(&rq->lock); -@@ -2338,7 +2313,7 @@ void wake_up_new_task(struct task_struct *p) - - p->state = TASK_RUNNING; - -- rq = cpu_rq(select_task_rq(p)); -+ rq = cpu_rq(select_task_rq(p, this_rq())); - #ifdef CONFIG_SMP - rseq_migrate(p); - /* -@@ -3436,11 +3411,7 @@ static inline void check_curr(struct task_struct *p, struct rq *rq) - - if (p->time_slice < RESCHED_NS) { - p->time_slice = sched_timeslice_ns; -- if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -- if (SCHED_RR != p->policy) -- deboost_task(p); -- requeue_task(p, rq); -- } -+ time_slice_expired(p, rq); - } - } - -@@ -3476,6 +3447,7 @@ choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) - if (!take_other_rq_tasks(rq, cpu)) { - #endif - schedstat_inc(rq->sched_goidle); -+ /*printk(KERN_INFO "sched: choose_next_task(%d) idle %px\n", cpu, next);*/ - return next; - #ifdef CONFIG_SMP - } -@@ -3485,6 +3457,8 @@ choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) - #ifdef CONFIG_HIGH_RES_TIMERS - hrtick_start(rq, next->time_slice); - #endif -+ /*printk(KERN_INFO "sched: choose_next_task(%d) next %px\n", cpu, -+ * next);*/ - return next; - } - -@@ -3599,8 +3573,7 @@ static void __sched notrace __schedule(bool preempt) - * - * After this, schedule() must not care about p->state any more. - */ -- if (rq_switch_time(rq) < boost_threshold(prev)) -- boost_task(prev); -+ sched_task_deactivate(prev, rq); - deactivate_task(prev, rq); - - if (prev->in_iowait) { -@@ -3926,7 +3899,7 @@ EXPORT_SYMBOL(default_wake_function); - static inline void check_task_changed(struct rq *rq, struct task_struct *p) - { - /* Trigger resched if task sched_prio has been modified. */ -- if (task_on_rq_queued(p) && sched_task_need_requeue(p)) { -+ if (task_on_rq_queued(p) && sched_task_need_requeue(p, rq)) { - requeue_task(p, rq); - check_preempt_curr(rq); - } -@@ -4014,6 +3987,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) - - trace_sched_pi_setprio(p, pi_task); - p->prio = prio; -+ update_task_priodl(p); - - check_task_changed(rq, p); - out_unlock: -@@ -4052,6 +4026,8 @@ void set_user_nice(struct task_struct *p, long nice) - goto out_unlock; - - p->prio = effective_prio(p); -+ update_task_priodl(p); -+ - check_task_changed(rq, p); - out_unlock: - __task_access_unlock(p, lock); -@@ -4109,21 +4085,6 @@ SYSCALL_DEFINE1(nice, int, increment) - - #endif - --/** -- * task_prio - return the priority value of a given task. -- * @p: the task in question. -- * -- * Return: The priority value as seen by users in /proc. -- * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -- * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). -- */ --int task_prio(const struct task_struct *p) --{ -- if (p->prio < MAX_RT_PRIO) -- return (p->prio - MAX_RT_PRIO); -- return (p->prio - MAX_RT_PRIO + p->boost_prio); --} -- - /** - * idle_cpu - is a given CPU idle currently? - * @cpu: the processor in question. 
-@@ -4215,6 +4176,7 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, - p->prio = normal_prio(p); - if (keep_boost) - p->prio = rt_effective_prio(p, p->prio); -+ update_task_priodl(p); - } - - /* -@@ -4974,10 +4936,8 @@ static void do_sched_yield(void) - schedstat_inc(rq->yld_count); - - if (1 == sched_yield_type) { -- if (!rt_task(current)) { -- current->boost_prio = MAX_PRIORITY_ADJ; -- requeue_task(current, rq); -- } -+ if (!rt_task(current)) -+ do_sched_yield_type_1(current, rq); - } else if (2 == sched_yield_type) { - if (rq->nr_running > 1) - rq->skip = current; -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index d8887f377455..99be2c51c88d 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -49,6 +49,9 @@ - #ifdef CONFIG_SCHED_BMQ - #include "bmq.h" - #endif -+#ifdef CONFIG_SCHED_PDS -+#include "pds.h" -+#endif - - /* task_struct::on_rq states: */ - #define TASK_ON_RQ_QUEUED 1 -@@ -86,6 +89,9 @@ struct rq { - - #ifdef CONFIG_SCHED_BMQ - struct bmq queue; -+#endif -+#ifdef CONFIG_SCHED_PDS -+ struct skiplist_node sl_header; - #endif - unsigned long watermark; - -@@ -534,11 +540,6 @@ static inline void membarrier_switch_mm(struct rq *rq, - } - #endif - --static inline int task_running_nice(struct task_struct *p) --{ -- return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); --} -- - #ifdef CONFIG_NUMA - extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); - #else -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index 4ce30c30bd3e..1364824cfa7d 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -11,4 +11,10 @@ struct bmq { - struct list_head heads[SCHED_BITS]; - }; - -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -+} -+ - #endif -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index 86d496ec23b3..d7df1d3f9495 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -1,5 +1,64 @@ - #define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.8-r1 by Alfred Chen.\n" - -+/* -+ * BMQ only routines -+ */ -+#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) -+#define boost_threshold(p) (sched_timeslice_ns >>\ -+ (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) -+ -+static inline void boost_task(struct task_struct *p) -+{ -+ int limit; -+ -+ switch (p->policy) { -+ case SCHED_NORMAL: -+ limit = -MAX_PRIORITY_ADJ; -+ break; -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ limit = 0; -+ break; -+ default: -+ return; -+ } -+ -+ if (p->boost_prio > limit) -+ p->boost_prio--; -+} -+ -+static inline void deboost_task(struct task_struct *p) -+{ -+ if (p->boost_prio < MAX_PRIORITY_ADJ) -+ p->boost_prio++; -+} -+ -+/* -+ * Common interfaces -+ */ -+static inline int task_sched_prio(struct task_struct *p, struct rq *rq) -+{ -+ return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -+ if (SCHED_RR != p->policy) -+ deboost_task(p); -+ requeue_task(p, rq); -+ } -+} -+ -+static inline void update_task_priodl(struct task_struct *p) {} -+ -+static inline unsigned long sched_queue_watermark(struct rq *rq) -+{ -+ return find_first_bit(rq->queue.bitmap, SCHED_BITS); -+} -+ - static inline void sched_queue_init(struct rq *rq) - { - struct bmq *q = &rq->queue; -@@ -61,26 +120,64 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) - sched_info_queued(rq, p); \ - psi_enqueue(p, flags); \ - \ -- p->bmq_idx = task_sched_prio(p); \ -+ p->bmq_idx = task_sched_prio(p, rq); \ - list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ - set_bit(p->bmq_idx, rq->queue.bitmap) - --static inline void __requeue_task(struct task_struct *p, struct rq *rq) -+#define __SCHED_REQUEUE_TASK(p, rq, func) \ -+{ \ -+ int idx = task_sched_prio(p, rq); \ -+\ -+ list_del(&p->bmq_node); \ -+ list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); \ -+ if (idx != p->bmq_idx) { \ -+ if (list_empty(&rq->queue.heads[p->bmq_idx])) \ -+ clear_bit(p->bmq_idx, rq->queue.bitmap); \ -+ p->bmq_idx = idx; \ -+ set_bit(p->bmq_idx, rq->queue.bitmap); \ -+ func; \ -+ } \ -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - { -- int idx = task_sched_prio(p); -- -- list_del(&p->bmq_node); -- list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); -- if (idx != p->bmq_idx) { -- if (list_empty(&rq->queue.heads[p->bmq_idx])) -- clear_bit(p->bmq_idx, rq->queue.bitmap); -- p->bmq_idx = idx; -- set_bit(p->bmq_idx, rq->queue.bitmap); -- update_sched_rq_watermark(rq); -- } -+ return (task_sched_prio(p, rq) != p->bmq_idx); -+} -+ -+static void sched_task_fork(struct task_struct *p) -+{ -+ p->boost_prio = (p->boost_prio < 0) ? -+ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -+} -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
-+ */ -+int task_prio(const struct task_struct *p) -+{ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ return (p->prio - MAX_RT_PRIO + p->boost_prio); -+} -+ -+static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = MAX_PRIORITY_ADJ; -+} -+ -+static void sched_task_ttwu(struct task_struct *p) -+{ -+ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) -+ boost_task(p); - } - --static inline bool sched_task_need_requeue(struct task_struct *p) -+static void sched_task_deactivate(struct task_struct *p, struct rq *rq) - { -- return (task_sched_prio(p) != p->bmq_idx); -+ if (rq_switch_time(rq) < boost_threshold(p)) -+ boost_task(p); - } -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -new file mode 100644 -index 000000000000..9b9addc205a9 ---- /dev/null -+++ b/kernel/sched/pds.h -@@ -0,0 +1,14 @@ -+#ifndef PDS_H -+#define PDS_H -+ -+/* bits: -+ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + MAX_PRIORITY_ADJ * 2 + 8 + 1) -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+ -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio > DEFAULT_PRIO); -+} -+ -+#endif -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -new file mode 100644 -index 000000000000..b970879f1d2e ---- /dev/null -+++ b/kernel/sched/pds_imp.h -@@ -0,0 +1,205 @@ -+#define ALT_SCHED_VERSION_MSG "sched/bmq: PDS CPU Scheduler 5.8-r0 by Alfred Chen.\n" -+ -+static const u64 user_prio2deadline[NICE_WIDTH] = { -+/* -20 */ 6291456, 6920601, 7612661, 8373927, 9211319, -+/* -15 */ 10132450, 11145695, 12260264, 13486290, 14834919, -+/* -10 */ 16318410, 17950251, 19745276, 21719803, 23891783, -+/* -5 */ 26280961, 28909057, 31799962, 34979958, 38477953, -+/* 0 */ 42325748, 46558322, 51214154, 56335569, 61969125, -+/* 5 */ 68166037, 74982640, 82480904, 90728994, 99801893, -+/* 10 */ 109782082, 120760290, 132836319, 146119950, 160731945, -+/* 15 */ 176805139, 194485652, 213934217, 235327638, 258860401 -+}; -+ -+static const int dl_level_map[] = { -+/* 0 4 8 12 */ -+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, -+/* 16 20 24 28 */ -+ 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 7 -+}; -+ -+static inline int -+task_sched_prio(const struct task_struct *p, const struct rq *rq) -+{ -+ u64 delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 23; -+ -+ delta = min((size_t)delta, ARRAY_SIZE(dl_level_map) - 1); -+ -+ return (p->prio < MAX_RT_PRIO)? p->prio : MAX_RT_PRIO + dl_level_map[delta]; -+} -+ -+static inline void update_task_priodl(struct task_struct *p) -+{ -+ p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ -+ -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; -+ update_task_priodl(p); -+ -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -+ requeue_task(p, rq); -+} -+ -+/* -+ * pds_skiplist_task_search -- search function used in PDS run queue skip list -+ * node insert operation. -+ * @it: iterator pointer to the node in the skip list -+ * @node: pointer to the skiplist_node to be inserted -+ * -+ * Returns true if key of @it is less or equal to key value of @node, otherwise -+ * false. 
-+ */ -+static inline bool -+pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) -+{ -+ return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= -+ skiplist_entry(node, struct task_struct, sl_node)->priodl); -+} -+ -+/* -+ * Define the skip list insert function for PDS -+ */ -+DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); -+ -+/* -+ * Init the queue structure in rq -+ */ -+static inline void sched_queue_init(struct rq *rq) -+{ -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+} -+ -+/* -+ * Init idle task and put into queue structure of rq -+ * IMPORTANT: may be called multiple times for a single cpu -+ */ -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) -+{ -+ /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ -+ int default_prio = idle->prio; -+ -+ idle->prio = MAX_PRIO; -+ idle->deadline = 0ULL; -+ update_task_priodl(idle); -+ -+ FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+ -+ idle->sl_node.level = idle->sl_level; -+ pds_skiplist_insert(&rq->sl_header, &idle->sl_node); -+ -+ idle->prio = default_prio; -+} -+ -+/* -+ * This routine assume that the idle task always in queue -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ struct skiplist_node *node = rq->sl_header.next[0]; -+ -+ BUG_ON(node == &rq->sl_header); -+ return skiplist_entry(node, struct task_struct, sl_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ struct skiplist_node *next = p->sl_node.next[0]; -+ -+ BUG_ON(next == &rq->sl_header); -+ return skiplist_entry(next, struct task_struct, sl_node); -+} -+ -+static inline unsigned long sched_queue_watermark(struct rq *rq) -+{ -+ return task_sched_prio(sched_rq_first_task(rq), rq); -+} -+ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { \ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->sl_node.level = p->sl_level; \ -+ pds_skiplist_insert(&rq->sl_header, &p->sl_node) -+ -+/* -+ * Requeue a task @p to @rq -+ */ -+#define __SCHED_REQUEUE_TASK(p, rq, func) \ -+{\ -+ bool b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); \ -+\ -+ p->sl_node.level = p->sl_level; \ -+ if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { \ -+ func; \ -+ } \ -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) -+{ -+ struct skiplist_node *node = p->sl_node.prev[0]; -+ -+ if (node != &rq->sl_header) { -+ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); -+ -+ if (t->priodl > p->priodl) -+ return true; -+ } -+ -+ node = p->sl_node.next[0]; -+ if (node != &rq->sl_header) { -+ struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); -+ -+ if (t->priodl < p->priodl) -+ return true; -+ } -+ -+ return false; -+} -+ -+static void sched_task_fork(struct task_struct *p) {} -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -+ * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). 
-+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int ret; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ -+ preempt_disable(); -+ ret = task_sched_prio(p, this_rq()) - MAX_RT_PRIO; -+ preempt_enable(); -+ -+ return ret; -+} -+ -+static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ time_slice_expired(p, rq); -+} -+ -+static void sched_task_ttwu(struct task_struct *p) {} -+static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} --- -2.37.0 - - -From 1dafadf5e49f3fbd36f83f35d9d88a3452236a0d Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 2 Sep 2020 08:24:30 +0800 -Subject: [PATCH 035/297] sched/alt: Refill time_slice in time_slice_expired(). - ---- - kernel/sched/alt_core.c | 5 +---- - kernel/sched/bmq_imp.h | 2 ++ - kernel/sched/pds_imp.h | 1 + - 3 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 407bc46de451..5187d23f27df 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2197,7 +2197,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - #endif - - if (p->time_slice < RESCHED_NS) { -- p->time_slice = sched_timeslice_ns; - time_slice_expired(p, rq); - resched_curr(rq); - } -@@ -3409,10 +3408,8 @@ static inline void check_curr(struct task_struct *p, struct rq *rq) - - update_curr(rq, p); - -- if (p->time_slice < RESCHED_NS) { -- p->time_slice = sched_timeslice_ns; -+ if (p->time_slice < RESCHED_NS) - time_slice_expired(p, rq); -- } - } - - static inline struct task_struct * -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index d7df1d3f9495..c9f0c708dd61 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -45,6 +45,8 @@ static inline void requeue_task(struct task_struct *p, struct rq *rq); - - static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - { -+ p->time_slice = sched_timeslice_ns; -+ - if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { - if (SCHED_RR != p->policy) - deboost_task(p); -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index b970879f1d2e..aa7e933f08b8 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -38,6 +38,7 @@ static inline void requeue_task(struct task_struct *p, struct rq *rq); - static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - { - /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ -+ p->time_slice = sched_timeslice_ns; - - if (p->prio >= MAX_RT_PRIO) - p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; --- -2.37.0 - - -From 4fdcb6d7592ea6c2190af732f82688528ca72758 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 5 Sep 2020 16:25:49 +0800 -Subject: [PATCH 036/297] sched/alt: PDS rework. - -alt: -Rework bmq&pds routine in sched_fork(). -Sync-up mainline implement in sched_exec(), add task pi locking. -Add alt_sched_debug() and control by ALT_SCHED_DEBUG macro. - -pds: -Update user_prio2deadline which now based on default 4ms time slice. -Update dl_level_map which provides 20 levels instead of the original 8 -levels. -Fix issue that task_sched_prio() doesn't return corrent sched prio for -idle task. -Implement sched_task_for() routine. 
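Illustration only, not part of the patch: the hunks below replace both PDS tables, so here is a minimal standalone sketch of the resulting deadline-to-level mapping in task_sched_prio(). The dl_level_map entries and the two deadline constants are copied from the new tables in this patch; the userspace harness, the sched_prio_for()/deadline_nice0/deadline_nice19 names and the zero clock value are assumptions made for the demo, and the RT/idle special cases are omitted.

#include <stdio.h>
#include <stdint.h>

#define MAX_RT_PRIO 100

/* 20 queue levels; the index grows as a task's deadline draws nearer (~2ms per step) */
static const unsigned char dl_level_map[] = {
	19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18,
	18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17,
	17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15,
	15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12,
	12, 11, 11, 11, 10, 10, 10,  9,  9,  8,  7,  6,  5,  4,  3,  2,
	 1,  0
};

/* user_prio2deadline[20] (nice 0) and [39] (nice +19) from the new table, in ns */
static const uint64_t deadline_nice0  = 28217151ULL;
static const uint64_t deadline_nice19 = 172573507ULL;

static int sched_prio_for(uint64_t clock, uint64_t deadline)
{
	/* how much of the largest deadline window has already elapsed, in ~2ms steps */
	uint64_t delta = (clock + deadline_nice19 - deadline) >> 21;
	uint64_t max = sizeof(dl_level_map) - 1;

	if (delta > max)
		delta = max;
	return MAX_RT_PRIO + dl_level_map[delta];
}

int main(void)
{
	uint64_t clock = 0;	/* arbitrary reference point */

	/* a nice 0 task whose slice just expired: deadline = clock + ~28ms */
	printf("fresh nice 0   -> prio %d\n",
	       sched_prio_for(clock, clock + deadline_nice0));
	/* a nice +19 task with a fresh deadline starts at the lowest level */
	printf("fresh nice +19 -> prio %d\n",
	       sched_prio_for(clock, clock + deadline_nice19));
	/* once the deadline is reached the task has drifted to level 0 */
	printf("deadline hit   -> prio %d\n", sched_prio_for(clock, clock));
	return 0;
}

Compiled with a plain cc, this prints 110, 119 and 100: tasks enter a nice-dependent level when their deadline is refilled and drift toward level 0, the highest priority among SCHED_NORMAL tasks, as the clock catches up with their deadline.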
---- - kernel/sched/alt_core.c | 57 ++++++++++++++++++++++++++++------------ - kernel/sched/alt_debug.c | 2 +- - kernel/sched/bmq_imp.h | 2 +- - kernel/sched/pds.h | 2 +- - kernel/sched/pds_imp.h | 52 +++++++++++++++++++++++++----------- - 5 files changed, 79 insertions(+), 36 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 5187d23f27df..091f6919195c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2172,9 +2172,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - */ - p->sched_reset_on_fork = 0; - } -- update_task_priodl(p); - -- sched_task_fork(p); - /* - * The child is not yet in the pid-hash so no cgroup attach races, - * and the cgroup is pinned to this child due to cgroup_fork() -@@ -2190,6 +2188,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - */ - rq = this_rq(); - raw_spin_lock(&rq->lock); -+ - rq->curr->time_slice /= 2; - p->time_slice = rq->curr->time_slice; - #ifdef CONFIG_SCHED_HRTICK -@@ -2197,9 +2196,10 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - #endif - - if (p->time_slice < RESCHED_NS) { -- time_slice_expired(p, rq); -+ p->time_slice = sched_timeslice_ns; - resched_curr(rq); - } -+ sched_task_fork(p, rq); - raw_spin_unlock(&rq->lock); - - rseq_migrate(p); -@@ -2795,25 +2795,29 @@ unsigned long nr_iowait(void) - void sched_exec(void) - { - struct task_struct *p = current; -+ unsigned long flags; - int dest_cpu; -+ struct rq *rq; - -- if (task_rq(p)->nr_running < 2) -- return; -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = this_rq(); - -- dest_cpu = cpumask_any_and(p->cpus_ptr, &sched_rq_watermark[IDLE_WM]); -- if ( dest_cpu < nr_cpu_ids) { --#ifdef CONFIG_SCHED_SMT -- int smt = cpumask_any_and(p->cpus_ptr, &sched_sg_idle_mask); -- if (smt < nr_cpu_ids) -- dest_cpu = smt; --#endif -- if (likely(cpu_active(dest_cpu))) { -- struct migration_arg arg = { p, dest_cpu }; -+ if (rq != task_rq(p) || rq->nr_running < 2) -+ goto unlock; - -- stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); -- return; -- } -+ dest_cpu = select_task_rq(p, task_rq(p)); -+ if (dest_cpu == smp_processor_id()) -+ goto unlock; -+ -+ if (likely(cpu_active(dest_cpu))) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); -+ return; - } -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); - } - - #endif -@@ -3314,6 +3318,23 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) - schedstat_inc(this_rq()->sched_count); - } - -+/* -+ * Compile time debug macro -+ * #define ALT_SCHED_DEBUG -+ */ -+ -+#ifdef ALT_SCHED_DEBUG -+void alt_sched_debug(void) -+{ -+ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", -+ sched_rq_pending_mask.bits[0], -+ sched_rq_watermark[IDLE_WM].bits[0], -+ sched_sg_idle_mask.bits[0]); -+} -+#else -+inline void alt_sched_debug(void) {} -+#endif -+ - #ifdef CONFIG_SMP - - #define SCHED_RQ_NR_MIGRATION (32UL) -@@ -5153,6 +5174,8 @@ static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) - struct task_struct *p; - int retval; - -+ alt_sched_debug(); -+ - if (pid < 0) - return -EINVAL; - -diff --git a/kernel/sched/alt_debug.c b/kernel/sched/alt_debug.c -index 835e6bb98dda..1212a031700e 100644 ---- a/kernel/sched/alt_debug.c -+++ b/kernel/sched/alt_debug.c -@@ -1,7 +1,7 @@ - /* - * kernel/sched/alt_debug.c - * -- * Print the BMQ debugging details -+ * Print the alt scheduler 
debugging details - * - * Author: Alfred Chen - * Date : 2020 -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index c9f0c708dd61..0e67e00a6020 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -146,7 +146,7 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - return (task_sched_prio(p, rq) != p->bmq_idx); - } - --static void sched_task_fork(struct task_struct *p) -+static void sched_task_fork(struct task_struct *p, struct rq *rq) - { - p->boost_prio = (p->boost_prio < 0) ? - p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 9b9addc205a9..7fdeace7e8a5 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -3,7 +3,7 @@ - - /* bits: - * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ --#define SCHED_BITS (MAX_RT_PRIO + MAX_PRIORITY_ADJ * 2 + 8 + 1) -+#define SCHED_BITS (MAX_RT_PRIO + 20 + 1) - #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) - - static inline int task_running_nice(struct task_struct *p) -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index aa7e933f08b8..4a2fc8993229 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -1,31 +1,46 @@ - #define ALT_SCHED_VERSION_MSG "sched/bmq: PDS CPU Scheduler 5.8-r0 by Alfred Chen.\n" - - static const u64 user_prio2deadline[NICE_WIDTH] = { --/* -20 */ 6291456, 6920601, 7612661, 8373927, 9211319, --/* -15 */ 10132450, 11145695, 12260264, 13486290, 14834919, --/* -10 */ 16318410, 17950251, 19745276, 21719803, 23891783, --/* -5 */ 26280961, 28909057, 31799962, 34979958, 38477953, --/* 0 */ 42325748, 46558322, 51214154, 56335569, 61969125, --/* 5 */ 68166037, 74982640, 82480904, 90728994, 99801893, --/* 10 */ 109782082, 120760290, 132836319, 146119950, 160731945, --/* 15 */ 176805139, 194485652, 213934217, 235327638, 258860401 -+/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, -+/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, -+/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, -+/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, -+/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, -+/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, -+/* 10 */ 73188017, 80506818, 88557499, 97413248, 107154572, -+/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 - }; - --static const int dl_level_map[] = { --/* 0 4 8 12 */ -- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, --/* 16 20 24 28 */ -- 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 7 -+static const unsigned char dl_level_map[] = { -+/* 0 4 8 12 */ -+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, -+/* 16 20 24 28 */ -+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, -+/* 32 36 40 44 */ -+ 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, -+/* 48 52 56 60 */ -+ 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, -+/* 64 68 72 76 */ -+ 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 7, 6, 5, 4, 3, 2, -+/* 80 84 88 92 */ -+ 1, 0 - }; - - static inline int - task_sched_prio(const struct task_struct *p, const struct rq *rq) - { -- u64 delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 23; -+ size_t delta; - -+ if (p == rq->idle) -+ return IDLE_TASK_SCHED_PRIO; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return p->prio; -+ -+ delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 21; - delta = min((size_t)delta, ARRAY_SIZE(dl_level_map) - 1); - -- return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO + dl_level_map[delta]; -+ return MAX_RT_PRIO + dl_level_map[delta]; - } - - static inline void update_task_priodl(struct task_struct *p) -@@ -173,7 +188,12 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - return false; - } - --static void sched_task_fork(struct task_struct *p) {} -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; -+ update_task_priodl(p); -+} - - /** - * task_prio - return the priority value of a given task. --- -2.37.0 - - -From c258f4da07a61595f9ff450dfab0e23bf3cbe3d8 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 6 Sep 2020 10:26:05 +0800 -Subject: [PATCH 037/297] sched/alt: Documentation and comments updates. - ---- - Documentation/admin-guide/kernel-parameters.txt | 2 +- - Documentation/admin-guide/sysctl/kernel.rst | 4 ++-- - kernel/trace/trace_selftest.c | 2 +- - 3 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 11e0b608c57d..d243405aa3bf 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -5274,7 +5274,7 @@ - See drivers/net/irda/sa1100_ir.c. - - sched_timeslice= -- [KNL] Time slice in us for BMQ scheduler. -+ [KNL] Time slice in us for BMQ/PDS scheduler. - Format: (must be >= 1000) - Default: 4000 - See Documentation/scheduler/sched-BMQ.txt -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 8fd3bfa2ecd9..e24781970a3d 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -1528,8 +1528,8 @@ tunable to zero will disable lockup detection altogether. - yield_type: - =========== - --BMQ CPU scheduler only. This determines what type of yield calls to --sched_yield will perform. -+BMQ/PDS CPU scheduler only. This determines what type of yield calls -+to sched_yield will perform. - - 0 - No yield. - 1 - Deboost and requeue task. (default) -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index bc131c6d1299..2ccdede8585c 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1144,7 +1144,7 @@ static int trace_wakeup_test_thread(void *data) - /* Make this a -deadline thread */ - static const struct sched_attr attr = { - #ifdef CONFIG_SCHED_ALT -- /* No deadline on BMQ, use RR */ -+ /* No deadline on BMQ/PDS, use RR */ - .sched_policy = SCHED_RR, - #else - .sched_policy = SCHED_DEADLINE, --- -2.37.0 - - -From 2b65a7b2dff1584dea7e9a82fb907ae4b10398d0 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 6 Sep 2020 10:28:26 +0800 -Subject: [PATCH 038/297] sched/alt: Revert NORMAL_PRIO defination for powerpc - cell. - ---- - arch/powerpc/platforms/cell/spufs/sched.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c -index 525372fcd0f2..99bd027a7f7c 100644 ---- a/arch/powerpc/platforms/cell/spufs/sched.c -+++ b/arch/powerpc/platforms/cell/spufs/sched.c -@@ -51,6 +51,11 @@ static struct task_struct *spusched_task; - static struct timer_list spusched_timer; - static struct timer_list spuloadavg_timer; - -+/* -+ * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). 
-+ */ -+#define NORMAL_PRIO 120 -+ - /* - * Frequency of the spu scheduler tick. By default we do one SPU scheduler - * tick for every 10 CPU scheduler ticks. --- -2.37.0 - - -From fbc1e5869bff61dcb09bc4b2cca15c5a34d0a0e7 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 31 Aug 2020 15:40:14 +0800 -Subject: [PATCH 039/297] sched/alt: Add ALT_SCHED_VERSION micro. - ---- - kernel/sched/alt_core.c | 2 ++ - kernel/sched/bmq_imp.h | 2 +- - kernel/sched/pds_imp.h | 2 +- - 3 files changed, 4 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 091f6919195c..76f72292e28a 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -45,6 +45,8 @@ - #define CREATE_TRACE_POINTS - #include - -+#define ALT_SCHED_VERSION "v5.8-r2" -+ - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) - #define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR) -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index 0e67e00a6020..ad9a7c448da7 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -1,4 +1,4 @@ --#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler 5.8-r1 by Alfred Chen.\n" -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" - - /* - * BMQ only routines -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index 4a2fc8993229..041827b92910 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -1,4 +1,4 @@ --#define ALT_SCHED_VERSION_MSG "sched/bmq: PDS CPU Scheduler 5.8-r0 by Alfred Chen.\n" -+#define ALT_SCHED_VERSION_MSG "sched/bmq: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" - - static const u64 user_prio2deadline[NICE_WIDTH] = { - /* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, --- -2.37.0 - - -From 1dadb0fdb024f924062679231d67959715381f05 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 6 Sep 2020 20:02:47 +0800 -Subject: [PATCH 040/297] sched/pds: Fix compilation issue with - CONFIG_SCHED_TRACER. - ---- - include/linux/sched/deadline.h | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 0aa37b0a1676..9516a98cf160 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -2,13 +2,13 @@ - - #ifdef CONFIG_SCHED_ALT - --#ifdef CONFIG_SCHED_BMQ --#define __tsk_deadline(p) (0UL) -- - static inline int dl_task(struct task_struct *p) - { - return 0; - } -+ -+#ifdef CONFIG_SCHED_BMQ -+#define __tsk_deadline(p) (0UL) - #endif - - #ifdef CONFIG_SCHED_PDS --- -2.37.0 - - -From c228f0d8e54cc76d0311b102e28ac359bc56d324 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 7 Sep 2020 06:56:27 +0800 -Subject: [PATCH 041/297] sched/alt: Disable UCLAMP_TASK, NUMA_BALANCING, - SCHED_AUTOGROUP, FAIR_GROUP_SCHED for Project C. - ---- - init/Kconfig | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/init/Kconfig b/init/Kconfig -index 8059f9d860db..c4acd49edf94 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -841,7 +841,7 @@ endif - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -- depends on !SCHED_BMQ -+ depends on !SCHED_ALT - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. 
-@@ -941,7 +941,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION && !PREEMPT_RT -- depends on !SCHED_BMQ -+ depends on !SCHED_ALT - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -1028,7 +1028,7 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. - --if CGROUP_SCHED && !SCHED_BMQ -+if CGROUP_SCHED && !SCHED_ALT - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED -@@ -1299,7 +1299,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -- depends on !SCHED_BMQ -+ depends on !SCHED_ALT - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED --- -2.37.0 - - -From 2e3bfcb22ff66e9e23696835c6a7bd4be5a5a2c1 Mon Sep 17 00:00:00 2001 -From: Piotr Gorski -Date: Mon, 7 Sep 2020 20:42:07 +0200 -Subject: [PATCH 042/297] sched/pds: Update ALT_SCHED_VERSION_MSG - -Signed-off-by: Piotr Gorski ---- - kernel/sched/pds_imp.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index 041827b92910..66dc16218444 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -1,4 +1,4 @@ --#define ALT_SCHED_VERSION_MSG "sched/bmq: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -+#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" - - static const u64 user_prio2deadline[NICE_WIDTH] = { - /* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, --- -2.37.0 - - -From 520577995688bf0b0afa44e8288446cef9d45866 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 10 Sep 2020 22:46:11 +0800 -Subject: [PATCH 043/297] sched/alt: Call check_preempt_curr() in - ttwu_do_wakeup(). - -Sync-up this like what it does in mainline scheduler code. ---- - kernel/sched/alt_core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 76f72292e28a..1dd4972beda4 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1593,6 +1593,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) - static inline void - ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) - { -+ check_preempt_curr(rq); - p->state = TASK_RUNNING; - trace_sched_wakeup(p); - } -@@ -1615,6 +1616,8 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) - - rq = __task_access_lock(p, &lock); - if (task_on_rq_queued(p)) { -+ /* check_preempt_curr() may use rq clock */ -+ update_rq_clock(rq); - ttwu_do_wakeup(rq, p, wake_flags); - ret = 1; - } -@@ -1654,8 +1657,6 @@ void sched_ttwu_pending(void *arg) - ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); - } - -- check_preempt_curr(rq); -- - rq_unlock_irqrestore(rq, &rf); - } - -@@ -1762,7 +1763,6 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) - raw_spin_lock(&rq->lock); - update_rq_clock(rq); - ttwu_do_activate(rq, p, wake_flags); -- check_preempt_curr(rq); - raw_spin_unlock(&rq->lock); - } - --- -2.37.0 - - -From d27a0663866bed49f43523c4022aa64a2af78cde Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 15 Sep 2020 10:31:09 +0800 -Subject: [PATCH 044/297] sched/pds: Sync-up pds_skiplist_random_level() from - original PDS. 
- -Forgot to pick up this function from original pds, this make tasks -sl_level always be 0. ---- - kernel/sched/pds_imp.h | 31 +++++++++++++++++++++++++++++++ - 1 file changed, 31 insertions(+) - -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index 66dc16218444..6baee5e961b9 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -188,8 +188,39 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - return false; - } - -+/* -+ * pds_skiplist_random_level -- Returns a pseudo-random level number for skip -+ * list node which is used in PDS run queue. -+ * -+ * In current implementation, based on testing, the first 8 bits in microseconds -+ * of niffies are suitable for random level population. -+ * find_first_bit() is used to satisfy p = 0.5 between each levels, and there -+ * should be platform hardware supported instruction(known as ctz/clz) to speed -+ * up this function. -+ * The skiplist level for a task is populated when task is created and doesn't -+ * change in task's life time. When task is being inserted into run queue, this -+ * skiplist level is set to task's sl_node->level, the skiplist insert function -+ * may change it based on current level of the skip lsit. -+ */ -+static inline int pds_skiplist_random_level(const struct task_struct *p) -+{ -+ long unsigned int randseed; -+ -+ /* -+ * 1. Some architectures don't have better than microsecond resolution -+ * so mask out ~microseconds as a factor of the random seed for skiplist -+ * insertion. -+ * 2. Use address of task structure pointer as another factor of the -+ * random seed for task burst forking scenario. -+ */ -+ randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; -+ -+ return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); -+} -+ - static void sched_task_fork(struct task_struct *p, struct rq *rq) - { -+ p->sl_level = pds_skiplist_random_level(p); - if (p->prio >= MAX_RT_PRIO) - p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; - update_task_priodl(p); --- -2.37.0 - - -From 950f9789e0010a17644a6d226d195ad3aa72c43e Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 15 Sep 2020 11:07:43 +0800 -Subject: [PATCH 045/297] Project-C v5.8-r3 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 1dd4972beda4..b469c9488d18 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -45,7 +45,7 @@ - #define CREATE_TRACE_POINTS - #include - --#define ALT_SCHED_VERSION "v5.8-r2" -+#define ALT_SCHED_VERSION "v5.8-r3" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From e786fd1dae544fb4e3ea3b8f228eb0081a2d0bf6 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 18 Sep 2020 21:37:34 +0800 -Subject: [PATCH 046/297] sched/alt: [Sync] 58877d347b58 sched: Better document - ttwu() - ---- - kernel/sched/alt_core.c | 188 +++++++++++++++++++++++++++++++++++----- - 1 file changed, 168 insertions(+), 20 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b469c9488d18..994dce2a7070 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -178,6 +178,99 @@ static inline struct task_struct *rq_runnable_task(struct rq *rq) - return next; - } - -+/* -+ * Serialization rules: -+ * -+ * Lock order: -+ * -+ * p->pi_lock -+ * rq->lock -+ * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls) -+ * -+ * rq1->lock 
-+ * rq2->lock where: rq1 < rq2 -+ * -+ * Regular state: -+ * -+ * Normal scheduling state is serialized by rq->lock. __schedule() takes the -+ * local CPU's rq->lock, it optionally removes the task from the runqueue and -+ * always looks at the local rq data structures to find the most elegible task -+ * to run next. -+ * -+ * Task enqueue is also under rq->lock, possibly taken from another CPU. -+ * Wakeups from another LLC domain might use an IPI to transfer the enqueue to -+ * the local CPU to avoid bouncing the runqueue state around [ see -+ * ttwu_queue_wakelist() ] -+ * -+ * Task wakeup, specifically wakeups that involve migration, are horribly -+ * complicated to avoid having to take two rq->locks. -+ * -+ * Special state: -+ * -+ * System-calls and anything external will use task_rq_lock() which acquires -+ * both p->pi_lock and rq->lock. As a consequence the state they change is -+ * stable while holding either lock: -+ * -+ * - sched_setaffinity()/ -+ * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed -+ * - set_user_nice(): p->se.load, p->*prio -+ * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio, -+ * p->se.load, p->rt_priority, -+ * p->dl.dl_{runtime, deadline, period, flags, bw, density} -+ * - sched_setnuma(): p->numa_preferred_nid -+ * - sched_move_task()/ -+ * cpu_cgroup_fork(): p->sched_task_group -+ * - uclamp_update_active() p->uclamp* -+ * -+ * p->state <- TASK_*: -+ * -+ * is changed locklessly using set_current_state(), __set_current_state() or -+ * set_special_state(), see their respective comments, or by -+ * try_to_wake_up(). This latter uses p->pi_lock to serialize against -+ * concurrent self. -+ * -+ * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }: -+ * -+ * is set by activate_task() and cleared by deactivate_task(), under -+ * rq->lock. Non-zero indicates the task is runnable, the special -+ * ON_RQ_MIGRATING state is used for migration without holding both -+ * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock(). -+ * -+ * p->on_cpu <- { 0, 1 }: -+ * -+ * is set by prepare_task() and cleared by finish_task() such that it will be -+ * set before p is scheduled-in and cleared after p is scheduled-out, both -+ * under rq->lock. Non-zero indicates the task is running on its CPU. -+ * -+ * [ The astute reader will observe that it is possible for two tasks on one -+ * CPU to have ->on_cpu = 1 at the same time. ] -+ * -+ * task_cpu(p): is changed by set_task_cpu(), the rules are: -+ * -+ * - Don't call set_task_cpu() on a blocked task: -+ * -+ * We don't care what CPU we're not running on, this simplifies hotplug, -+ * the CPU assignment of blocked tasks isn't required to be valid. -+ * -+ * - for try_to_wake_up(), called under p->pi_lock: -+ * -+ * This allows try_to_wake_up() to only take one rq->lock, see its comment. 
-+ * -+ * - for migration called under rq->lock: -+ * [ see task_on_rq_migrating() in task_rq_lock() ] -+ * -+ * o move_queued_task() -+ * o detach_task() -+ * -+ * - for migration called under double_rq_lock(): -+ * -+ * o __migrate_swap_task() -+ * o push_rt_task() / pull_rt_task() -+ * o push_dl_task() / pull_dl_task() -+ * o dl_task_offline_migration() -+ * -+ */ -+ - /* - * Context: p->pi_lock - */ -@@ -1608,7 +1701,32 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) - ttwu_do_wakeup(rq, p, 0); - } - --static int ttwu_remote(struct task_struct *p, int wake_flags) -+/* -+ * Consider @p being inside a wait loop: -+ * -+ * for (;;) { -+ * set_current_state(TASK_UNINTERRUPTIBLE); -+ * -+ * if (CONDITION) -+ * break; -+ * -+ * schedule(); -+ * } -+ * __set_current_state(TASK_RUNNING); -+ * -+ * between set_current_state() and schedule(). In this case @p is still -+ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in -+ * an atomic manner. -+ * -+ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq -+ * then schedule() must still happen and p->state can be changed to -+ * TASK_RUNNING. Otherwise we lost the race, schedule() has happened, and we -+ * need to do a full wakeup with enqueue. -+ * -+ * Returns: %true when the wakeup is done, -+ * %false otherwise. -+ */ -+static int ttwu_runnable(struct task_struct *p, int wake_flags) - { - struct rq *rq; - raw_spinlock_t *lock; -@@ -1749,16 +1867,21 @@ bool cpus_share_cache(int this_cpu, int that_cpu) - { - return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); - } -+#else /* !CONFIG_SMP */ -+ -+static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ return false; -+} -+ - #endif /* CONFIG_SMP */ - - static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) - { - struct rq *rq = cpu_rq(cpu); - --#if defined(CONFIG_SMP) - if (ttwu_queue_wakelist(p, cpu, wake_flags)) - return; --#endif - - raw_spin_lock(&rq->lock); - update_rq_clock(rq); -@@ -1815,8 +1938,8 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) - * migration. However the means are completely different as there is no lock - * chain to provide order. Instead we do: - * -- * 1) smp_store_release(X->on_cpu, 0) -- * 2) smp_cond_load_acquire(!X->on_cpu) -+ * 1) smp_store_release(X->on_cpu, 0) -- finish_task() -+ * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up() - * - * Example: - * -@@ -1857,20 +1980,42 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) - * - */ - --/*** -+/** - * try_to_wake_up - wake up a thread - * @p: the thread to be awakened - * @state: the mask of task states that can be woken - * @wake_flags: wake modifier flags (WF_*) - * -- * Put it on the run-queue if it's not already there. The "current" -- * thread is always on the run-queue (except when the actual -- * re-schedule is in progress), and as such you're allowed to do -- * the simpler "current->state = TASK_RUNNING" to mark yourself -- * runnable without the overhead of this. -+ * Conceptually does: -+ * -+ * If (@state & @p->state) @p->state = TASK_RUNNING. -+ * -+ * If the task was not queued/runnable, also place it back on a runqueue. -+ * -+ * This function is atomic against schedule() which would dequeue the task. - * -- * Return: %true if @p was woken up, %false if it was already running. -- * or @state didn't match @p's state. 
-+ * It issues a full memory barrier before accessing @p->state, see the comment -+ * with set_current_state(). -+ * -+ * Uses p->pi_lock to serialize against concurrent wake-ups. -+ * -+ * Relies on p->pi_lock stabilizing: -+ * - p->sched_class -+ * - p->cpus_ptr -+ * - p->sched_task_group -+ * in order to do migration, see its use of select_task_rq()/set_task_cpu(). -+ * -+ * Tries really hard to only take one task_rq(p)->lock for performance. -+ * Takes rq->lock in: -+ * - ttwu_runnable() -- old rq, unavoidable, see comment there; -+ * - ttwu_queue() -- new rq, for enqueue of the task; -+ * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. -+ * -+ * As a consequence we race really badly with just about everything. See the -+ * many memory barriers and their comments for details. -+ * -+ * Return: %true if @p->state changes (an actual wakeup was done), -+ * %false otherwise. - */ - static int try_to_wake_up(struct task_struct *p, unsigned int state, - int wake_flags) -@@ -1883,7 +2028,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - /* - * We're waking current, this means 'p->on_rq' and 'task_cpu(p) - * == smp_processor_id()'. Together this means we can special -- * case the whole 'p->on_rq && ttwu_remote()' case below -+ * case the whole 'p->on_rq && ttwu_runnable()' case below - * without taking any locks. - * - * In particular: -@@ -1904,8 +2049,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - /* - * If we are going to wake up a thread waiting for CONDITION we - * need to ensure that CONDITION=1 done by the caller can not be -- * reordered with p->state check below. This pairs with mb() in -- * set_current_state() the waiting thread does. -+ * reordered with p->state check below. This pairs with smp_store_mb() -+ * in set_current_state() that the waiting thread does. - */ - raw_spin_lock_irqsave(&p->pi_lock, flags); - smp_mb__after_spinlock(); -@@ -1940,7 +2085,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). - */ - smp_rmb(); -- if (READ_ONCE(p->on_rq) && ttwu_remote(p, wake_flags)) -+ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) - goto unlock; - - if (p->in_iowait) { -@@ -2430,16 +2575,19 @@ static inline void prepare_task(struct task_struct *next) - /* - * Claim the task as running, we do this before switching to it - * such that any running task will have this set. -+ * -+ * See the ttwu() WF_ON_CPU case and its ordering comment. - */ -- next->on_cpu = 1; -+ WRITE_ONCE(next->on_cpu, 1); - } - - static inline void finish_task(struct task_struct *prev) - { - #ifdef CONFIG_SMP - /* -- * After ->on_cpu is cleared, the task can be moved to a different CPU. -- * We must ensure this doesn't happen until the switch is completely -+ * This must be the very last reference to @prev from this CPU. After -+ * p->on_cpu is cleared, the task can be moved to a different CPU. We -+ * must ensure this doesn't happen until the switch is completely - * finished. 
- * - * In particular, the load of prev->state in finish_task_switch() must --- -2.37.0 - - -From f08ec8a64a1e4153f744102ff8d7cd95cc763759 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 18 Sep 2020 21:38:29 +0800 -Subject: [PATCH 047/297] sched/alt: [Sync] 25980c7a79af arch_topology, - sched/core: Cleanup thermal pressure definition - ---- - kernel/sched/alt_core.c | 11 ----------- - 1 file changed, 11 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 994dce2a7070..3cc8d7a8d3fe 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3033,17 +3033,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) - return ns; - } - --DEFINE_PER_CPU(unsigned long, thermal_pressure); -- --void arch_set_thermal_pressure(struct cpumask *cpus, -- unsigned long th_pressure) --{ -- int cpu; -- -- for_each_cpu(cpu, cpus) -- WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); --} -- - /* This manages tasks that have run out of timeslice during a scheduler_tick */ - static inline void scheduler_task_tick(struct rq *rq) - { --- -2.37.0 - - -From 3583d699907a64f362492df2b3a5c6f307d79bdb Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 18 Sep 2020 21:45:53 +0800 -Subject: [PATCH 048/297] sched/alt: [Sync] 7318d4cc14c8 sched: Provide - sched_set_fifo() - ---- - kernel/sched/alt_core.c | 47 +++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 47 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3cc8d7a8d3fe..d5c6df27b2ca 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4563,6 +4563,8 @@ static int _sched_setscheduler(struct task_struct *p, int policy, - * @policy: new policy. - * @param: structure containing the new RT priority. - * -+ * Use sched_set_fifo(), read its comment. -+ * - * Return: 0 on success. An error code otherwise. - * - * NOTE that the task may be already dead. -@@ -4606,6 +4608,51 @@ int sched_setscheduler_nocheck(struct task_struct *p, int policy, - } - EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); - -+/* -+ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally -+ * incapable of resource management, which is the one thing an OS really should -+ * be doing. -+ * -+ * This is of course the reason it is limited to privileged users only. -+ * -+ * Worse still; it is fundamentally impossible to compose static priority -+ * workloads. You cannot take two correctly working static prio workloads -+ * and smash them together and still expect them to work. -+ * -+ * For this reason 'all' FIFO tasks the kernel creates are basically at: -+ * -+ * MAX_RT_PRIO / 2 -+ * -+ * The administrator _MUST_ configure the system, the kernel simply doesn't -+ * know enough information to make a sensible choice. -+ */ -+void sched_set_fifo(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo); -+ -+/* -+ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. 
-+ */ -+void sched_set_fifo_low(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = 1 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo_low); -+ -+void sched_set_normal(struct task_struct *p, int nice) -+{ -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ .sched_nice = nice, -+ }; -+ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_normal); -+ - static int - do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) - { --- -2.37.0 - - -From 12657c8a61232ebb0b955ef1207d7ad75c1ee776 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 18 Sep 2020 21:48:06 +0800 -Subject: [PATCH 049/297] sched/alt: [Sync] 616d91b68cd5 sched: Remove - sched_setscheduler*() EXPORTs - ---- - kernel/sched/alt_core.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index d5c6df27b2ca..c265c8fbc8d6 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4575,13 +4575,10 @@ int sched_setscheduler(struct task_struct *p, int policy, - return _sched_setscheduler(p, policy, param, true); - } - --EXPORT_SYMBOL_GPL(sched_setscheduler); -- - int sched_setattr(struct task_struct *p, const struct sched_attr *attr) - { - return __sched_setscheduler(p, attr, true, true); - } --EXPORT_SYMBOL_GPL(sched_setattr); - - int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) - { -@@ -4606,7 +4603,6 @@ int sched_setscheduler_nocheck(struct task_struct *p, int policy, - { - return _sched_setscheduler(p, policy, param, false); - } --EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); - - /* - * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally --- -2.37.0 - - -From daeb45651d2337f4f99fbc3d94d7c54dbd3fe869 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 18 Sep 2020 21:53:47 +0800 -Subject: [PATCH 050/297] sched/alt: [Sync] cc172ff301d8 sched/debug: Fix the - alignment of the show-state debug output - ---- - kernel/sched/alt_core.c | 15 ++++----------- - 1 file changed, 4 insertions(+), 11 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c265c8fbc8d6..4bd60dd264f3 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5422,10 +5422,10 @@ void sched_show_task(struct task_struct *p) - if (!try_get_task_stack(p)) - return; - -- printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); -+ pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); - - if (p->state == TASK_RUNNING) -- printk(KERN_CONT " running task "); -+ pr_cont(" running task "); - #ifdef CONFIG_DEBUG_STACK_USAGE - free = stack_not_used(p); - #endif -@@ -5434,8 +5434,8 @@ void sched_show_task(struct task_struct *p) - if (pid_alive(p)) - ppid = task_pid_nr(rcu_dereference(p->real_parent)); - rcu_read_unlock(); -- printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, -- task_pid_nr(p), ppid, -+ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", -+ free, task_pid_nr(p), ppid, - (unsigned long)task_thread_info(p)->flags); - - print_worker_info(KERN_INFO, p); -@@ -5470,13 +5470,6 @@ void show_state_filter(unsigned long state_filter) - { - struct task_struct *g, *p; - --#if BITS_PER_LONG == 32 -- printk(KERN_INFO -- " task PC stack pid father\n"); --#else -- printk(KERN_INFO -- " task PC stack pid father\n"); --#endif - rcu_read_lock(); - for_each_process_thread(g, p) { - /* --- -2.37.0 - - -From 
172d673b3df9e3bb928a20619bf923bba67c2d9e Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 18 Sep 2020 21:55:19 +0800 -Subject: [PATCH 051/297] sched/alt: [Sync] df561f6688fe treewide: Use - fallthrough pseudo-keyword - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4bd60dd264f3..754f1950da32 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1470,7 +1470,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) - state = possible; - break; - } -- /* Fall-through */ -+ fallthrough; - case possible: - do_set_cpus_allowed(p, cpu_possible_mask); - state = fail; --- -2.37.0 - - -From 9488ef06c74c519a7f255f9a3c6db43b447e94d7 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 18 Sep 2020 22:01:00 +0800 -Subject: [PATCH 052/297] sched/alt: [Sync] 21a6ee14a8f2 sched: Remove - duplicated tick_nohz_full_enabled() check - ---- - kernel/sched/alt_core.c | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 754f1950da32..377fed0cdfda 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -496,12 +496,7 @@ static inline void update_rq_clock(struct rq *rq) - */ - static inline void sched_update_tick_dependency(struct rq *rq) - { -- int cpu; -- -- if (!tick_nohz_full_enabled()) -- return; -- -- cpu = cpu_of(rq); -+ int cpu = cpu_of(rq); - - if (!tick_nohz_full_cpu(cpu)) - return; --- -2.37.0 - - -From 80981e8996c806841919b1c35480942875ab6841 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 19 Sep 2020 07:14:35 +0800 -Subject: [PATCH 053/297] sched/alt: [Sync] 13685c4a08fc sched/uclamp: Add a - new sysctl to control RT default boost value - ---- - kernel/sched/alt_core.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 377fed0cdfda..02a9a194f533 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2361,6 +2361,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - return 0; - } - -+void sched_post_fork(struct task_struct *p) {} -+ - #ifdef CONFIG_SCHEDSTATS - - DEFINE_STATIC_KEY_FALSE(sched_schedstats); --- -2.37.0 - - -From f175857e1c2e8d756d5fbbe1b087be6766c2b170 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 19 Sep 2020 09:58:04 +0800 -Subject: [PATCH 054/297] sched/alt: Sync-up mainline *need_resched() changes. - ---- - kernel/sched/alt_core.c | 18 ++++++++++++------ - 1 file changed, 12 insertions(+), 6 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 02a9a194f533..f5380178227e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -818,11 +818,15 @@ int get_nohz_timer_target(void) - */ - static inline void wake_up_idle_cpu(int cpu) - { -+ struct rq *rq = cpu_rq(cpu); -+ - if (cpu == smp_processor_id()) - return; - -- set_tsk_need_resched(cpu_rq(cpu)->idle); -- smp_send_reschedule(cpu); -+ if (set_nr_and_not_polling(rq->idle)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); - } - - static inline bool wake_up_full_nohz_cpu(int cpu) -@@ -833,6 +837,8 @@ static inline bool wake_up_full_nohz_cpu(int cpu) - * If needed we can still optimize that later with an - * empty IRQ. - */ -+ if (cpu_is_offline(cpu)) -+ return true; /* Don't try to wake offline CPUs. 
*/ - if (tick_nohz_full_cpu(cpu)) { - if (cpu != smp_processor_id() || - tick_nohz_tick_stopped()) -@@ -845,7 +851,7 @@ static inline bool wake_up_full_nohz_cpu(int cpu) - - void wake_up_nohz_cpu(int cpu) - { -- if (cpu_online(cpu) && !wake_up_full_nohz_cpu(cpu)) -+ if (!wake_up_full_nohz_cpu(cpu)) - wake_up_idle_cpu(cpu); - } - -@@ -3738,12 +3744,12 @@ static void __sched notrace __schedule(bool preempt) - switch_count = &prev->nvcsw; - } - -- clear_tsk_need_resched(prev); -- clear_preempt_need_resched(); -- - check_curr(prev, rq); - - next = choose_next_task(rq, cpu, prev); -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+ - - if (likely(prev != next)) { - next->last_ran = rq->clock_task; --- -2.37.0 - - -From 65d9b114066a24e17411041c7cd8f02f9b7a78bb Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 9 Oct 2020 11:21:03 +0800 -Subject: [PATCH 055/297] sched/alt: Disable ttwu queue code path by default. - -Based on user's feedback and benchmark, ttwu queue(wakelist) code path -is disabled by default. ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f5380178227e..3c5eba046ed6 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1828,7 +1828,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) - - static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) - { -- if (ttwu_queue_cond(cpu, wake_flags)) { -+ if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { - if (WARN_ON_ONCE(cpu == smp_processor_id())) - return false; - --- -2.37.0 - - -From c03a2be38e690a43fce3165ed8041a1216dbf0f9 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 19 Sep 2020 10:00:49 +0800 -Subject: [PATCH 056/297] Project-C v5.9-r0 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3c5eba046ed6..f36264fea75c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -45,7 +45,7 @@ - #define CREATE_TRACE_POINTS - #include - --#define ALT_SCHED_VERSION "v5.8-r3" -+#define ALT_SCHED_VERSION "v5.9-r0" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From b794c7f66b550db5eb4d44acc7314dbae5f6d15c Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 12 Oct 2020 20:43:53 +0800 -Subject: [PATCH 057/297] sched/alt: Fix compilation erro in pelt.c - ---- - kernel/sched/alt_core.c | 11 +++++++++-- - kernel/sched/alt_sched.h | 4 ++++ - 2 files changed, 13 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f36264fea75c..d43ca62fd00f 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -11,6 +11,10 @@ - * scheduler by Alfred Chen. - * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. - */ -+#define CREATE_TRACE_POINTS -+#include -+#undef CREATE_TRACE_POINTS -+ - #include "sched.h" - - #include -@@ -42,8 +46,11 @@ - #include "pelt.h" - #include "smp.h" - --#define CREATE_TRACE_POINTS --#include -+/* -+ * Export tracepoints that act as a bare tracehook (ie: have no trace event -+ * associated with them) to allow external modules to probe them. 
-+ */ -+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - - #define ALT_SCHED_VERSION "v5.9-r0" - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 99be2c51c88d..03f8b8b1aa27 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -46,6 +46,8 @@ - - #include "cpupri.h" - -+#include -+ - #ifdef CONFIG_SCHED_BMQ - #include "bmq.h" - #endif -@@ -496,6 +498,8 @@ static inline int sched_tick_offload_init(void) { return 0; } - - extern void schedule_idle(void); - -+#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) -+ - /* - * !! For sched_setattr_nocheck() (kernel) only !! - * --- -2.37.0 - - -From 15718ef8415324b6407ead6535988d1cb6132c14 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 29 Sep 2020 20:44:57 +0800 -Subject: [PATCH 058/297] sched/pds: Refine skiplist implementation. - ---- - include/linux/skip_list.h | 82 +++++++++++++++++++-------------------- - kernel/sched/pds_imp.h | 4 +- - 2 files changed, 43 insertions(+), 43 deletions(-) - -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -index 47ca955a451d..5f76038e0320 100644 ---- a/include/linux/skip_list.h -+++ b/include/linux/skip_list.h -@@ -65,17 +65,11 @@ struct skiplist_node { - &name, &name, &name, &name},\ - } - --static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) --{ -- /* only level 0 ->next matters in skiplist_empty() */ -- WRITE_ONCE(node->next[0], node); --} -- - /** -- * FULL_INIT_SKIPLIST_NODE -- fully init a skiplist_node, expecially for header -+ * INIT_SKIPLIST_NODE -- init a skiplist_node, expecially for header - * @node: the skip list node to be inited. - */ --static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) -+static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) - { - int i; - -@@ -86,15 +80,6 @@ static inline void FULL_INIT_SKIPLIST_NODE(struct skiplist_node *node) - } - } - --/** -- * skiplist_empty - test whether a skip list is empty -- * @head: the skip list to test. -- */ --static inline int skiplist_empty(const struct skiplist_node *head) --{ -- return READ_ONCE(head->next[0]) == head; --} -- - /** - * skiplist_entry - get the struct for this entry - * @ptr: the &struct skiplist_node pointer. 
-@@ -119,31 +104,47 @@ static inline int skiplist_empty(const struct skiplist_node *head) - #define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ - static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ - {\ -- struct skiplist_node *update[NUM_SKIPLIST_LEVEL];\ - struct skiplist_node *p, *q;\ -- int k = head->level;\ -+ unsigned int k = head->level;\ -+ unsigned int l = node->level;\ - \ - p = head;\ -- do {\ -+ if (l > k) {\ -+ l = node->level = ++head->level;\ -+\ -+ node->next[l] = head;\ -+ node->prev[l] = head;\ -+ head->next[l] = node;\ -+ head->prev[l] = node;\ -+\ -+ do {\ -+ while (q = p->next[k], q != head && search_func(q, node))\ -+ p = q;\ -+\ -+ node->prev[k] = p;\ -+ node->next[k] = q;\ -+ q->prev[k] = node;\ -+ p->next[k] = node;\ -+ } while (k--);\ -+\ -+ return (p == head);\ -+ }\ -+\ -+ while (k > l) {\ - while (q = p->next[k], q != head && search_func(q, node))\ - p = q;\ -- update[k] = p;\ -- } while (--k >= 0);\ --\ -- k = node->level;\ -- if (unlikely(k > head->level)) {\ -- node->level = k = ++head->level;\ -- update[k] = head;\ -+ k--;\ - }\ - \ - do {\ -- p = update[k];\ -- q = p->next[k];\ -- node->next[k] = q;\ -- p->next[k] = node;\ -+ while (q = p->next[k], q != head && search_func(q, node))\ -+ p = q;\ -+\ - node->prev[k] = p;\ -+ node->next[k] = q;\ - q->prev[k] = node;\ -- } while (--k >= 0);\ -+ p->next[k] = node;\ -+ } while (k--);\ - \ - return (p == head);\ - } -@@ -159,18 +160,17 @@ static inline int func_name(struct skiplist_node *head, struct skiplist_node *no - static inline int - skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) - { -- int l, m = node->level; -+ unsigned int i, level = node->level; - -- for (l = 0; l <= m; l++) { -- node->prev[l]->next[l] = node->next[l]; -- node->next[l]->prev[l] = node->prev[l]; -+ for (i = 0; i <= level; i++) { -+ node->prev[i]->next[i] = node->next[i]; -+ node->next[i]->prev[i] = node->prev[i]; - } -- if (m == head->level && m > 0) { -- while (head->next[m] == head && m > 0) -- m--; -- head->level = m; -+ if (level == head->level && level) { -+ while (head->next[level] == head && level) -+ level--; -+ head->level = level; - } -- INIT_SKIPLIST_NODE(node); - - return (node->prev[0] == head); - } -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index 6baee5e961b9..f93ac2fa3dc3 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -89,7 +89,7 @@ DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); - */ - static inline void sched_queue_init(struct rq *rq) - { -- FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+ INIT_SKIPLIST_NODE(&rq->sl_header); - } - - /* -@@ -105,7 +105,7 @@ static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle - idle->deadline = 0ULL; - update_task_priodl(idle); - -- FULL_INIT_SKIPLIST_NODE(&rq->sl_header); -+ INIT_SKIPLIST_NODE(&rq->sl_header); - - idle->sl_node.level = idle->sl_level; - pds_skiplist_insert(&rq->sl_header, &idle->sl_node); --- -2.37.0 - - -From 638c507f60658678e16caef7de5864b5825e7ecf Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 1 Oct 2020 00:20:50 +0800 -Subject: [PATCH 059/297] sched/pds: Rework pds_skiplist_random_level(). 
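The hunk that follows swaps find_first_bit() for a masked __ffs() over the random seed, so each extra skiplist level is half as likely as the one before it. A standalone userspace sketch of that selection rule, with illustrative demo_* names that are not part of the patch (POSIX ffs() from <strings.h> stands in for the kernel's __ffs()):

    /*
     * Sketch only: the lowest set bit of a pseudo-random word gives a
     * geometric distribution with p = 0.5 per level, capped at the
     * deepest level when all of the low bits happen to be clear.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <strings.h>                    /* ffs() */

    #define DEMO_SKIPLIST_LEVELS 8

    static int demo_random_level(unsigned long seed)
    {
            seed &= (1UL << DEMO_SKIPLIST_LEVELS) - 1;  /* keep the low bits */
            if (seed)
                    return ffs(seed) - 1;   /* ffs() is 1-based, __ffs() is 0-based */
            return DEMO_SKIPLIST_LEVELS - 1;            /* all low bits clear */
    }

    int main(void)
    {
            long hist[DEMO_SKIPLIST_LEVELS] = { 0 };
            int i;

            for (i = 0; i < 1 << 20; i++)
                    hist[demo_random_level(random())]++;

            for (i = 0; i < DEMO_SKIPLIST_LEVELS; i++)
                    printf("level %d: %ld\n", i, hist[i]);
            return 0;
    }

Running the sketch shows the level histogram roughly halving per level, which is the p = 0.5 property the comment in the hunk below refers to.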
- ---- - kernel/sched/pds_imp.h | 18 +++++++++--------- - 1 file changed, 9 insertions(+), 9 deletions(-) - -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index f93ac2fa3dc3..ef17fec4ec25 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -192,11 +192,9 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - * pds_skiplist_random_level -- Returns a pseudo-random level number for skip - * list node which is used in PDS run queue. - * -- * In current implementation, based on testing, the first 8 bits in microseconds -- * of niffies are suitable for random level population. -- * find_first_bit() is used to satisfy p = 0.5 between each levels, and there -- * should be platform hardware supported instruction(known as ctz/clz) to speed -- * up this function. -+ * __ffs() is used to satisfy p = 0.5 between each levels, and there should be -+ * platform instruction(known as ctz/clz) for acceleration. -+ * - * The skiplist level for a task is populated when task is created and doesn't - * change in task's life time. When task is being inserted into run queue, this - * skiplist level is set to task's sl_node->level, the skiplist insert function -@@ -204,8 +202,6 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - */ - static inline int pds_skiplist_random_level(const struct task_struct *p) - { -- long unsigned int randseed; -- - /* - * 1. Some architectures don't have better than microsecond resolution - * so mask out ~microseconds as a factor of the random seed for skiplist -@@ -213,9 +209,13 @@ static inline int pds_skiplist_random_level(const struct task_struct *p) - * 2. Use address of task structure pointer as another factor of the - * random seed for task burst forking scenario. - */ -- randseed = (task_rq(p)->clock ^ (long unsigned int)p) >> 10; -+ unsigned long randseed = (task_rq(p)->clock ^ (unsigned long)p) >> 10; -+ -+ randseed &= __GENMASK(NUM_SKIPLIST_LEVEL - 1, 0); -+ if (randseed) -+ return __ffs(randseed); - -- return find_first_bit(&randseed, NUM_SKIPLIST_LEVEL - 1); -+ return (NUM_SKIPLIST_LEVEL - 1); - } - - static void sched_task_fork(struct task_struct *p, struct rq *rq) --- -2.37.0 - - -From e9d8f798e5f97c0a6c14bd1c18540aedd0dfd066 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 11 Oct 2020 09:22:57 +0800 -Subject: [PATCH 060/297] sched/alt: Rework best cpu selection. - -Based on testing, selecting first set CPU provide better performance -than current CPU affinity based best_mask_cpu(). - -Macro SCHED_CPUMASK_FIRST_BIT() and routine sched_cpumask_first_and() -are introduced to reduce overhead calling cpumask_xxxx() routines when -NR_CPUS <= 64. ---- - kernel/sched/alt_core.c | 36 ++++++++++++++++++++++++++++-------- - 1 file changed, 28 insertions(+), 8 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index d43ca62fd00f..f6d5c9768701 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -105,6 +105,29 @@ EXPORT_SYMBOL_GPL(sched_smt_present); - * domain, see cpus_share_cache(). 
- */ - DEFINE_PER_CPU(int, sd_llc_id); -+ -+#if NR_CPUS <= 64 -+#define SCHED_CPUMASK_FIRST_BIT(mask) (__ffs((mask).bits[0])) -+ -+static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, -+ const struct cpumask *andp) -+{ -+ unsigned long t = srcp->bits[0] & andp->bits[0]; -+ -+ if (t) -+ return __ffs(t); -+ -+ return nr_cpu_ids; -+} -+#else -+#define SCHED_CPUMASK_FIRST_BIT(mask) (cpumask_fist_bit(&(mask))) -+static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, -+ const struct cpumask *andp) -+{ -+ return cpumask_first_and(srcp, andp); -+} -+#endif -+ - #endif /* CONFIG_SMP */ - - static DEFINE_MUTEX(sched_hotcpu_mutex); -@@ -1520,9 +1543,9 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) - cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || - cpumask_and(&tmp, &chk_mask, - &sched_rq_watermark[task_sched_prio(p, rq) + 1])) -- return best_mask_cpu(task_cpu(p), &tmp); -+ return SCHED_CPUMASK_FIRST_BIT(tmp); - -- return best_mask_cpu(task_cpu(p), &chk_mask); -+ return SCHED_CPUMASK_FIRST_BIT(chk_mask); - } - - void sched_set_stop_task(int cpu, struct task_struct *stop) -@@ -3094,8 +3117,8 @@ static inline int active_load_balance_cpu_stop(void *data) - { - struct rq *rq = this_rq(); - struct task_struct *p = data; -- cpumask_t tmp; - unsigned long flags; -+ int dcpu; - - local_irq_save(flags); - -@@ -3105,12 +3128,9 @@ static inline int active_load_balance_cpu_stop(void *data) - rq->active_balance = 0; - /* _something_ may have changed the task, double check again */ - if (task_on_rq_queued(p) && task_rq(p) == rq && -- cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { -- int cpu = cpu_of(rq); -- int dcpu = __best_mask_cpu(cpu, &tmp, -- per_cpu(sched_cpu_llc_mask, cpu)); -+ (dcpu = sched_cpumask_first_and(p->cpus_ptr, &sched_sg_idle_mask)) < -+ nr_cpu_ids) - rq = move_queued_task(rq, p, dcpu); -- } - - raw_spin_unlock(&rq->lock); - raw_spin_unlock(&p->pi_lock); --- -2.37.0 - - -From 1265bd5059a7bbd33d7e3f357c253bce68545c1a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 11 Oct 2020 11:15:21 +0800 -Subject: [PATCH 061/297] sched/alt: Remove unused sched_cpu_llc_mask. 
- ---- - kernel/sched/alt_core.c | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f6d5c9768701..40b059846496 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -92,7 +92,6 @@ static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; - - DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); - DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); --DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); - - #ifdef CONFIG_SCHED_SMT - DEFINE_STATIC_KEY_FALSE(sched_smt_present); -@@ -5892,8 +5891,6 @@ static void sched_init_topology_cpumask_early(void) - cpumask_copy(tmp, cpu_possible_mask); - cpumask_clear_cpu(cpu, tmp); - } -- per_cpu(sched_cpu_llc_mask, cpu) = -- &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); - per_cpu(sched_cpu_affinity_end_mask, cpu) = - &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); - /*per_cpu(sd_llc_id, cpu) = cpu;*/ -@@ -5923,7 +5920,6 @@ static void sched_init_topology_cpumask(void) - TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); - #endif - per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); -- per_cpu(sched_cpu_llc_mask, cpu) = chk; - TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); - - TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); -@@ -5931,10 +5927,8 @@ static void sched_init_topology_cpumask(void) - TOPOLOGY_CPUMASK(others, cpu_online_mask, true); - - per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; -- printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", -- cpu, per_cpu(sd_llc_id, cpu), -- (int) (per_cpu(sched_cpu_llc_mask, cpu) - -- &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); -+ printk(KERN_INFO "sched: cpu#%02d llc_id = %d\n", -+ cpu, per_cpu(sd_llc_id, cpu)); - } - } - #endif --- -2.37.0 - - -From 3af34a65f5b0831d955097aa5080d2aa2d862819 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 16 Oct 2020 15:06:09 +0800 -Subject: [PATCH 062/297] sched/alt: Introduce sched_best_cpu(). - -Introduce new framework which currently only work for LLC. Can be expend -for IMIT or BIG.little in the furture. 
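A userspace sketch of the mask walk that sched_best_cpu() performs in the hunk below: each CPU carries an ordered array of "closeness" masks (only an LLC level exists so far, plus a catch-all), and the first level that intersects the candidate mask supplies the CPU. The demo_* names and the two-level topology are assumptions for illustration, not part of the patch:

    /*
     * Sketch only: walk per-CPU closeness masks and return the first
     * candidate found in the closest level that has any overlap.
     */
    #include <stdio.h>

    #define DEMO_NR_LEVELS 2                /* LLC level + everything else */

    static unsigned long demo_masks[DEMO_NR_LEVELS];

    static int demo_best_cpu(unsigned long candidates)
    {
            int level;

            for (level = 0; level < DEMO_NR_LEVELS; level++) {
                    unsigned long hit = demo_masks[level] & candidates;

                    if (hit)
                            return __builtin_ctzl(hit);  /* lowest set bit, like __ffs() */
            }
            return -1;                      /* no candidate CPU at all */
    }

    int main(void)
    {
            demo_masks[0] = 0x0fUL;         /* CPUs 0-3 share the LLC with CPU 0 */
            demo_masks[1] = ~0x0fUL;        /* all remaining CPUs */

            printf("%d\n", demo_best_cpu(0x30UL));  /* only CPUs 4,5 allowed -> 4 */
            printf("%d\n", demo_best_cpu(0x24UL));  /* CPUs 2,5 allowed -> 2 (same LLC) */
            return 0;
    }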
---- - kernel/sched/alt_core.c | 51 ++++++++++++++++++++++++++++++++++------ - kernel/sched/alt_sched.h | 14 ----------- - kernel/sched/topology.c | 10 +++++++- - 3 files changed, 53 insertions(+), 22 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 40b059846496..cec61ca0abb2 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -105,9 +105,17 @@ EXPORT_SYMBOL_GPL(sched_smt_present); - */ - DEFINE_PER_CPU(int, sd_llc_id); - --#if NR_CPUS <= 64 --#define SCHED_CPUMASK_FIRST_BIT(mask) (__ffs((mask).bits[0])) -+enum { -+ LLC_LEVEL = 1, -+ NR_BEST_CPU_LEVEL -+}; -+ -+#define NR_BEST_CPU_MASK (1 << (NR_BEST_CPU_LEVEL - 1)) - -+static cpumask_t -+sched_best_cpu_masks[NR_CPUS][NR_BEST_CPU_MASK] ____cacheline_aligned_in_smp; -+ -+#if NR_CPUS <= 64 - static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, - const struct cpumask *andp) - { -@@ -118,13 +126,35 @@ static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, - - return nr_cpu_ids; - } -+ -+static inline unsigned int sched_best_cpu(const unsigned int cpu, -+ const struct cpumask *m) -+{ -+ cpumask_t *chk = sched_best_cpu_masks[cpu]; -+ unsigned long t; -+ -+ while ((t = chk->bits[0] & m->bits[0]) == 0UL) -+ chk++; -+ -+ return __ffs(t); -+} - #else --#define SCHED_CPUMASK_FIRST_BIT(mask) (cpumask_fist_bit(&(mask))) - static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, - const struct cpumask *andp) - { - return cpumask_first_and(srcp, andp); - } -+ -+static inline unsigned int sched_best_cpu(const unsigned int cpu, -+ const struct cpumask *m) -+{ -+ cpumask_t t, *chk = sched_best_cpu_masks[cpu]; -+ -+ while (!cpumask_and(&t, chk, m)) -+ chk++; -+ -+ return cpumask_any(t); -+} - #endif - - #endif /* CONFIG_SMP */ -@@ -822,7 +852,7 @@ int get_nohz_timer_target(void) - default_cpu = cpu; - } - -- for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ for (mask = per_cpu(sched_cpu_affinity_masks, cpu); - mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) - for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) - if (!idle_cpu(i)) -@@ -1542,9 +1572,9 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) - cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || - cpumask_and(&tmp, &chk_mask, - &sched_rq_watermark[task_sched_prio(p, rq) + 1])) -- return SCHED_CPUMASK_FIRST_BIT(tmp); -+ return sched_best_cpu(task_cpu(p), &tmp); - -- return SCHED_CPUMASK_FIRST_BIT(chk_mask); -+ return sched_best_cpu(task_cpu(p), &chk_mask); - } - - void sched_set_stop_task(int cpu, struct task_struct *stop) -@@ -3543,7 +3573,7 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) - if (cpumask_empty(&sched_rq_pending_mask)) - return 0; - -- affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ affinity_mask = per_cpu(sched_cpu_affinity_masks, cpu); - end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); - do { - int i; -@@ -5894,6 +5924,10 @@ static void sched_init_topology_cpumask_early(void) - per_cpu(sched_cpu_affinity_end_mask, cpu) = - &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); - /*per_cpu(sd_llc_id, cpu) = cpu;*/ -+ -+ for (level = 0; level < NR_BEST_CPU_MASK; level++) -+ cpumask_copy(&sched_best_cpu_masks[cpu][level], -+ cpu_possible_mask); - } - } - -@@ -5929,6 +5963,9 @@ static void sched_init_topology_cpumask(void) - per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; - printk(KERN_INFO "sched: cpu#%02d llc_id = %d\n", - cpu, per_cpu(sd_llc_id, cpu)); -+ -+ 
cpumask_copy(sched_best_cpu_masks[cpu], -+ cpu_coregroup_mask(cpu)); - } - } - #endif -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 03f8b8b1aa27..fee65eeb1405 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -225,20 +225,6 @@ enum { - - DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); - --static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, -- const cpumask_t *mask) --{ -- while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -- mask++; -- return cpu; --} -- --static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) --{ -- return cpumask_test_cpu(cpu, cpumask)? cpu : -- __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); --} -- - extern void flush_smp_call_function_from_idle(void); - - #else /* !CONFIG_SMP */ -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 2c9daf90398f..3e9d4820dce7 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -2653,7 +2653,15 @@ int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; - - int sched_numa_find_closest(const struct cpumask *cpus, int cpu) - { -- return best_mask_cpu(cpu, cpus); -+ const cpumask_t *mask; -+ -+ if (cpumask_test_cpu(cpu, cpus)) -+ return cpu; -+ -+ mask = per_cpu(sched_cpu_affinity_masks, cpu); -+ while ((cpu = cpumask_any_and(cpus, mask)) >= nr_cpu_ids) -+ mask++; -+ return cpu; - } - #endif /* CONFIG_NUMA */ - #endif --- -2.37.0 - - -From 2ecd48ff957ce766f54b9077964983ed4b718ec2 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 19 Oct 2020 17:07:00 +0800 -Subject: [PATCH 063/297] Project-C v5.9-r1 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index cec61ca0abb2..fa0ba0d55503 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.9-r0" -+#define ALT_SCHED_VERSION "v5.9-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 296bd263b86305c7668226c75bf8acbdb29b284d Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 20 Oct 2020 07:10:30 +0800 -Subject: [PATCH 064/297] sched/alt: Fix compilation when NR_CPUS > 64 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index fa0ba0d55503..edba089affc0 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -153,7 +153,7 @@ static inline unsigned int sched_best_cpu(const unsigned int cpu, - while (!cpumask_and(&t, chk, m)) - chk++; - -- return cpumask_any(t); -+ return cpumask_any(&t); - } - #endif - --- -2.37.0 - - -From ed9808f489f64b22b889f5e329d280fad96023ec Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 26 Oct 2020 13:37:09 +0800 -Subject: [PATCH 065/297] sched/alt: Reduce NUM_SKIPLIST_LEVEL to 4. 
- ---- - include/linux/skip_list.h | 8 +++----- - kernel/sched/pds_imp.h | 21 ++++++++------------- - 2 files changed, 11 insertions(+), 18 deletions(-) - -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -index 5f76038e0320..637c83ecbd6b 100644 ---- a/include/linux/skip_list.h -+++ b/include/linux/skip_list.h -@@ -50,7 +50,7 @@ - - #include - --#define NUM_SKIPLIST_LEVEL (8) -+#define NUM_SKIPLIST_LEVEL (4) - - struct skiplist_node { - int level; /* Levels in this node */ -@@ -59,10 +59,8 @@ struct skiplist_node { - }; - - #define SKIPLIST_NODE_INIT(name) { 0,\ -- {&name, &name, &name, &name,\ -- &name, &name, &name, &name},\ -- {&name, &name, &name, &name,\ -- &name, &name, &name, &name},\ -+ {&name, &name, &name, &name},\ -+ {&name, &name, &name, &name},\ - } - - /** -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index ef17fec4ec25..73fe42b84fc0 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -168,22 +168,17 @@ static inline unsigned long sched_queue_watermark(struct rq *rq) - - static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - { -- struct skiplist_node *node = p->sl_node.prev[0]; -+ struct skiplist_node *node; - -- if (node != &rq->sl_header) { -- struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); -- -- if (t->priodl > p->priodl) -- return true; -- } -+ node = p->sl_node.prev[0]; -+ if (node != &rq->sl_header && -+ skiplist_entry(node, struct task_struct, sl_node)->priodl > p->priodl) -+ return true; - - node = p->sl_node.next[0]; -- if (node != &rq->sl_header) { -- struct task_struct *t = skiplist_entry(node, struct task_struct, sl_node); -- -- if (t->priodl < p->priodl) -- return true; -- } -+ if (node != &rq->sl_header && -+ skiplist_entry(node, struct task_struct, sl_node)->priodl < p->priodl) -+ return true; - - return false; - } --- -2.37.0 - - -From e4062774baf84edf6a5df6952f96cf66f14edae3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 3 Nov 2020 22:13:13 +0800 -Subject: [PATCH 066/297] Revert "sched/alt: Fix compilation when NR_CPUS > 64" - -This reverts commit 9a879be8808af904d6faf63b6a9247e76a3b9d7e. ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index edba089affc0..fa0ba0d55503 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -153,7 +153,7 @@ static inline unsigned int sched_best_cpu(const unsigned int cpu, - while (!cpumask_and(&t, chk, m)) - chk++; - -- return cpumask_any(&t); -+ return cpumask_any(t); - } - #endif - --- -2.37.0 - - -From 7115fdcdcea39c530369d5c60f709d99665fab1d Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 3 Nov 2020 22:13:28 +0800 -Subject: [PATCH 067/297] Revert "sched/alt: Introduce sched_best_cpu()." - -This reverts commit 7e6b0567a19b1f9b8beb97255bf3ffee5a287f01. 
---- - kernel/sched/alt_core.c | 51 ++++++---------------------------------- - kernel/sched/alt_sched.h | 14 +++++++++++ - kernel/sched/topology.c | 10 +------- - 3 files changed, 22 insertions(+), 53 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index fa0ba0d55503..57d10ccf39b8 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -105,17 +105,9 @@ EXPORT_SYMBOL_GPL(sched_smt_present); - */ - DEFINE_PER_CPU(int, sd_llc_id); - --enum { -- LLC_LEVEL = 1, -- NR_BEST_CPU_LEVEL --}; -- --#define NR_BEST_CPU_MASK (1 << (NR_BEST_CPU_LEVEL - 1)) -- --static cpumask_t --sched_best_cpu_masks[NR_CPUS][NR_BEST_CPU_MASK] ____cacheline_aligned_in_smp; -- - #if NR_CPUS <= 64 -+#define SCHED_CPUMASK_FIRST_BIT(mask) (__ffs((mask).bits[0])) -+ - static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, - const struct cpumask *andp) - { -@@ -126,35 +118,13 @@ static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, - - return nr_cpu_ids; - } -- --static inline unsigned int sched_best_cpu(const unsigned int cpu, -- const struct cpumask *m) --{ -- cpumask_t *chk = sched_best_cpu_masks[cpu]; -- unsigned long t; -- -- while ((t = chk->bits[0] & m->bits[0]) == 0UL) -- chk++; -- -- return __ffs(t); --} - #else -+#define SCHED_CPUMASK_FIRST_BIT(mask) (cpumask_fist_bit(&(mask))) - static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, - const struct cpumask *andp) - { - return cpumask_first_and(srcp, andp); - } -- --static inline unsigned int sched_best_cpu(const unsigned int cpu, -- const struct cpumask *m) --{ -- cpumask_t t, *chk = sched_best_cpu_masks[cpu]; -- -- while (!cpumask_and(&t, chk, m)) -- chk++; -- -- return cpumask_any(t); --} - #endif - - #endif /* CONFIG_SMP */ -@@ -852,7 +822,7 @@ int get_nohz_timer_target(void) - default_cpu = cpu; - } - -- for (mask = per_cpu(sched_cpu_affinity_masks, cpu); -+ for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); - mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) - for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) - if (!idle_cpu(i)) -@@ -1572,9 +1542,9 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) - cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || - cpumask_and(&tmp, &chk_mask, - &sched_rq_watermark[task_sched_prio(p, rq) + 1])) -- return sched_best_cpu(task_cpu(p), &tmp); -+ return SCHED_CPUMASK_FIRST_BIT(tmp); - -- return sched_best_cpu(task_cpu(p), &chk_mask); -+ return SCHED_CPUMASK_FIRST_BIT(chk_mask); - } - - void sched_set_stop_task(int cpu, struct task_struct *stop) -@@ -3573,7 +3543,7 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) - if (cpumask_empty(&sched_rq_pending_mask)) - return 0; - -- affinity_mask = per_cpu(sched_cpu_affinity_masks, cpu); -+ affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); - end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); - do { - int i; -@@ -5924,10 +5894,6 @@ static void sched_init_topology_cpumask_early(void) - per_cpu(sched_cpu_affinity_end_mask, cpu) = - &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); - /*per_cpu(sd_llc_id, cpu) = cpu;*/ -- -- for (level = 0; level < NR_BEST_CPU_MASK; level++) -- cpumask_copy(&sched_best_cpu_masks[cpu][level], -- cpu_possible_mask); - } - } - -@@ -5963,9 +5929,6 @@ static void sched_init_topology_cpumask(void) - per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; - printk(KERN_INFO "sched: cpu#%02d llc_id = %d\n", - cpu, per_cpu(sd_llc_id, cpu)); -- -- 
cpumask_copy(sched_best_cpu_masks[cpu], -- cpu_coregroup_mask(cpu)); - } - } - #endif -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index fee65eeb1405..03f8b8b1aa27 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -225,6 +225,20 @@ enum { - - DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); - -+static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, -+ const cpumask_t *mask) -+{ -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; -+ return cpu; -+} -+ -+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) -+{ -+ return cpumask_test_cpu(cpu, cpumask)? cpu : -+ __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); -+} -+ - extern void flush_smp_call_function_from_idle(void); - - #else /* !CONFIG_SMP */ -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 3e9d4820dce7..2c9daf90398f 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -2653,15 +2653,7 @@ int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; - - int sched_numa_find_closest(const struct cpumask *cpus, int cpu) - { -- const cpumask_t *mask; -- -- if (cpumask_test_cpu(cpu, cpus)) -- return cpu; -- -- mask = per_cpu(sched_cpu_affinity_masks, cpu); -- while ((cpu = cpumask_any_and(cpus, mask)) >= nr_cpu_ids) -- mask++; -- return cpu; -+ return best_mask_cpu(cpu, cpus); - } - #endif /* CONFIG_NUMA */ - #endif --- -2.37.0 - - -From 54b15bb3e55454ca70be55d86b6439ade2f36949 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 3 Nov 2020 22:13:44 +0800 -Subject: [PATCH 068/297] Revert "sched/alt: Remove unused sched_cpu_llc_mask." - -This reverts commit d18994d3d143830fe250b9a27e76f3c1b51459d7. ---- - kernel/sched/alt_core.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 57d10ccf39b8..1e2adb3d6a7b 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -92,6 +92,7 @@ static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; - - DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); - DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); - - #ifdef CONFIG_SCHED_SMT - DEFINE_STATIC_KEY_FALSE(sched_smt_present); -@@ -5891,6 +5892,8 @@ static void sched_init_topology_cpumask_early(void) - cpumask_copy(tmp, cpu_possible_mask); - cpumask_clear_cpu(cpu, tmp); - } -+ per_cpu(sched_cpu_llc_mask, cpu) = -+ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); - per_cpu(sched_cpu_affinity_end_mask, cpu) = - &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); - /*per_cpu(sd_llc_id, cpu) = cpu;*/ -@@ -5920,6 +5923,7 @@ static void sched_init_topology_cpumask(void) - TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); - #endif - per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); -+ per_cpu(sched_cpu_llc_mask, cpu) = chk; - TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); - - TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); -@@ -5927,8 +5931,10 @@ static void sched_init_topology_cpumask(void) - TOPOLOGY_CPUMASK(others, cpu_online_mask, true); - - per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; -- printk(KERN_INFO "sched: cpu#%02d llc_id = %d\n", -- cpu, per_cpu(sd_llc_id, cpu)); -+ printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", -+ cpu, per_cpu(sd_llc_id, cpu), -+ (int) (per_cpu(sched_cpu_llc_mask, cpu) - 
-+ &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); - } - } - #endif --- -2.37.0 - - -From df8313744bcccdcdf6c67655ca021e90fed6a5d3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 3 Nov 2020 22:13:59 +0800 -Subject: [PATCH 069/297] Revert "sched/alt: Rework best cpu selection." - -This reverts commit 173014cfa89544d02216612e812b950a31246c6d. ---- - kernel/sched/alt_core.c | 36 ++++++++---------------------------- - 1 file changed, 8 insertions(+), 28 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 1e2adb3d6a7b..7cb0edc7fe8c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -105,29 +105,6 @@ EXPORT_SYMBOL_GPL(sched_smt_present); - * domain, see cpus_share_cache(). - */ - DEFINE_PER_CPU(int, sd_llc_id); -- --#if NR_CPUS <= 64 --#define SCHED_CPUMASK_FIRST_BIT(mask) (__ffs((mask).bits[0])) -- --static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, -- const struct cpumask *andp) --{ -- unsigned long t = srcp->bits[0] & andp->bits[0]; -- -- if (t) -- return __ffs(t); -- -- return nr_cpu_ids; --} --#else --#define SCHED_CPUMASK_FIRST_BIT(mask) (cpumask_fist_bit(&(mask))) --static inline unsigned int sched_cpumask_first_and(const struct cpumask *srcp, -- const struct cpumask *andp) --{ -- return cpumask_first_and(srcp, andp); --} --#endif -- - #endif /* CONFIG_SMP */ - - static DEFINE_MUTEX(sched_hotcpu_mutex); -@@ -1543,9 +1520,9 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) - cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || - cpumask_and(&tmp, &chk_mask, - &sched_rq_watermark[task_sched_prio(p, rq) + 1])) -- return SCHED_CPUMASK_FIRST_BIT(tmp); -+ return best_mask_cpu(task_cpu(p), &tmp); - -- return SCHED_CPUMASK_FIRST_BIT(chk_mask); -+ return best_mask_cpu(task_cpu(p), &chk_mask); - } - - void sched_set_stop_task(int cpu, struct task_struct *stop) -@@ -3117,8 +3094,8 @@ static inline int active_load_balance_cpu_stop(void *data) - { - struct rq *rq = this_rq(); - struct task_struct *p = data; -+ cpumask_t tmp; - unsigned long flags; -- int dcpu; - - local_irq_save(flags); - -@@ -3128,9 +3105,12 @@ static inline int active_load_balance_cpu_stop(void *data) - rq->active_balance = 0; - /* _something_ may have changed the task, double check again */ - if (task_on_rq_queued(p) && task_rq(p) == rq && -- (dcpu = sched_cpumask_first_and(p->cpus_ptr, &sched_sg_idle_mask)) < -- nr_cpu_ids) -+ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { -+ int cpu = cpu_of(rq); -+ int dcpu = __best_mask_cpu(cpu, &tmp, -+ per_cpu(sched_cpu_llc_mask, cpu)); - rq = move_queued_task(rq, p, dcpu); -+ } - - raw_spin_unlock(&rq->lock); - raw_spin_unlock(&p->pi_lock); --- -2.37.0 - - -From b8e67a31e27773a5b57f2bcf4c16b9d6a9882102 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 9 Nov 2020 11:08:36 +0800 -Subject: [PATCH 070/297] sched/alt: Enhance best_mask_cpu() for better - performance. - -Enhance best_mask_cpu() performance when NR_CPUS <= 64. 
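A sketch of the single-word fast path added below: with NR_CPUS <= 64 a cpumask fits in one unsigned long, so each per-level probe reduces to an AND plus a find-first-set-bit instead of the generic cpumask_any_and() walk. The demo_* names are illustrative, and the caller is assumed to guarantee that the last level always matches, as the kernel's all-CPUs fallback mask does:

    /*
     * Sketch only: probe ever-wider affinity levels until one
     * intersects the wanted mask, then take its first set bit.
     */
    #include <stdio.h>

    static int demo_best_cpu_64(unsigned long wanted, const unsigned long *levels)
    {
            unsigned long hit;

            /* assumes some level matches, i.e. the last level covers all CPUs */
            while ((hit = wanted & *levels) == 0UL)
                    levels++;               /* next (wider) affinity level */

            return __builtin_ctzl(hit);     /* single-word __ffs() equivalent */
    }

    int main(void)
    {
            /* CPU 0's levels: its SMT sibling pair, then the rest of the machine */
            const unsigned long levels[] = { 0x3UL, ~0x3UL };

            printf("%d\n", demo_best_cpu_64(0x8UL, levels)); /* only CPU 3 -> 3 */
            printf("%d\n", demo_best_cpu_64(0xaUL, levels)); /* CPUs 1,3 -> 1   */
            return 0;
    }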
---- - kernel/sched/alt_core.c | 6 ++++-- - kernel/sched/alt_sched.h | 31 ++++++++++++++++++++++--------- - 2 files changed, 26 insertions(+), 11 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 7cb0edc7fe8c..3a4281ba65e6 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -90,7 +90,7 @@ int sched_yield_type __read_mostly = 1; - #ifdef CONFIG_SMP - static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; - --DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_affinity_masks); - DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); - DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); - -@@ -5867,7 +5867,7 @@ static void sched_init_topology_cpumask_early(void) - cpumask_t *tmp; - - for_each_possible_cpu(cpu) { -- for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) { -+ for (level = 0; level < NR_CPU_AFFINITY_LEVELS; level++) { - tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); - cpumask_copy(tmp, cpu_possible_mask); - cpumask_clear_cpu(cpu, tmp); -@@ -5898,6 +5898,8 @@ static void sched_init_topology_cpumask(void) - - chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); - -+ cpumask_copy(chk++, cpumask_of(cpu)); -+ - cpumask_complement(chk, cpumask_of(cpu)); - #ifdef CONFIG_SCHED_SMT - TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 03f8b8b1aa27..4698d6d16a2d 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -213,30 +213,43 @@ static inline void unregister_sched_domain_sysctl(void) - extern bool sched_smp_initialized; - - enum { -- BASE_CPU_AFFINITY_CHK_LEVEL = 1, -+ ITSELF_LEVEL_SPACE_HOLDER, - #ifdef CONFIG_SCHED_SMT -- SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, -+ SMT_LEVEL_SPACE_HOLDER, - #endif --#ifdef CONFIG_SCHED_MC -- MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER, --#endif -- NR_CPU_AFFINITY_CHK_LEVEL -+ COREGROUP_LEVEL_SPACE_HOLDER, -+ CORE_LEVEL_SPACE_HOLDER, -+ OTHER_LEVEL_SPACE_HOLDER, -+ NR_CPU_AFFINITY_LEVELS - }; - --DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks); -+DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_affinity_masks); - - static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, - const cpumask_t *mask) - { -+#if NR_CPUS <= 64 -+ unsigned long t; -+ -+ while ((t = cpumask->bits[0] & mask->bits[0]) == 0UL) -+ mask++; -+ -+ return __ffs(t); -+#else - while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) - mask++; - return cpu; -+#endif - } - - static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) - { -- return cpumask_test_cpu(cpu, cpumask)? cpu : -- __best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); -+#if NR_CPUS <= 64 -+ return __best_mask_cpu(cpu, cpumask, per_cpu(sched_cpu_affinity_masks, cpu)); -+#else -+ return cpumask_test_cpu(cpu, cpumask) ? 
cpu: -+ __best_mask_cpu(cpu, cpumask, per_cpu(sched_cpu_affinity_masks, cpu) + 1); -+#endif - } - - extern void flush_smp_call_function_from_idle(void); --- -2.37.0 - - -From a8577b8726041fea13ca9e51472b1b5b253ced35 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 20 Nov 2020 10:26:13 +0800 -Subject: [PATCH 071/297] Project-C v5.9-r2 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3a4281ba65e6..e485c76b1668 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.9-r1" -+#define ALT_SCHED_VERSION "v5.9-r2" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 455a4ecb35ae5d682811f4ad1e83ec21f16e8b67 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 4 Dec 2020 10:55:21 +0800 -Subject: [PATCH 072/297] sched/alt: Minor improvement for - sched_cpu_affinity_masks. - ---- - kernel/sched/alt_core.c | 29 +++++++++++++---------------- - 1 file changed, 13 insertions(+), 16 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index e485c76b1668..58a432fffd53 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -800,7 +800,7 @@ int get_nohz_timer_target(void) - default_cpu = cpu; - } - -- for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ for (mask = per_cpu(sched_cpu_affinity_masks, cpu) + 1; - mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) - for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) - if (!idle_cpu(i)) -@@ -3524,7 +3524,7 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) - if (cpumask_empty(&sched_rq_pending_mask)) - return 0; - -- affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -+ affinity_mask = per_cpu(sched_cpu_affinity_masks, cpu) + 1; - end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); - do { - int i; -@@ -5863,19 +5863,18 @@ int sched_cpu_dying(unsigned int cpu) - #ifdef CONFIG_SMP - static void sched_init_topology_cpumask_early(void) - { -- int cpu, level; -+ int cpu; - cpumask_t *tmp; - - for_each_possible_cpu(cpu) { -- for (level = 0; level < NR_CPU_AFFINITY_LEVELS; level++) { -- tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]); -- cpumask_copy(tmp, cpu_possible_mask); -- cpumask_clear_cpu(cpu, tmp); -- } -- per_cpu(sched_cpu_llc_mask, cpu) = -- &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -- per_cpu(sched_cpu_affinity_end_mask, cpu) = -- &(per_cpu(sched_cpu_affinity_masks, cpu)[1]); -+ tmp = per_cpu(sched_cpu_affinity_masks, cpu); -+ -+ cpumask_copy(tmp, cpumask_of(cpu)); -+ tmp++; -+ cpumask_copy(tmp, cpu_possible_mask); -+ cpumask_clear_cpu(cpu, tmp); -+ per_cpu(sched_cpu_llc_mask, cpu) = tmp; -+ per_cpu(sched_cpu_affinity_end_mask, cpu) = ++tmp; - /*per_cpu(sd_llc_id, cpu) = cpu;*/ - } - } -@@ -5896,9 +5895,7 @@ static void sched_init_topology_cpumask(void) - /* take chance to reset time slice for idle tasks */ - cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; - -- chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); -- -- cpumask_copy(chk++, cpumask_of(cpu)); -+ chk = per_cpu(sched_cpu_affinity_masks, cpu) + 1; - - cpumask_complement(chk, cpumask_of(cpu)); - #ifdef CONFIG_SCHED_SMT -@@ -5916,7 +5913,7 @@ static void sched_init_topology_cpumask(void) - printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", - cpu, per_cpu(sd_llc_id, cpu), 
- (int) (per_cpu(sched_cpu_llc_mask, cpu) - -- &(per_cpu(sched_cpu_affinity_masks, cpu)[0]))); -+ per_cpu(sched_cpu_affinity_masks, cpu))); - } - } - #endif --- -2.37.0 - - -From e0fc06cd1ee65246bafcc6c2edce7bfc59963e4a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 6 Dec 2020 19:26:20 +0800 -Subject: [PATCH 073/297] sched/pds: Fix for low priority NORMAL policy task. - -Task priodl is not properly set, which cause low priority task freezed. ---- - init/init_task.c | 7 ++++++- - kernel/sched/alt_core.c | 8 -------- - kernel/sched/bmq_imp.h | 8 ++++++++ - kernel/sched/pds_imp.h | 8 ++++++++ - 4 files changed, 22 insertions(+), 9 deletions(-) - -diff --git a/init/init_task.c b/init/init_task.c -index fc8fcdbbd07a..a98a65334c15 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -75,10 +75,15 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, --#ifdef CONFIG_SCHED_ALT -+#ifdef CONFIG_SCHED_BMQ - .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, - .static_prio = DEFAULT_PRIO, - .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+#endif -+#ifdef CONFIG_SCHED_PDS -+ .prio = MAX_USER_RT_PRIO, -+ .static_prio = DEFAULT_PRIO, -+ .normal_prio = MAX_USER_RT_PRIO, - #else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 58a432fffd53..3808507c44c1 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1035,14 +1035,6 @@ static inline void hrtick_rq_init(struct rq *rq) - } - #endif /* CONFIG_SCHED_HRTICK */ - --static inline int normal_prio(struct task_struct *p) --{ -- if (task_has_rt_policy(p)) -- return MAX_RT_PRIO - 1 - p->rt_priority; -- -- return p->static_prio + MAX_PRIORITY_ADJ; --} -- - /* - * Calculate the current priority, i.e. the priority - * taken into account by the scheduler. This value might -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index ad9a7c448da7..e213e82475ab 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -36,6 +36,14 @@ static inline void deboost_task(struct task_struct *p) - /* - * Common interfaces - */ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return p->static_prio + MAX_PRIORITY_ADJ; -+} -+ - static inline int task_sched_prio(struct task_struct *p, struct rq *rq) - { - return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index 73fe42b84fc0..5a5de14598d4 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -26,6 +26,14 @@ static const unsigned char dl_level_map[] = { - 1, 0 - }; - -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return MAX_USER_RT_PRIO; -+} -+ - static inline int - task_sched_prio(const struct task_struct *p, const struct rq *rq) - { --- -2.37.0 - - -From 13914b2781c16df8a9098f6f3b37c09904e98092 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 7 Dec 2020 10:19:13 +0800 -Subject: [PATCH 074/297] Project-C v5.9-r3 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3808507c44c1..a9c82fffef59 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.9-r2" -+#define ALT_SCHED_VERSION "v5.9-r3" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From f841dde802c5d228276d2d82a2d77cafc39a7386 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 7 Dec 2020 14:06:53 +0800 -Subject: [PATCH 075/297] sched/alt: [Sync] c1cecf884ad7 sched: Cache - task_struct::flags in sched_submit_work() - ---- - kernel/sched/alt_core.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index a9c82fffef59..e96b385722cc 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3808,9 +3808,12 @@ void __noreturn do_task_dead(void) - - static inline void sched_submit_work(struct task_struct *tsk) - { -+ unsigned int task_flags; -+ - if (!tsk->state) - return; - -+ task_flags = tsk->flags; - /* - * If a worker went to sleep, notify and ask workqueue whether - * it wants to wake up a task to maintain concurrency. -@@ -3819,9 +3822,9 @@ static inline void sched_submit_work(struct task_struct *tsk) - * in the possible wakeup of a kworker and because wq_worker_sleeping() - * requires it. 
- */ -- if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) { - preempt_disable(); -- if (tsk->flags & PF_WQ_WORKER) -+ if (task_flags & PF_WQ_WORKER) - wq_worker_sleeping(tsk); - else - io_wq_worker_sleeping(tsk); --- -2.37.0 - - -From 819e09cc68cafc81a5c2a5a470470be85130ac41 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 7 Dec 2020 14:29:25 +0800 -Subject: [PATCH 076/297] sched/alt: [Sync] ec618b84f6e1 sched: Fix - rq->nr_iowait ordering - ---- - kernel/sched/alt_core.c | 19 ++++++++++++++----- - 1 file changed, 14 insertions(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index e96b385722cc..68a7c1d95263 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1697,6 +1697,15 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) - if (p->sched_contributes_to_load) - rq->nr_uninterruptible--; - -+ if ( -+#ifdef CONFIG_SMP -+ !(wake_flags & WF_MIGRATED) && -+#endif -+ p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ - activate_task(p, rq); - ttwu_do_wakeup(rq, p, 0); - } -@@ -2088,11 +2097,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) - goto unlock; - -- if (p->in_iowait) { -- delayacct_blkio_end(p); -- atomic_dec(&task_rq(p)->nr_iowait); -- } -- - #ifdef CONFIG_SMP - /* - * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -@@ -2166,6 +2170,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - cpu = select_task_rq(p, this_rq()); - - if (cpu != task_cpu(p)) { -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ - wake_flags |= WF_MIGRATED; - psi_ttwu_dequeue(p); - set_task_cpu(p, cpu); --- -2.37.0 - - -From f55492a89b263df7277ed5725070cc58857ee2a7 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 8 Dec 2020 15:01:33 +0800 -Subject: [PATCH 077/297] sched/pds: Fix PDS nice accounting. 
- ---- - kernel/sched/alt_sched.h | 2 ++ - kernel/sched/bmq.h | 6 ------ - kernel/sched/bmq_imp.h | 5 +++++ - kernel/sched/pds.h | 7 +------ - kernel/sched/pds_imp.h | 12 ++++++++++++ - 5 files changed, 20 insertions(+), 12 deletions(-) - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 4698d6d16a2d..fd75b7895469 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -373,6 +373,8 @@ static inline bool task_running(struct task_struct *p) - return p->on_cpu; - } - -+extern int task_running_nice(struct task_struct *p); -+ - extern struct static_key_false sched_schedstats; - - #ifdef CONFIG_CPU_IDLE -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index 1364824cfa7d..4ce30c30bd3e 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -11,10 +11,4 @@ struct bmq { - struct list_head heads[SCHED_BITS]; - }; - -- --static inline int task_running_nice(struct task_struct *p) --{ -- return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); --} -- - #endif -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index e213e82475ab..83c2d019c446 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -62,6 +62,11 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - } - } - -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -+} -+ - static inline void update_task_priodl(struct task_struct *p) {} - - static inline unsigned long sched_queue_watermark(struct rq *rq) -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 7fdeace7e8a5..623908cf4380 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -3,12 +3,7 @@ - - /* bits: - * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ --#define SCHED_BITS (MAX_RT_PRIO + 20 + 1) -+#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) - #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) - --static inline int task_running_nice(struct task_struct *p) --{ -- return (p->prio > DEFAULT_PRIO); --} -- - #endif -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index 5a5de14598d4..6b2140f0a69e 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -26,6 +26,13 @@ static const unsigned char dl_level_map[] = { - 1, 0 - }; - -+/* DEFAULT_SCHED_PRIO: -+ * dl_level_map[(user_prio2deadline[39] - user_prio2deadline[0]) >> 21] = -+ * dl_level_map[68] = -+ * 10 -+ */ -+#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) -+ - static inline int normal_prio(struct task_struct *p) - { - if (task_has_rt_policy(p)) -@@ -51,6 +58,11 @@ task_sched_prio(const struct task_struct *p, const struct rq *rq) - return MAX_RT_PRIO + dl_level_map[delta]; - } - -+int task_running_nice(struct task_struct *p) -+{ -+ return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; -+} -+ - static inline void update_task_priodl(struct task_struct *p) - { - p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); --- -2.37.0 - - -From f4b910e9af3a7b8895978a7977e9197d2edc7255 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 14 Dec 2020 14:03:30 +0800 -Subject: [PATCH 078/297] Project-C v5.10-r0 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 68a7c1d95263..1a857d7e230b 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define 
ALT_SCHED_VERSION "v5.9-r3" -+#define ALT_SCHED_VERSION "v5.10-r0" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 044a6f7ec01500fc42d979242bf1f1c9bbefb36f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 15 Dec 2020 11:19:05 +0800 -Subject: [PATCH 079/297] sched/bmq: Fix compilation issue. - ---- - kernel/sched/bmq_imp.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index 83c2d019c446..3faba5f9bb69 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -62,7 +62,7 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - } - } - --static inline int task_running_nice(struct task_struct *p) -+inline int task_running_nice(struct task_struct *p) - { - return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); - } --- -2.37.0 - - -From cd88ed10b08fe8511bdb2a66458983d82472850a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 22 Dec 2020 11:08:31 +0800 -Subject: [PATCH 080/297] sched/alt: rcu_read_xxx() put_task_xxxx() sync up. - ---- - kernel/sched/alt_core.c | 19 +++++++++++-------- - 1 file changed, 11 insertions(+), 8 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 1a857d7e230b..982562808cc7 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4786,10 +4786,15 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, - rcu_read_lock(); - retval = -ESRCH; - p = find_process_by_pid(pid); -- if (p != NULL) -- retval = sched_setattr(p, &attr); -+ if (likely(p)) -+ get_task_struct(p); - rcu_read_unlock(); - -+ if (likely(p)) { -+ retval = sched_setattr(p, &attr); -+ put_task_struct(p); -+ } -+ - return retval; - } - -@@ -4961,13 +4966,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) - struct task_struct *p; - int retval; - -- get_online_cpus(); - rcu_read_lock(); - - p = find_process_by_pid(pid); - if (!p) { - rcu_read_unlock(); -- put_online_cpus(); - return -ESRCH; - } - -@@ -4992,17 +4995,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) - rcu_read_lock(); - if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { - rcu_read_unlock(); -- goto out_unlock; -+ goto out_free_new_mask; - } - rcu_read_unlock(); - } - - retval = security_task_setscheduler(p); - if (retval) -- goto out_unlock; -+ goto out_free_new_mask; - - cpuset_cpus_allowed(p, cpus_allowed); - cpumask_and(new_mask, in_mask, cpus_allowed); -+ - again: - retval = __set_cpus_allowed_ptr(p, new_mask, true); - -@@ -5018,13 +5022,12 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) - goto again; - } - } --out_unlock: -+out_free_new_mask: - free_cpumask_var(new_mask); - out_free_cpus_allowed: - free_cpumask_var(cpus_allowed); - out_put_task: - put_task_struct(p); -- put_online_cpus(); - return retval; - } - --- -2.37.0 - - -From a210c246c4b5a9cdfbdd8f049aed32af47897e02 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Dec 2020 09:30:03 +0800 -Subject: [PATCH 081/297] sched/alt: Introduce sched_cpu_topo_masks. - -Introduce sched_cpu_topo_masks and rework best_mask_cpu(), which help to -prefered cpu implementation later. 
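A sketch of the preference order the reworked best_mask_cpu() below encodes for the NR_CPUS <= 64 case: keep the current CPU if it is still allowed, otherwise prefer an SMT sibling, then any CPU sharing the LLC, and only then fall back to progressively wider masks. The topology values and demo_* names here are invented for illustration and are not the kernel API:

    /*
     * Sketch only: pick a CPU from "allowed" by walking a fixed
     * preference order of topology masks for one reference CPU.
     */
    #include <stdio.h>

    struct demo_topo {
            unsigned long self;             /* just this CPU             */
            unsigned long smt;              /* its SMT sibling(s)        */
            unsigned long llc;              /* every CPU sharing its LLC */
            unsigned long rest;             /* everything else           */
    };

    static int demo_pick(const struct demo_topo *t, unsigned long allowed)
    {
            unsigned long hit = allowed & t->llc;

            if (hit) {
                    if (allowed & t->self)
                            return __builtin_ctzl(t->self); /* keep the current CPU */
                    if (allowed & t->smt)
                            hit = allowed & t->smt;         /* prefer an SMT sibling */
                    return __builtin_ctzl(hit);             /* otherwise stay in the LLC */
            }
            hit = allowed & t->rest;
            return hit ? (int)__builtin_ctzl(hit) : -1;     /* -1: nothing allowed */
    }

    int main(void)
    {
            /* CPU 0's sibling thread is CPU 4; CPUs 0-7 share one LLC */
            struct demo_topo cpu0 = { .self = 0x01, .smt = 0x10,
                                      .llc = 0xff, .rest = ~0xffUL };

            printf("%d\n", demo_pick(&cpu0, 0x12UL));  /* CPUs 1,4 -> 4 (SMT sibling) */
            printf("%d\n", demo_pick(&cpu0, 0x0cUL));  /* CPUs 2,3 -> 2 (same LLC)    */
            return 0;
    }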
---- - kernel/sched/alt_core.c | 26 ++++++++++++++++------ - kernel/sched/alt_sched.h | 48 +++++++++++++++++++++++++++++++++++----- - 2 files changed, 62 insertions(+), 12 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 982562808cc7..4c008d3cd0db 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -92,6 +92,8 @@ static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; - - DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_affinity_masks); - DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); - DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); - - #ifdef CONFIG_SCHED_SMT -@@ -5874,42 +5876,52 @@ static void sched_init_topology_cpumask_early(void) - cpumask_t *tmp; - - for_each_possible_cpu(cpu) { -+ /* init affinity masks */ - tmp = per_cpu(sched_cpu_affinity_masks, cpu); - - cpumask_copy(tmp, cpumask_of(cpu)); - tmp++; - cpumask_copy(tmp, cpu_possible_mask); - cpumask_clear_cpu(cpu, tmp); -- per_cpu(sched_cpu_llc_mask, cpu) = tmp; - per_cpu(sched_cpu_affinity_end_mask, cpu) = ++tmp; -+ /* init topo masks */ -+ tmp = per_cpu(sched_cpu_topo_masks, cpu); -+ -+ cpumask_copy(tmp, cpumask_of(cpu)); -+ tmp++; -+ cpumask_copy(tmp, cpu_possible_mask); -+ per_cpu(sched_cpu_llc_mask, cpu) = tmp; - /*per_cpu(sd_llc_id, cpu) = cpu;*/ - } - } - - #define TOPOLOGY_CPUMASK(name, mask, last) \ -- if (cpumask_and(chk, chk, mask)) \ -- printk(KERN_INFO "sched: cpu#%02d affinity mask: 0x%08lx - "#name,\ -- cpu, (chk++)->bits[0]); \ -+ if (cpumask_and(chk, chk, mask)) { \ -+ cpumask_copy(topo, mask); \ -+ printk(KERN_INFO "sched: cpu#%02d affinity: 0x%08lx topo: 0x%08lx - "#name,\ -+ cpu, (chk++)->bits[0], (topo++)->bits[0]); \ -+ } \ - if (!last) \ - cpumask_complement(chk, mask) - - static void sched_init_topology_cpumask(void) - { - int cpu; -- cpumask_t *chk; -+ cpumask_t *chk, *topo; - - for_each_online_cpu(cpu) { - /* take chance to reset time slice for idle tasks */ - cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; - - chk = per_cpu(sched_cpu_affinity_masks, cpu) + 1; -+ topo = per_cpu(sched_cpu_topo_masks, cpu) + 1; - - cpumask_complement(chk, cpumask_of(cpu)); - #ifdef CONFIG_SCHED_SMT - TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); - #endif - per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); -- per_cpu(sched_cpu_llc_mask, cpu) = chk; -+ per_cpu(sched_cpu_llc_mask, cpu) = topo; - TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); - - TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); -@@ -5920,7 +5932,7 @@ static void sched_init_topology_cpumask(void) - printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", - cpu, per_cpu(sd_llc_id, cpu), - (int) (per_cpu(sched_cpu_llc_mask, cpu) - -- per_cpu(sched_cpu_affinity_masks, cpu))); -+ per_cpu(sched_cpu_topo_masks, cpu))); - } - } - #endif -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index fd75b7895469..5d6ee22875b9 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -223,7 +223,8 @@ enum { - NR_CPU_AFFINITY_LEVELS - }; - --DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_affinity_masks); -+DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); -+DECLARE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); - - static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, - const cpumask_t *mask) -@@ -242,13 +243,50 @@ static inline int __best_mask_cpu(int 
cpu, const cpumask_t *cpumask, - #endif - } - --static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask) -+static inline int best_mask_cpu(int cpu, cpumask_t *mask) - { - #if NR_CPUS <= 64 -- return __best_mask_cpu(cpu, cpumask, per_cpu(sched_cpu_affinity_masks, cpu)); -+ unsigned long llc_match; -+ cpumask_t *chk = per_cpu(sched_cpu_llc_mask, cpu); -+ -+ if ((llc_match = mask->bits[0] & chk->bits[0])) { -+ unsigned long match; -+ -+ chk = per_cpu(sched_cpu_topo_masks, cpu); -+ if (mask->bits[0] & chk->bits[0]) -+ return cpu; -+ -+#ifdef CONFIG_SCHED_SMT -+ chk++; -+ if ((match = mask->bits[0] & chk->bits[0])) -+ return __ffs(match); -+#endif -+ -+ return __ffs(llc_match); -+ } -+ -+ return __best_mask_cpu(cpu, mask, chk + 1); - #else -- return cpumask_test_cpu(cpu, cpumask) ? cpu: -- __best_mask_cpu(cpu, cpumask, per_cpu(sched_cpu_affinity_masks, cpu) + 1); -+ cpumask_t llc_match; -+ cpumask_t *chk = per_cpu(sched_cpu_llc_mask, cpu); -+ -+ if (cpumask_and(&llc_match, mask, chk)) { -+ cpumask_t tmp; -+ -+ chk = per_cpu(sched_cpu_topo_masks, cpu); -+ if (cpumask_test_cpu(cpu, mask)) -+ return cpu; -+ -+#ifdef CONFIG_SCHED_SMT -+ chk++; -+ if (cpumask_and(&tmp, mask, chk)) -+ return cpumask_any(&tmp); -+#endif -+ -+ return cpumask_any(&llc_match); -+ } -+ -+ return __best_mask_cpu(cpu, mask, chk + 1); - #endif - } - --- -2.37.0 - - -From e5a5786ad8751c3a56dca50133a52d580e320b2e Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Dec 2020 09:33:25 +0800 -Subject: [PATCH 082/297] Project-C v5.10-r1 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4c008d3cd0db..9880d9b50f7e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.10-r0" -+#define ALT_SCHED_VERSION "v5.10-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 75c6638ed1d2ce05c482ad7ebadd2ef9fb7873e3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Dec 2020 11:33:48 +0800 -Subject: [PATCH 083/297] sched/alt: Fix UP compilation warning. 
- ---- - kernel/sched/bmq_imp.h | 2 ++ - kernel/sched/pds_imp.h | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index 3faba5f9bb69..13eda4b26b6a 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -185,11 +185,13 @@ static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) - p->boost_prio = MAX_PRIORITY_ADJ; - } - -+#ifdef CONFIG_SMP - static void sched_task_ttwu(struct task_struct *p) - { - if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) - boost_task(p); - } -+#endif - - static void sched_task_deactivate(struct task_struct *p, struct rq *rq) - { -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index 6b2140f0a69e..b1ad3d0b0430 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -268,5 +268,7 @@ static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) - time_slice_expired(p, rq); - } - -+#ifdef CONFIG_SMP - static void sched_task_ttwu(struct task_struct *p) {} -+#endif - static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} --- -2.37.0 - - -From def8da4761d52dd7b1311d7a5835549c28158883 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 31 Dec 2020 08:50:38 +0800 -Subject: [PATCH 084/297] sched/alt: Fix compilation error in - sched_numa_find_closest(). - ---- - kernel/sched/alt_sched.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 5d6ee22875b9..d979b4b1aac2 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -243,7 +243,7 @@ static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, - #endif - } - --static inline int best_mask_cpu(int cpu, cpumask_t *mask) -+static inline int best_mask_cpu(int cpu, const cpumask_t *mask) - { - #if NR_CPUS <= 64 - unsigned long llc_match; --- -2.37.0 - - -From 830d14b33ababafb52f415b4f5c5c6fe97a3fc2a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 31 Dec 2020 08:56:11 +0800 -Subject: [PATCH 085/297] Project-C v5.10-r2 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 9880d9b50f7e..be766515662c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.10-r1" -+#define ALT_SCHED_VERSION "v5.10-r2" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 9194a598ec866367b2981b4bae8809d150dac913 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 4 Jan 2021 14:41:27 +0800 -Subject: [PATCH 086/297] sched/alt: Enable sched_schedstats sysctl interface. 
- ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index be766515662c..148c2fc477af 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1667,7 +1667,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) - if (!schedstat_enabled()) - return; - -- rq= this_rq(); -+ rq = this_rq(); - - #ifdef CONFIG_SMP - if (cpu == rq->cpu) --- -2.37.0 - - -From f50db27e9b2c96280175daf255b4e90b1baaaad4 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 26 Jan 2021 14:04:46 +0800 -Subject: [PATCH 087/297] sched/alt: [Sync] 9f68b5b74c48 sched: Detect call to - schedule from critical entry code - ---- - kernel/sched/alt_core.c | 1 + - kernel/sched/alt_sched.h | 6 ++++++ - 2 files changed, 7 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 148c2fc477af..86c1ca67b675 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3464,6 +3464,7 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) - preempt_count_set(PREEMPT_DISABLED); - } - rcu_sleep_check(); -+ SCHED_WARN_ON(ct_state() == CONTEXT_USER); - - profile_hit(SCHED_PROFILING, __builtin_return_address(0)); - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index d979b4b1aac2..a157800317e9 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -55,6 +55,12 @@ - #include "pds.h" - #endif - -+#ifdef CONFIG_SCHED_DEBUG -+# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) -+#else -+# define SCHED_WARN_ON(x) ({ (void)(x), 0; }) -+#endif -+ - /* task_struct::on_rq states: */ - #define TASK_ON_RQ_QUEUED 1 - #define TASK_ON_RQ_MIGRATING 2 --- -2.37.0 - - -From cf4cee8f6ff06dfd916491cc30c1ff95228cfabf Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 26 Jan 2021 14:07:25 +0800 -Subject: [PATCH 088/297] sched/alt: [Sync] 6775de4984ea context_tracking: Only - define schedule_user() on !HAVE_CONTEXT_TRACKING_OFFSTACK archs - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 86c1ca67b675..2782c97e939b 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3903,7 +3903,7 @@ void __sched schedule_idle(void) - } while (need_resched()); - } - --#ifdef CONFIG_CONTEXT_TRACKING -+#if defined(CONFIG_CONTEXT_TRACKING) && !defined(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) - asmlinkage __visible void __sched schedule_user(void) - { - /* --- -2.37.0 - - -From d3b3d1475e598040c854177270df039504c9332d Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 26 Jan 2021 14:10:53 +0800 -Subject: [PATCH 089/297] sched/alt: [Sync] 345a957fcc95 sched: Reenable - interrupts in do_sched_yield() - ---- - kernel/sched/alt_core.c | 6 +----- - 1 file changed, 1 insertion(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 2782c97e939b..47c9144839bb 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5163,12 +5163,8 @@ static void do_sched_yield(void) - rq->skip = current; - } - -- /* -- * Since we are going to call schedule() anyway, there's -- * no need to preempt or enable interrupts: -- */ - preempt_disable(); -- raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irq(&rq->lock); - sched_preempt_enable_no_resched(); - - schedule(); --- -2.37.0 - - -From fc856815179659d31e0e54e626652373f33e83b0 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 26 Jan 2021 
14:13:18 +0800 -Subject: [PATCH 090/297] sched/alt: [Sync] a8b62fd08505 stop_machine: Add - function and caller debug info - ---- - kernel/sched/alt_core.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 47c9144839bb..77319f8657f7 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5455,6 +5455,7 @@ void sched_show_task(struct task_struct *p) - (unsigned long)task_thread_info(p)->flags); - - print_worker_info(KERN_INFO, p); -+ print_stop_info(KERN_INFO, p); - show_stack(p, NULL, KERN_INFO); - put_task_stack(p); - } --- -2.37.0 - - -From 5bfa362be6f639d6db7a9d7b9503261192fb1a01 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 2 Feb 2021 23:17:30 +0800 -Subject: [PATCH 091/297] sched/alt: [Sync] 545b8c8df41f smp: Cleanup - smp_call_function*() - ---- - kernel/sched/alt_core.c | 12 ++---------- - 1 file changed, 2 insertions(+), 10 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 77319f8657f7..32254626d29d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -892,14 +892,6 @@ static inline void check_preempt_curr(struct rq *rq) - resched_curr(rq); - } - --static inline void --rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func) --{ -- csd->flags = 0; -- csd->func = func; -- csd->info = rq; --} -- - #ifdef CONFIG_SCHED_HRTICK - /* - * Use HR-timers to deliver accurate preemption points. -@@ -1016,7 +1008,7 @@ void hrtick_start(struct rq *rq, u64 delay) - static void hrtick_rq_init(struct rq *rq) - { - #ifdef CONFIG_SMP -- rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); -+ INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq); - #endif - - hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -@@ -6024,7 +6016,7 @@ void __init sched_init(void) - #endif - - #ifdef CONFIG_NO_HZ_COMMON -- rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); -+ INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); - #endif - #endif /* CONFIG_SMP */ - rq->nr_switches = 0; --- -2.37.0 - - -From 38e2b3f7621d87de4a62f1855004531ba5a4d575 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 3 Feb 2021 14:23:17 +0800 -Subject: [PATCH 092/297] sched/alt: Sync up missing new interfaces. - ---- - kernel/sched/alt_core.c | 35 +++++++++++++++++++++++++++++++++++ - 1 file changed, 35 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 32254626d29d..1feed396d704 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1090,7 +1090,18 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) - #endif - } - -+void migrate_disable(void) -+{ -+} -+EXPORT_SYMBOL_GPL(migrate_disable); -+ -+void migrate_enable(void) -+{ -+} -+EXPORT_SYMBOL_GPL(migrate_enable); -+ - #ifdef CONFIG_SMP -+ - void set_task_cpu(struct task_struct *p, unsigned int new_cpu) - { - #ifdef CONFIG_SCHED_DEBUG -@@ -5841,6 +5852,23 @@ int sched_cpu_starting(unsigned int cpu) - } - - #ifdef CONFIG_HOTPLUG_CPU -+ -+/* -+ * Invoked immediately before the stopper thread is invoked to bring the -+ * CPU down completely. At this point all per CPU kthreads except the -+ * hotplug thread (current) and the stopper thread (inactive) have been -+ * either parked or have been unbound from the outgoing CPU. Ensure that -+ * any of those which might be on the way out are gone. -+ * -+ * If after this point a bound task is being woken on this CPU then the -+ * responsible hotplug callback has failed to do it's job. 
-+ * sched_cpu_dying() will catch it with the appropriate fireworks. -+ */ -+int sched_cpu_wait_empty(unsigned int cpu) -+{ -+ return 0; -+} -+ - int sched_cpu_dying(unsigned int cpu) - { - struct rq *rq = cpu_rq(cpu); -@@ -6155,6 +6183,13 @@ void __cant_sleep(const char *file, int line, int preempt_offset) - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); - } - EXPORT_SYMBOL_GPL(__cant_sleep); -+ -+#ifdef CONFIG_SMP -+void __cant_migrate(const char *file, int line) -+{ -+} -+EXPORT_SYMBOL_GPL(__cant_migrate); -+#endif - #endif - - #ifdef CONFIG_MAGIC_SYSRQ --- -2.37.0 - - -From 887fdbe843b181de0e0f0eb3379209ff278a7bdb Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 4 Feb 2021 14:50:24 +0800 -Subject: [PATCH 093/297] sched/alt: [Sync] 5fbda3ecd14a sched: highmem: Store - local kmaps in task struct - ---- - kernel/sched/alt_core.c | 37 +++++++++++++++++++++++++++++++++++++ - 1 file changed, 37 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 1feed396d704..60a888c99006 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2644,6 +2644,34 @@ static inline void finish_lock_switch(struct rq *rq) - raw_spin_unlock_irq(&rq->lock); - } - -+/* -+ * NOP if the arch has not defined these: -+ */ -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+ -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+static inline void kmap_local_sched_out(void) -+{ -+#ifdef CONFIG_KMAP_LOCAL -+ if (unlikely(current->kmap_ctrl.idx)) -+ __kmap_local_sched_out(); -+#endif -+} -+ -+static inline void kmap_local_sched_in(void) -+{ -+#ifdef CONFIG_KMAP_LOCAL -+ if (unlikely(current->kmap_ctrl.idx)) -+ __kmap_local_sched_in(); -+#endif -+} -+ - /** - * prepare_task_switch - prepare to switch tasks - * @rq: the runqueue preparing to switch -@@ -2665,6 +2693,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, - perf_event_task_sched_out(prev, next); - rseq_preempt(prev); - fire_sched_out_preempt_notifiers(prev, next); -+ kmap_local_sched_out(); - prepare_task(next); - prepare_arch_switch(next); - } -@@ -2732,6 +2761,14 @@ static struct rq *finish_task_switch(struct task_struct *prev) - finish_lock_switch(rq); - finish_arch_post_lock_switch(); - kcov_finish_switch(current); -+ /* -+ * kmap_local_sched_out() is invoked with rq::lock held and -+ * interrupts disabled. There is no requirement for that, but the -+ * sched out code does not have an interrupt enabled section. -+ * Restoring the maps on sched in does not require interrupts being -+ * disabled either. 
-+ */ -+ kmap_local_sched_in(); - - fire_sched_in_preempt_notifiers(current); - /* --- -2.37.0 - - -From a089b7c2d2f1e1217abeea00e6d11d5b6f412b29 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 29 Jan 2021 23:53:02 +0800 -Subject: [PATCH 094/297] sched/alt: [Sync] 565790d28b1e sched: Fix - balance_callback() - ---- - kernel/sched/alt_core.c | 85 ++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 84 insertions(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 60a888c99006..920911a23150 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2617,6 +2617,76 @@ static inline void finish_task(struct task_struct *prev) - #endif - } - -+#ifdef CONFIG_SMP -+ -+static void do_balance_callbacks(struct rq *rq, struct callback_head *head) -+{ -+ void (*func)(struct rq *rq); -+ struct callback_head *next; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ while (head) { -+ func = (void (*)(struct rq *))head->func; -+ next = head->next; -+ head->next = NULL; -+ head = next; -+ -+ func(rq); -+ } -+} -+ -+static void balance_push(struct rq *rq); -+ -+struct callback_head balance_push_callback = { -+ .next = NULL, -+ .func = (void (*)(struct callback_head *))balance_push, -+}; -+ -+static inline struct callback_head *splice_balance_callbacks(struct rq *rq) -+{ -+ struct callback_head *head = rq->balance_callback; -+ -+ lockdep_assert_held(&rq->lock); -+ if (head) -+ rq->balance_callback = NULL; -+ -+ return head; -+} -+ -+static void __balance_callbacks(struct rq *rq) -+{ -+ do_balance_callbacks(rq, splice_balance_callbacks(rq)); -+} -+ -+static inline void balance_callbacks(struct rq *rq, struct callback_head *head) -+{ -+ unsigned long flags; -+ -+ if (unlikely(head)) { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ do_balance_callbacks(rq, head); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+} -+ -+#else -+ -+static inline void __balance_callbacks(struct rq *rq) -+{ -+} -+ -+static inline struct callback_head *splice_balance_callbacks(struct rq *rq) -+{ -+ return NULL; -+} -+ -+static inline void balance_callbacks(struct rq *rq, struct callback_head *head) -+{ -+} -+ -+#endif -+ - static inline void - prepare_lock_switch(struct rq *rq, struct task_struct *next) - { -@@ -2641,6 +2711,7 @@ static inline void finish_lock_switch(struct rq *rq) - * prev into current: - */ - spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ __balance_callbacks(rq); - raw_spin_unlock_irq(&rq->lock); - } - -@@ -3834,8 +3905,10 @@ static void __sched notrace __schedule(bool preempt) - - /* Also unlocks the rq: */ - rq = context_switch(rq, prev, next); -- } else -+ } else { -+ __balance_callbacks(rq); - raw_spin_unlock_irq(&rq->lock); -+ } - - #ifdef CONFIG_SCHED_SMT - sg_balance_check(rq); -@@ -4205,7 +4278,13 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) - - check_task_changed(rq, p); - out_unlock: -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ -+ __balance_callbacks(rq); - __task_access_unlock(p, lock); -+ -+ preempt_enable(); - } - #else - static inline int rt_effective_prio(struct task_struct *p, int prio) -@@ -4422,6 +4501,7 @@ static int __sched_setscheduler(struct task_struct *p, - int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; - int retval, oldpolicy = -1; - int policy = attr->sched_policy; -+ struct callback_head *head; - unsigned long flags; - struct rq *rq; - int reset_on_fork; -@@ -4575,6 +4655,7 @@ static int __sched_setscheduler(struct task_struct *p, - - /* Avoid rq from going 
away on us: */ - preempt_disable(); -+ head = splice_balance_callbacks(rq); - __task_access_unlock(p, lock); - raw_spin_unlock_irqrestore(&p->pi_lock, flags); - -@@ -4583,6 +4664,8 @@ static int __sched_setscheduler(struct task_struct *p, - rt_mutex_adjust_pi(p); - } - -+ /* Run balance callbacks after we've adjusted the PI chain: */ -+ balance_callbacks(rq, head); - preempt_enable(); - - return 0; --- -2.37.0 - - -From f93e650c480cf25219a3fb9104105b4479385bcf Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 30 Jan 2021 09:04:03 +0800 -Subject: [PATCH 095/297] sched/alt: Sync rq->balance_callback - ---- - kernel/sched/alt_core.c | 1 + - kernel/sched/alt_sched.h | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 920911a23150..eefc424a00e9 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6166,6 +6166,7 @@ void __init sched_init(void) - #ifdef CONFIG_NO_HZ_COMMON - INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); - #endif -+ rq->balance_callback = NULL; - #endif /* CONFIG_SMP */ - rq->nr_switches = 0; - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index a157800317e9..879aa6313dc7 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -128,6 +128,7 @@ struct rq { - int active_balance; - struct cpu_stop_work active_balance_work; - #endif -+ struct callback_head *balance_callback; - #endif /* CONFIG_SMP */ - #ifdef CONFIG_IRQ_TIME_ACCOUNTING - u64 prev_irq_time; --- -2.37.0 - - -From 9ed0f5739bfd597a8413adbfe4356dc2e070dddb Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 30 Jan 2021 11:28:33 +0800 -Subject: [PATCH 096/297] sched/alt: [Sync] f2469a1fb43f sched/core: Wait for - tasks being pushed away on hotplug - ---- - kernel/sched/alt_core.c | 162 +++++++++++++++++++++++++++++++++++++++ - kernel/sched/alt_sched.h | 22 +++++- - 2 files changed, 182 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index eefc424a00e9..7c4552821c3f 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3419,6 +3419,139 @@ static void sched_tick_stop(int cpu) - twork = per_cpu_ptr(tick_work_cpu, cpu); - cancel_delayed_work_sync(&twork->work); - } -+ -+static int __balance_push_cpu_stop(void *arg) -+{ -+ struct task_struct *p = arg; -+ struct rq *rq = this_rq(); -+ struct rq_flags rf; -+ int cpu; -+ -+ raw_spin_lock_irq(&p->pi_lock); -+ rq_lock(rq, &rf); -+ -+ update_rq_clock(rq); -+ -+ if (task_rq(p) == rq && task_on_rq_queued(p)) { -+ cpu = select_fallback_rq(rq->cpu, p); -+ rq = __migrate_task(rq, p, cpu); -+ } -+ -+ rq_unlock(rq, &rf); -+ raw_spin_unlock_irq(&p->pi_lock); -+ -+ put_task_struct(p); -+ -+ return 0; -+} -+ -+static DEFINE_PER_CPU(struct cpu_stop_work, push_work); -+ -+/* -+ * Ensure we only run per-cpu kthreads once the CPU goes !active. -+ */ -+static void balance_push(struct rq *rq) -+{ -+ struct task_struct *push_task = rq->curr; -+ -+ lockdep_assert_held(&rq->lock); -+ SCHED_WARN_ON(rq->cpu != smp_processor_id()); -+ /* -+ * Ensure the thing is persistent until balance_push_set(.on = false); -+ */ -+ rq->balance_callback = &balance_push_callback; -+ -+ /* -+ * Both the cpu-hotplug and stop task are in this case and are -+ * required to complete the hotplug process. -+ * -+ * XXX: the idle task does not match kthread_is_per_cpu() due to -+ * histerical raisins. 
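The balance_callback machinery in patch 094 above follows a splice-then-run pattern: callbacks queued on a runqueue while rq->lock is held are detached in a single step and executed only after the lock (and, in the setscheduler path, the PI chain adjustment) is out of the way. A simplified, lock-free user-space sketch of that list handling; the kernel version additionally casts the stored function pointer and asserts lock ownership, so treat this only as an illustration of the shape:

    #include <stddef.h>
    #include <stdio.h>

    struct callback_head {
        struct callback_head *next;
        void (*func)(struct callback_head *head);
    };

    /* stand-in for rq->balance_callback */
    static struct callback_head *balance_callback;

    static void queue_balance_callback(struct callback_head *head,
                                       void (*func)(struct callback_head *))
    {
        head->func = func;
        head->next = balance_callback;
        balance_callback = head;
    }

    /* Detach the whole list in one step (done while the lock is held)... */
    static struct callback_head *splice_balance_callbacks(void)
    {
        struct callback_head *head = balance_callback;

        balance_callback = NULL;
        return head;
    }

    /* ...and run the detached list later, once the lock has been dropped. */
    static void do_balance_callbacks(struct callback_head *head)
    {
        while (head) {
            struct callback_head *next = head->next;

            head->next = NULL;
            head->func(head);
            head = next;
        }
    }

    static void demo_cb(struct callback_head *head)
    {
        (void)head;
        puts("balance callback ran");
    }

    int main(void)
    {
        struct callback_head cb;

        queue_balance_callback(&cb, demo_cb);
        do_balance_callbacks(splice_balance_callbacks());
        return 0;
    }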
-+ */ -+ if (rq->idle == push_task || -+ ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || -+ is_migration_disabled(push_task)) { -+ -+ /* -+ * If this is the idle task on the outgoing CPU try to wake -+ * up the hotplug control thread which might wait for the -+ * last task to vanish. The rcuwait_active() check is -+ * accurate here because the waiter is pinned on this CPU -+ * and can't obviously be running in parallel. -+ * -+ * On RT kernels this also has to check whether there are -+ * pinned and scheduled out tasks on the runqueue. They -+ * need to leave the migrate disabled section first. -+ */ -+ if (!rq->nr_running && !rq_has_pinned_tasks(rq) && -+ rcuwait_active(&rq->hotplug_wait)) { -+ raw_spin_unlock(&rq->lock); -+ rcuwait_wake_up(&rq->hotplug_wait); -+ raw_spin_lock(&rq->lock); -+ } -+ return; -+ } -+ -+ get_task_struct(push_task); -+ /* -+ * Temporarily drop rq->lock such that we can wake-up the stop task. -+ * Both preemption and IRQs are still disabled. -+ */ -+ raw_spin_unlock(&rq->lock); -+ stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, -+ this_cpu_ptr(&push_work)); -+ /* -+ * At this point need_resched() is true and we'll take the loop in -+ * schedule(). The next pick is obviously going to be the stop task -+ * which kthread_is_per_cpu() and will push this task away. -+ */ -+ raw_spin_lock(&rq->lock); -+} -+ -+static void balance_push_set(int cpu, bool on) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rq_lock_irqsave(rq, &rf); -+ rq->balance_push = on; -+ if (on) { -+ WARN_ON_ONCE(rq->balance_callback); -+ rq->balance_callback = &balance_push_callback; -+ } else if (rq->balance_callback == &balance_push_callback) { -+ rq->balance_callback = NULL; -+ } -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+/* -+ * Invoked from a CPUs hotplug control thread after the CPU has been marked -+ * inactive. All tasks which are not per CPU kernel threads are either -+ * pushed off this CPU now via balance_push() or placed on a different CPU -+ * during wakeup. Wait until the CPU is quiescent. -+ */ -+static void balance_hotplug_wait(void) -+{ -+ struct rq *rq = this_rq(); -+ -+ rcuwait_wait_event(&rq->hotplug_wait, -+ rq->nr_running == 1 && !rq_has_pinned_tasks(rq), -+ TASK_UNINTERRUPTIBLE); -+} -+ -+#else -+ -+static void balance_push(struct rq *rq) -+{ -+} -+ -+static void balance_push_set(int cpu, bool on) -+{ -+} -+ -+static inline void balance_hotplug_wait(void) -+{ -+} - #endif /* CONFIG_HOTPLUG_CPU */ - - int __init sched_tick_offload_init(void) -@@ -5893,6 +6026,12 @@ int sched_cpu_activate(unsigned int cpu) - struct rq *rq = cpu_rq(cpu); - unsigned long flags; - -+ /* -+ * Make sure that when the hotplug state machine does a roll-back -+ * we clear balance_push. Ideally that would happen earlier... -+ */ -+ balance_push_set(cpu, false); -+ - #ifdef CONFIG_SCHED_SMT - /* - * When going up, increment the number of cores with SMT present. -@@ -5926,6 +6065,15 @@ int sched_cpu_deactivate(unsigned int cpu) - int ret; - - set_cpu_active(cpu, false); -+ -+ /* -+ * From this point forward, this CPU will refuse to run any task that -+ * is not: migrate_disable() or KTHREAD_IS_PER_CPU, and will actively -+ * push those tasks away until this gets cleared, see -+ * sched_cpu_dying(). 
-+ */ -+ balance_push_set(cpu, true); -+ - /* - * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU - * users of this state to go away such that all new such users will -@@ -5951,9 +6099,14 @@ int sched_cpu_deactivate(unsigned int cpu) - - ret = cpuset_cpu_inactive(cpu); - if (ret) { -+ balance_push_set(cpu, false); - set_cpu_active(cpu, true); - return ret; - } -+ -+ /* Wait for all non per CPU kernel threads to vanish. */ -+ balance_hotplug_wait(); -+ - return 0; - } - -@@ -6002,6 +6155,12 @@ int sched_cpu_dying(unsigned int cpu) - migrate_tasks(rq); - raw_spin_unlock_irqrestore(&rq->lock, flags); - -+ /* -+ * Now that the CPU is offline, make sure we're welcome -+ * to new tasks once we come back up. -+ */ -+ balance_push_set(cpu, false); -+ - hrtick_clear(rq); - return 0; - } -@@ -6167,6 +6326,9 @@ void __init sched_init(void) - INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); - #endif - rq->balance_callback = NULL; -+#ifdef CONFIG_HOTPLUG_CPU -+ rcuwait_init(&rq->hotplug_wait); -+#endif - #endif /* CONFIG_SMP */ - rq->nr_switches = 0; - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 879aa6313dc7..30e80c4b0825 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -126,9 +126,13 @@ struct rq { - - #ifdef CONFIG_SCHED_SMT - int active_balance; -- struct cpu_stop_work active_balance_work; -+ struct cpu_stop_work active_balance_work; -+#endif -+ struct callback_head *balance_callback; -+ unsigned char balance_push; -+#ifdef CONFIG_HOTPLUG_CPU -+ struct rcuwait hotplug_wait; - #endif -- struct callback_head *balance_callback; - #endif /* CONFIG_SMP */ - #ifdef CONFIG_IRQ_TIME_ACCOUNTING - u64 prev_irq_time; -@@ -388,6 +392,13 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) - raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); - } - -+static inline void -+rq_lock(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock(&rq->lock); -+} -+ - static inline void - rq_unlock_irq(struct rq *rq, struct rq_flags *rf) - __releases(rq->lock) -@@ -395,6 +406,13 @@ rq_unlock_irq(struct rq *rq, struct rq_flags *rf) - raw_spin_unlock_irq(&rq->lock); - } - -+static inline void -+rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ - static inline struct rq * - this_rq_lock_irq(struct rq_flags *rf) - __acquires(rq->lock) --- -2.37.0 - - -From 3d09a455d788d8106b1de4dc44ecff646198eb57 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 5 Feb 2021 10:35:00 +0800 -Subject: [PATCH 097/297] sched/alt: Fix compilation issue. - ---- - kernel/sched/alt_core.c | 13 ++++++++----- - 1 file changed, 8 insertions(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 7c4552821c3f..3882b4c977fd 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3469,8 +3469,9 @@ static void balance_push(struct rq *rq) - * histerical raisins. - */ - if (rq->idle == push_task || -- ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || -- is_migration_disabled(push_task)) { -+ ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task))) { -+ /*((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || -+ is_migration_disabled(push_task)) {*/ - - /* - * If this is the idle task on the outgoing CPU try to wake -@@ -3483,8 +3484,9 @@ static void balance_push(struct rq *rq) - * pinned and scheduled out tasks on the runqueue. 
They - * need to leave the migrate disabled section first. - */ -- if (!rq->nr_running && !rq_has_pinned_tasks(rq) && -- rcuwait_active(&rq->hotplug_wait)) { -+ if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) { -+ /*if (!rq->nr_running && !rq_has_pinned_tasks(rq) && -+ rcuwait_active(&rq->hotplug_wait)) {*/ - raw_spin_unlock(&rq->lock); - rcuwait_wake_up(&rq->hotplug_wait); - raw_spin_lock(&rq->lock); -@@ -3535,7 +3537,8 @@ static void balance_hotplug_wait(void) - struct rq *rq = this_rq(); - - rcuwait_wait_event(&rq->hotplug_wait, -- rq->nr_running == 1 && !rq_has_pinned_tasks(rq), -+ rq->nr_running == 1, -+/* rq->nr_running == 1 && !rq_has_pinned_tasks(rq),*/ - TASK_UNINTERRUPTIBLE); - } - --- -2.37.0 - - -From 028e152a1ce8a01ff47edc77303be53d46b9268a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 30 Jan 2021 21:50:29 +0800 -Subject: [PATCH 098/297] sched/alt: [Sync] 1cf12e08bc4d sched/hotplug: - Consolidate task migration on CPU unplug - ---- - kernel/sched/alt_core.c | 98 ++++++++--------------------------------- - 1 file changed, 19 insertions(+), 79 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3882b4c977fd..65d87ca69aff 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5889,81 +5889,6 @@ void idle_task_exit(void) - /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ - } - --/* -- * Migrate all tasks from the rq, sleeping tasks will be migrated by -- * try_to_wake_up()->select_task_rq(). -- * -- * Called with rq->lock held even though we'er in stop_machine() and -- * there's no concurrency possible, we hold the required locks anyway -- * because of lock validation efforts. -- */ --static void migrate_tasks(struct rq *dead_rq) --{ -- struct rq *rq = dead_rq; -- struct task_struct *p, *stop = rq->stop; -- int count = 0; -- -- /* -- * Fudge the rq selection such that the below task selection loop -- * doesn't get stuck on the currently eligible stop task. -- * -- * We're currently inside stop_machine() and the rq is either stuck -- * in the stop_machine_cpu_stop() loop, or we're executing this code, -- * either way we should never end up calling schedule() until we're -- * done here. -- */ -- rq->stop = NULL; -- -- p = sched_rq_first_task(rq); -- while (p != rq->idle) { -- int dest_cpu; -- -- /* skip the running task */ -- if (task_running(p) || 1 == p->nr_cpus_allowed) { -- p = sched_rq_next_task(p, rq); -- continue; -- } -- -- /* -- * Rules for changing task_struct::cpus_allowed are holding -- * both pi_lock and rq->lock, such that holding either -- * stabilizes the mask. -- * -- * Drop rq->lock is not quite as disastrous as it usually is -- * because !cpu_active at this point, which means load-balance -- * will not interfere. Also, stop-machine. -- */ -- raw_spin_unlock(&rq->lock); -- raw_spin_lock(&p->pi_lock); -- raw_spin_lock(&rq->lock); -- -- /* -- * Since we're inside stop-machine, _nothing_ should have -- * changed the task, WARN if weird stuff happened, because in -- * that case the above rq->lock drop is a fail too. -- */ -- if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) { -- raw_spin_unlock(&p->pi_lock); -- p = sched_rq_next_task(p, rq); -- continue; -- } -- -- count++; -- /* Find suitable destination for @next, with force if needed. 
*/ -- dest_cpu = select_fallback_rq(dead_rq->cpu, p); -- rq = __migrate_task(rq, p, dest_cpu); -- raw_spin_unlock(&rq->lock); -- raw_spin_unlock(&p->pi_lock); -- -- rq = dead_rq; -- raw_spin_lock(&rq->lock); -- /* Check queued task all over from the header again */ -- p = sched_rq_first_task(rq); -- } -- -- rq->stop = stop; --} -- - static void set_rq_offline(struct rq *rq) - { - if (rq->online) -@@ -6107,9 +6032,6 @@ int sched_cpu_deactivate(unsigned int cpu) - return ret; - } - -- /* Wait for all non per CPU kernel threads to vanish. */ -- balance_hotplug_wait(); -- - return 0; - } - -@@ -6142,9 +6064,27 @@ int sched_cpu_starting(unsigned int cpu) - */ - int sched_cpu_wait_empty(unsigned int cpu) - { -+ balance_hotplug_wait(); - return 0; - } - -+/* -+ * Since this CPU is going 'away' for a while, fold any nr_active delta we -+ * might have. Called from the CPU stopper task after ensuring that the -+ * stopper is the last running task on the CPU, so nr_active count is -+ * stable. We need to take the teardown thread which is calling this into -+ * account, so we hand in adjust = 1 to the load calculation. -+ * -+ * Also see the comment "Global load-average calculations". -+ */ -+static void calc_load_migrate(struct rq *rq) -+{ -+ long delta = calc_load_fold_active(rq, 1); -+ -+ if (delta) -+ atomic_long_add(delta, &calc_load_tasks); -+} -+ - int sched_cpu_dying(unsigned int cpu) - { - struct rq *rq = cpu_rq(cpu); -@@ -6155,7 +6095,6 @@ int sched_cpu_dying(unsigned int cpu) - - raw_spin_lock_irqsave(&rq->lock, flags); - set_rq_offline(rq); -- migrate_tasks(rq); - raw_spin_unlock_irqrestore(&rq->lock, flags); - - /* -@@ -6164,6 +6103,7 @@ int sched_cpu_dying(unsigned int cpu) - */ - balance_push_set(cpu, false); - -+ calc_load_migrate(rq); - hrtick_clear(rq); - return 0; - } --- -2.37.0 - - -From be69aaf70b0d8fea9395fbfd1a3a0c18b6b03fdd Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 30 Jan 2021 22:58:28 +0800 -Subject: [PATCH 099/297] sched/alt: [Sync] 120455c514f7 sched: Fix hotplug vs - CPU bandwidth control - ---- - kernel/sched/alt_core.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 65d87ca69aff..8b9c3c414120 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5990,6 +5990,8 @@ int sched_cpu_activate(unsigned int cpu) - - int sched_cpu_deactivate(unsigned int cpu) - { -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; - int ret; - - set_cpu_active(cpu, false); -@@ -6011,6 +6013,11 @@ int sched_cpu_deactivate(unsigned int cpu) - */ - synchronize_rcu(); - -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ update_rq_clock(rq); -+ set_rq_offline(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ - #ifdef CONFIG_SCHED_SMT - /* - * When going down, decrement the number of cores with SMT present. 
-@@ -6094,7 +6101,6 @@ int sched_cpu_dying(unsigned int cpu) - sched_tick_stop(cpu); - - raw_spin_lock_irqsave(&rq->lock, flags); -- set_rq_offline(rq); - raw_spin_unlock_irqrestore(&rq->lock, flags); - - /* --- -2.37.0 - - -From 0a71700b835f3dd2d6760263e87759e4d94e3861 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 31 Jan 2021 11:02:35 +0800 -Subject: [PATCH 100/297] sched/alt: [Sync] 9cfc3e18adb0 sched: Massage - set_cpus_allowed() - ---- - kernel/sched/alt_core.c | 37 ++++++++++++++++++++++++------------- - 1 file changed, 24 insertions(+), 13 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 8b9c3c414120..f161c317419f 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1260,17 +1260,26 @@ static int migration_cpu_stop(void *data) - return 0; - } - -+#define SCA_CHECK 0x01 -+ - static inline void --set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) - { - cpumask_copy(&p->cpus_mask, new_mask); - p->nr_cpus_allowed = cpumask_weight(new_mask); - } - -+static void -+__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags) -+{ -+ set_cpus_allowed_common(p, new_mask, flags); -+} -+ - void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - { -- set_cpus_allowed_common(p, new_mask); -+ __do_set_cpus_allowed(p, new_mask, 0); - } -+ - #endif - - /** -@@ -1561,16 +1570,17 @@ void sched_set_stop_task(int cpu, struct task_struct *stop) - * call is not atomic; no spinlocks may be held. - */ - static int __set_cpus_allowed_ptr(struct task_struct *p, -- const struct cpumask *new_mask, bool check) -+ const struct cpumask *new_mask, -+ u32 flags) - { - const struct cpumask *cpu_valid_mask = cpu_active_mask; - int dest_cpu; -- unsigned long flags; -+ unsigned long irq_flags; - struct rq *rq; - raw_spinlock_t *lock; - int ret = 0; - -- raw_spin_lock_irqsave(&p->pi_lock, flags); -+ raw_spin_lock_irqsave(&p->pi_lock, irq_flags); - rq = __task_access_lock(p, &lock); - - if (p->flags & PF_KTHREAD) { -@@ -1584,7 +1594,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - * Must re-check here, to close a race against __kthread_bind(), - * sched_setaffinity() is not guaranteed to observe the flag. - */ -- if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) { - ret = -EINVAL; - goto out; - } -@@ -1598,7 +1608,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - goto out; - } - -- do_set_cpus_allowed(p, new_mask); -+ __do_set_cpus_allowed(p, new_mask, flags); - - if (p->flags & PF_KTHREAD) { - /* -@@ -1619,7 +1629,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - - /* Need help from migration thread: drop lock and wait. 
*/ - __task_access_unlock(p, lock); -- raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); - stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); - return 0; - } -@@ -1635,14 +1645,14 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - - out: - __task_access_unlock(p, lock); -- raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); - - return ret; - } - - int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) - { -- return __set_cpus_allowed_ptr(p, new_mask, false); -+ return __set_cpus_allowed_ptr(p, new_mask, 0); - } - EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); - -@@ -1655,7 +1665,8 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) - - static inline int - __set_cpus_allowed_ptr(struct task_struct *p, -- const struct cpumask *new_mask, bool check) -+ const struct cpumask *new_mask, -+ u32 flags) - { - return set_cpus_allowed_ptr(p, new_mask); - } -@@ -5270,7 +5281,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) - cpumask_and(new_mask, in_mask, cpus_allowed); - - again: -- retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); - - if (!retval) { - cpuset_cpus_allowed(p, cpus_allowed); -@@ -5815,7 +5826,7 @@ void init_idle(struct task_struct *idle, int cpu) - * - * And since this is boot we can forgo the serialisation. - */ -- set_cpus_allowed_common(idle, cpumask_of(cpu)); -+ set_cpus_allowed_common(idle, cpumask_of(cpu), 0); - #endif - - /* Silence PROVE_RCU */ --- -2.37.0 - - -From aa50f1a5556f41d41d9efd8543830a95d067d433 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 2 Feb 2021 15:10:02 +0800 -Subject: [PATCH 101/297] sched/alt: [Sync] af449901b84c sched: Add - migrate_disable() - ---- - kernel/sched/alt_core.c | 421 ++++++++++++++++++++++++++-------------- - 1 file changed, 271 insertions(+), 150 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f161c317419f..c61cb93915c9 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1090,15 +1090,18 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) - #endif - } - --void migrate_disable(void) -+static inline bool is_migration_disabled(struct task_struct *p) - { -+#ifdef CONFIG_SMP -+ return p->migration_disabled; -+#else -+ return false; -+#endif - } --EXPORT_SYMBOL_GPL(migrate_disable); - --void migrate_enable(void) --{ --} --EXPORT_SYMBOL_GPL(migrate_enable); -+#define SCA_CHECK 0x01 -+#define SCA_MIGRATE_DISABLE 0x02 -+#define SCA_MIGRATE_ENABLE 0x04 - - #ifdef CONFIG_SMP - -@@ -1126,6 +1129,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) - * Clearly, migrating tasks to offline CPUs is a fairly daft thing. 
- */ - WARN_ON_ONCE(!cpu_online(new_cpu)); -+ -+ WARN_ON_ONCE(is_migration_disabled(p)); - #endif - if (task_cpu(p) == new_cpu) - return; -@@ -1141,19 +1146,99 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); - } - -+static void -+__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); -+ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, -+ u32 flags); -+ -+static void migrate_disable_switch(struct rq *rq, struct task_struct *p) -+{ -+ if (likely(!p->migration_disabled)) -+ return; -+ -+ if (p->cpus_ptr != &p->cpus_mask) -+ return; -+ -+ /* -+ * Violates locking rules! see comment in __do_set_cpus_allowed(). -+ */ -+ __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE); -+} -+ -+void migrate_disable(void) -+{ -+ struct task_struct *p = current; -+ -+ if (p->migration_disabled) { -+ p->migration_disabled++; -+ return; -+ } -+ -+ preempt_disable(); -+ /*this_rq()->nr_pinned++;*/ -+ p->migration_disabled = 1; -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(migrate_disable); -+ -+void migrate_enable(void) -+{ -+ struct task_struct *p = current; -+ -+ if (p->migration_disabled > 1) { -+ p->migration_disabled--; -+ return; -+ } -+ -+ /* -+ * Ensure stop_task runs either before or after this, and that -+ * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). -+ */ -+ preempt_disable(); -+ if (p->cpus_ptr != &p->cpus_mask) -+ __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); -+ /* -+ * Mustn't clear migration_disabled() until cpus_ptr points back at the -+ * regular cpus_mask, otherwise things that race (eg. -+ * select_fallback_rq) get confused. -+ */ -+ barrier(); -+ p->migration_disabled = 0; -+ /*this_rq()->nr_pinned--;*/ -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(migrate_enable); -+ - /* - * Per-CPU kthreads are allowed to run on !active && online CPUs, see - * __set_cpus_allowed_ptr() and select_fallback_rq(). - */ - static inline bool is_cpu_allowed(struct task_struct *p, int cpu) - { -+ /* When not in the task's cpumask, no point in looking further. */ - if (!cpumask_test_cpu(cpu, p->cpus_ptr)) - return false; - -- if (is_per_cpu_kthread(p)) -+ /* migrate_disabled() must be allowed to finish. */ -+ if (is_migration_disabled(p)) - return cpu_online(cpu); - -- return cpu_active(cpu); -+ /* Non kernel threads are not allowed during either online or offline. */ -+ if (!(p->flags & PF_KTHREAD)) -+ return cpu_active(cpu); -+ -+ /* KTHREAD_IS_PER_CPU is always allowed. */ -+ if (kthread_is_per_cpu(p)) -+ return cpu_online(cpu); -+ -+ /* Regular kernel threads don't get to stay during offline. */ -+ if (cpu_rq(cpu)->balance_push) -+ return false; -+ -+ /* But are allowed during online. 
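migrate_disable()/migrate_enable() in patch 101 above are reference counted: only the outermost disable pins the current task to its CPU (switching cpus_ptr, and bumping rq->nr_pinned once patch 102 uncomments that accounting), and only the matching outermost enable undoes it. A toy user-space model of just the nesting logic; the pin/unpin helpers stand in for the kernel's bookkeeping and are invented for the example:

    #include <assert.h>
    #include <stdio.h>

    static int migration_disabled;   /* per-task in the kernel; one global here */
    static int nr_pinned;            /* per-runqueue in the kernel */

    static void pin_to_current_cpu(void)     { nr_pinned++; }
    static void unpin_from_current_cpu(void) { nr_pinned--; }

    static void migrate_disable(void)
    {
        if (migration_disabled++)    /* already disabled: just nest deeper */
            return;
        pin_to_current_cpu();        /* outermost call does the real work */
    }

    static void migrate_enable(void)
    {
        assert(migration_disabled > 0);
        if (--migration_disabled)    /* still nested: nothing to undo yet */
            return;
        unpin_from_current_cpu();    /* outermost enable releases the pin */
    }

    int main(void)
    {
        migrate_disable();
        migrate_disable();           /* nested section */
        migrate_enable();
        printf("depth=%d pinned=%d\n", migration_disabled, nr_pinned);  /* 1 1 */
        migrate_enable();
        printf("depth=%d pinned=%d\n", migration_disabled, nr_pinned);  /* 0 0 */
        return 0;
    }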
*/ -+ return cpu_online(cpu); - } - - /* -@@ -1260,11 +1345,14 @@ static int migration_cpu_stop(void *data) - return 0; - } - --#define SCA_CHECK 0x01 -- - static inline void - set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) - { -+ if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) { -+ p->cpus_ptr = new_mask; -+ return; -+ } -+ - cpumask_copy(&p->cpus_mask, new_mask); - p->nr_cpus_allowed = cpumask_weight(new_mask); - } -@@ -1272,6 +1360,23 @@ set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u - static void - __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags) - { -+ /* -+ * This here violates the locking rules for affinity, since we're only -+ * supposed to change these variables while holding both rq->lock and -+ * p->pi_lock. -+ * -+ * HOWEVER, it magically works, because ttwu() is the only code that -+ * accesses these variables under p->pi_lock and only does so after -+ * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() -+ * before finish_task(). -+ * -+ * XXX do further audits, this smells like something putrid. -+ */ -+ if (flags & SCA_MIGRATE_DISABLE) -+ SCHED_WARN_ON(!p->on_cpu); -+ else -+ lockdep_assert_held(&p->pi_lock); -+ - set_cpus_allowed_common(p, new_mask, flags); - } - -@@ -1486,6 +1591,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p) - } - fallthrough; - case possible: -+ /* -+ * XXX When called from select_task_rq() we only -+ * hold p->pi_lock and again violate locking order. -+ * -+ * More yuck to audit. -+ */ - do_set_cpus_allowed(p, cpu_possible_mask); - state = fail; - break; -@@ -1583,9 +1694,16 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - raw_spin_lock_irqsave(&p->pi_lock, irq_flags); - rq = __task_access_lock(p, &lock); - -- if (p->flags & PF_KTHREAD) { -+ if (p->flags & PF_KTHREAD || is_migration_disabled(p)) { - /* -- * Kernel threads are allowed on online && !active CPUs -+ * Kernel threads are allowed on online && !active CPUs, -+ * however, during cpu-hot-unplug, even these might get pushed -+ * away if not KTHREAD_IS_PER_CPU. -+ * -+ * Specifically, migration_disabled() tasks must not fail the -+ * cpumask_any_and_distribute() pick below, esp. so on -+ * SCA_MIGRATE_ENABLE, otherwise we'll not call -+ * set_cpus_allowed_common() and actually reset p->cpus_ptr. - */ - cpu_valid_mask = cpu_online_mask; - } -@@ -1671,7 +1789,9 @@ __set_cpus_allowed_ptr(struct task_struct *p, - return set_cpus_allowed_ptr(p, new_mask); - } - --#endif /* CONFIG_SMP */ -+static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } -+ -+#endif /* !CONFIG_SMP */ - - static void - ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -@@ -3430,142 +3550,6 @@ static void sched_tick_stop(int cpu) - twork = per_cpu_ptr(tick_work_cpu, cpu); - cancel_delayed_work_sync(&twork->work); - } -- --static int __balance_push_cpu_stop(void *arg) --{ -- struct task_struct *p = arg; -- struct rq *rq = this_rq(); -- struct rq_flags rf; -- int cpu; -- -- raw_spin_lock_irq(&p->pi_lock); -- rq_lock(rq, &rf); -- -- update_rq_clock(rq); -- -- if (task_rq(p) == rq && task_on_rq_queued(p)) { -- cpu = select_fallback_rq(rq->cpu, p); -- rq = __migrate_task(rq, p, cpu); -- } -- -- rq_unlock(rq, &rf); -- raw_spin_unlock_irq(&p->pi_lock); -- -- put_task_struct(p); -- -- return 0; --} -- --static DEFINE_PER_CPU(struct cpu_stop_work, push_work); -- --/* -- * Ensure we only run per-cpu kthreads once the CPU goes !active. 
-- */ --static void balance_push(struct rq *rq) --{ -- struct task_struct *push_task = rq->curr; -- -- lockdep_assert_held(&rq->lock); -- SCHED_WARN_ON(rq->cpu != smp_processor_id()); -- /* -- * Ensure the thing is persistent until balance_push_set(.on = false); -- */ -- rq->balance_callback = &balance_push_callback; -- -- /* -- * Both the cpu-hotplug and stop task are in this case and are -- * required to complete the hotplug process. -- * -- * XXX: the idle task does not match kthread_is_per_cpu() due to -- * histerical raisins. -- */ -- if (rq->idle == push_task || -- ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task))) { -- /*((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || -- is_migration_disabled(push_task)) {*/ -- -- /* -- * If this is the idle task on the outgoing CPU try to wake -- * up the hotplug control thread which might wait for the -- * last task to vanish. The rcuwait_active() check is -- * accurate here because the waiter is pinned on this CPU -- * and can't obviously be running in parallel. -- * -- * On RT kernels this also has to check whether there are -- * pinned and scheduled out tasks on the runqueue. They -- * need to leave the migrate disabled section first. -- */ -- if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) { -- /*if (!rq->nr_running && !rq_has_pinned_tasks(rq) && -- rcuwait_active(&rq->hotplug_wait)) {*/ -- raw_spin_unlock(&rq->lock); -- rcuwait_wake_up(&rq->hotplug_wait); -- raw_spin_lock(&rq->lock); -- } -- return; -- } -- -- get_task_struct(push_task); -- /* -- * Temporarily drop rq->lock such that we can wake-up the stop task. -- * Both preemption and IRQs are still disabled. -- */ -- raw_spin_unlock(&rq->lock); -- stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, -- this_cpu_ptr(&push_work)); -- /* -- * At this point need_resched() is true and we'll take the loop in -- * schedule(). The next pick is obviously going to be the stop task -- * which kthread_is_per_cpu() and will push this task away. -- */ -- raw_spin_lock(&rq->lock); --} -- --static void balance_push_set(int cpu, bool on) --{ -- struct rq *rq = cpu_rq(cpu); -- struct rq_flags rf; -- -- rq_lock_irqsave(rq, &rf); -- rq->balance_push = on; -- if (on) { -- WARN_ON_ONCE(rq->balance_callback); -- rq->balance_callback = &balance_push_callback; -- } else if (rq->balance_callback == &balance_push_callback) { -- rq->balance_callback = NULL; -- } -- rq_unlock_irqrestore(rq, &rf); --} -- --/* -- * Invoked from a CPUs hotplug control thread after the CPU has been marked -- * inactive. All tasks which are not per CPU kernel threads are either -- * pushed off this CPU now via balance_push() or placed on a different CPU -- * during wakeup. Wait until the CPU is quiescent. 
-- */ --static void balance_hotplug_wait(void) --{ -- struct rq *rq = this_rq(); -- -- rcuwait_wait_event(&rq->hotplug_wait, -- rq->nr_running == 1, --/* rq->nr_running == 1 && !rq_has_pinned_tasks(rq),*/ -- TASK_UNINTERRUPTIBLE); --} -- --#else -- --static void balance_push(struct rq *rq) --{ --} -- --static void balance_push_set(int cpu, bool on) --{ --} -- --static inline void balance_hotplug_wait(void) --{ --} - #endif /* CONFIG_HOTPLUG_CPU */ - - int __init sched_tick_offload_init(void) -@@ -4046,6 +4030,7 @@ static void __sched notrace __schedule(bool preempt) - */ - ++*switch_count; - -+ migrate_disable_switch(rq, prev); - psi_sched_switch(prev, next, !task_on_rq_queued(prev)); - - trace_sched_switch(preempt, prev, next); -@@ -5900,12 +5885,148 @@ void idle_task_exit(void) - /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ - } - -+static int __balance_push_cpu_stop(void *arg) -+{ -+ struct task_struct *p = arg; -+ struct rq *rq = this_rq(); -+ struct rq_flags rf; -+ int cpu; -+ -+ raw_spin_lock_irq(&p->pi_lock); -+ rq_lock(rq, &rf); -+ -+ update_rq_clock(rq); -+ -+ if (task_rq(p) == rq && task_on_rq_queued(p)) { -+ cpu = select_fallback_rq(rq->cpu, p); -+ rq = __migrate_task(rq, p, cpu); -+ } -+ -+ rq_unlock(rq, &rf); -+ raw_spin_unlock_irq(&p->pi_lock); -+ -+ put_task_struct(p); -+ -+ return 0; -+} -+ -+static DEFINE_PER_CPU(struct cpu_stop_work, push_work); -+ -+/* -+ * Ensure we only run per-cpu kthreads once the CPU goes !active. -+ */ -+static void balance_push(struct rq *rq) -+{ -+ struct task_struct *push_task = rq->curr; -+ -+ lockdep_assert_held(&rq->lock); -+ SCHED_WARN_ON(rq->cpu != smp_processor_id()); -+ /* -+ * Ensure the thing is persistent until balance_push_set(.on = false); -+ */ -+ rq->balance_callback = &balance_push_callback; -+ -+ /* -+ * Both the cpu-hotplug and stop task are in this case and are -+ * required to complete the hotplug process. -+ * -+ * XXX: the idle task does not match kthread_is_per_cpu() due to -+ * histerical raisins. -+ */ -+ if (rq->idle == push_task || -+ ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task))) { -+ /*((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || -+ is_migration_disabled(push_task)) {*/ -+ -+ /* -+ * If this is the idle task on the outgoing CPU try to wake -+ * up the hotplug control thread which might wait for the -+ * last task to vanish. The rcuwait_active() check is -+ * accurate here because the waiter is pinned on this CPU -+ * and can't obviously be running in parallel. -+ * -+ * On RT kernels this also has to check whether there are -+ * pinned and scheduled out tasks on the runqueue. They -+ * need to leave the migrate disabled section first. -+ */ -+ if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) { -+ /*if (!rq->nr_running && !rq_has_pinned_tasks(rq) && -+ rcuwait_active(&rq->hotplug_wait)) {*/ -+ raw_spin_unlock(&rq->lock); -+ rcuwait_wake_up(&rq->hotplug_wait); -+ raw_spin_lock(&rq->lock); -+ } -+ return; -+ } -+ -+ get_task_struct(push_task); -+ /* -+ * Temporarily drop rq->lock such that we can wake-up the stop task. -+ * Both preemption and IRQs are still disabled. -+ */ -+ raw_spin_unlock(&rq->lock); -+ stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, -+ this_cpu_ptr(&push_work)); -+ /* -+ * At this point need_resched() is true and we'll take the loop in -+ * schedule(). The next pick is obviously going to be the stop task -+ * which kthread_is_per_cpu() and will push this task away. 
-+ */ -+ raw_spin_lock(&rq->lock); -+} -+ -+static void balance_push_set(int cpu, bool on) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rq_lock_irqsave(rq, &rf); -+ rq->balance_push = on; -+ if (on) { -+ WARN_ON_ONCE(rq->balance_callback); -+ rq->balance_callback = &balance_push_callback; -+ } else if (rq->balance_callback == &balance_push_callback) { -+ rq->balance_callback = NULL; -+ } -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+/* -+ * Invoked from a CPUs hotplug control thread after the CPU has been marked -+ * inactive. All tasks which are not per CPU kernel threads are either -+ * pushed off this CPU now via balance_push() or placed on a different CPU -+ * during wakeup. Wait until the CPU is quiescent. -+ */ -+static void balance_hotplug_wait(void) -+{ -+ struct rq *rq = this_rq(); -+ -+ rcuwait_wait_event(&rq->hotplug_wait, -+ rq->nr_running == 1, -+/* rq->nr_running == 1 && !rq_has_pinned_tasks(rq),*/ -+ TASK_UNINTERRUPTIBLE); -+} -+ -+#else -+ -+static void balance_push(struct rq *rq) -+{ -+} -+ -+static void balance_push_set(int cpu, bool on) -+{ -+} -+ -+static inline void balance_hotplug_wait(void) -+{ -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ - static void set_rq_offline(struct rq *rq) - { - if (rq->online) - rq->online = false; - } --#endif /* CONFIG_HOTPLUG_CPU */ - - static void set_rq_online(struct rq *rq) - { --- -2.37.0 - - -From 7cd866a92841ab94bf78068dedba172ac1db71ad Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 9 Feb 2021 17:27:46 +0800 -Subject: [PATCH 102/297] sched/alt: [Sync] 3015ef4b98f5 sched/core: Make - migrate disable and CPU hotplug cooperative - ---- - kernel/sched/alt_core.c | 28 ++++++++++++++++++---------- - kernel/sched/alt_sched.h | 1 + - 2 files changed, 19 insertions(+), 10 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c61cb93915c9..556c27911635 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1177,7 +1177,7 @@ void migrate_disable(void) - } - - preempt_disable(); -- /*this_rq()->nr_pinned++;*/ -+ this_rq()->nr_pinned++; - p->migration_disabled = 1; - preempt_enable(); - } -@@ -1206,11 +1206,16 @@ void migrate_enable(void) - */ - barrier(); - p->migration_disabled = 0; -- /*this_rq()->nr_pinned--;*/ -+ this_rq()->nr_pinned--; - preempt_enable(); - } - EXPORT_SYMBOL_GPL(migrate_enable); - -+static inline bool rq_has_pinned_tasks(struct rq *rq) -+{ -+ return rq->nr_pinned; -+} -+ - /* - * Per-CPU kthreads are allowed to run on !active && online CPUs, see - * __set_cpus_allowed_ptr() and select_fallback_rq(). -@@ -1791,6 +1796,11 @@ __set_cpus_allowed_ptr(struct task_struct *p, - - static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } - -+static inline bool rq_has_pinned_tasks(struct rq *rq) -+{ -+ return false; -+} -+ - #endif /* !CONFIG_SMP */ - - static void -@@ -5934,9 +5944,8 @@ static void balance_push(struct rq *rq) - * histerical raisins. - */ - if (rq->idle == push_task || -- ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task))) { -- /*((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || -- is_migration_disabled(push_task)) {*/ -+ ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || -+ is_migration_disabled(push_task)) { - - /* - * If this is the idle task on the outgoing CPU try to wake -@@ -5949,9 +5958,8 @@ static void balance_push(struct rq *rq) - * pinned and scheduled out tasks on the runqueue. They - * need to leave the migrate disabled section first. 
- */ -- if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) { -- /*if (!rq->nr_running && !rq_has_pinned_tasks(rq) && -- rcuwait_active(&rq->hotplug_wait)) {*/ -+ if (!rq->nr_running && !rq_has_pinned_tasks(rq) && -+ rcuwait_active(&rq->hotplug_wait)) { - raw_spin_unlock(&rq->lock); - rcuwait_wake_up(&rq->hotplug_wait); - raw_spin_lock(&rq->lock); -@@ -6002,8 +6010,7 @@ static void balance_hotplug_wait(void) - struct rq *rq = this_rq(); - - rcuwait_wait_event(&rq->hotplug_wait, -- rq->nr_running == 1, --/* rq->nr_running == 1 && !rq_has_pinned_tasks(rq),*/ -+ rq->nr_running == 1 && !rq_has_pinned_tasks(rq), - TASK_UNINTERRUPTIBLE); - } - -@@ -6233,6 +6240,7 @@ int sched_cpu_dying(unsigned int cpu) - sched_tick_stop(cpu); - - raw_spin_lock_irqsave(&rq->lock, flags); -+ BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq)); - raw_spin_unlock_irqrestore(&rq->lock, flags); - - /* -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 30e80c4b0825..cc2739f843af 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -133,6 +133,7 @@ struct rq { - #ifdef CONFIG_HOTPLUG_CPU - struct rcuwait hotplug_wait; - #endif -+ unsigned int nr_pinned; - #endif /* CONFIG_SMP */ - #ifdef CONFIG_IRQ_TIME_ACCOUNTING - u64 prev_irq_time; --- -2.37.0 - - -From c0099d5e58395efcaa4823e4bb0b40dfb0c65964 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 10 Feb 2021 18:41:04 +0800 -Subject: [PATCH 103/297] sched/alt: [Sync] ded467dc83ac sched, lockdep: - Annotate ->pi_lock recursion - ---- - kernel/sched/alt_core.c | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 556c27911635..9fa24c75ee0d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1649,6 +1649,7 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) - - void sched_set_stop_task(int cpu, struct task_struct *stop) - { -+ static struct lock_class_key stop_pi_lock; - struct sched_param stop_param = { .sched_priority = STOP_PRIO }; - struct sched_param start_param = { .sched_priority = 0 }; - struct task_struct *old_stop = cpu_rq(cpu)->stop; -@@ -1663,6 +1664,20 @@ void sched_set_stop_task(int cpu, struct task_struct *stop) - * rely on PI working anyway. - */ - sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ -+ /* -+ * The PI code calls rt_mutex_setprio() with ->pi_lock held to -+ * adjust the effective priority of a task. As a result, -+ * rt_mutex_setprio() can trigger (RT) balancing operations, -+ * which can then trigger wakeups of the stop thread to push -+ * around the current task. -+ * -+ * The stop task itself will never be part of the PI-chain, it -+ * never blocks, therefore that ->pi_lock recursion is safe. -+ * Tell lockdep about this by placing the stop->pi_lock in its -+ * own class. 
-+ */ -+ lockdep_set_class(&stop->pi_lock, &stop_pi_lock); - } - - cpu_rq(cpu)->stop = stop; --- -2.37.0 - - -From faa649b6a21efaababa472a45d8b12f61ae6296f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 10 Feb 2021 19:22:58 +0800 -Subject: [PATCH 104/297] sched/alt: Fix lockdep_assert_held in - splice_balance_callbacks() - ---- - kernel/sched/alt_core.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 9fa24c75ee0d..5c7f8b734b12 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2803,9 +2803,10 @@ static inline struct callback_head *splice_balance_callbacks(struct rq *rq) - { - struct callback_head *head = rq->balance_callback; - -- lockdep_assert_held(&rq->lock); -- if (head) -+ if (head) { -+ lockdep_assert_held(&rq->lock); - rq->balance_callback = NULL; -+ } - - return head; - } --- -2.37.0 - - -From d54543bb65e824b5c53f054fb317ef70fd53e6b3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 13 Feb 2021 16:41:23 +0800 -Subject: [PATCH 105/297] sched/alt: Remove migrate_disable_switch(). - ---- - kernel/sched/alt_core.c | 24 +++++++----------------- - 1 file changed, 7 insertions(+), 17 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 5c7f8b734b12..a69c9d449d3d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1153,20 +1153,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask, - u32 flags); - --static void migrate_disable_switch(struct rq *rq, struct task_struct *p) --{ -- if (likely(!p->migration_disabled)) -- return; -- -- if (p->cpus_ptr != &p->cpus_mask) -- return; -- -- /* -- * Violates locking rules! see comment in __do_set_cpus_allowed(). -- */ -- __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE); --} -- - void migrate_disable(void) - { - struct task_struct *p = current; -@@ -1179,6 +1165,13 @@ void migrate_disable(void) - preempt_disable(); - this_rq()->nr_pinned++; - p->migration_disabled = 1; -+ -+ /* -+ * Violates locking rules! see comment in __do_set_cpus_allowed(). -+ */ -+ if (p->cpus_ptr == &p->cpus_mask) -+ __do_set_cpus_allowed(p, cpumask_of(smp_processor_id()), SCA_MIGRATE_DISABLE); -+ - preempt_enable(); - } - EXPORT_SYMBOL_GPL(migrate_disable); -@@ -1809,8 +1802,6 @@ __set_cpus_allowed_ptr(struct task_struct *p, - return set_cpus_allowed_ptr(p, new_mask); - } - --static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } -- - static inline bool rq_has_pinned_tasks(struct rq *rq) - { - return false; -@@ -4056,7 +4047,6 @@ static void __sched notrace __schedule(bool preempt) - */ - ++*switch_count; - -- migrate_disable_switch(rq, prev); - psi_sched_switch(prev, next, !task_on_rq_queued(prev)); - - trace_sched_switch(preempt, prev, next); --- -2.37.0 - - -From 37059fa68af2b9666598eb0aecef3d9eb16d9b3e Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 15 Feb 2021 10:48:22 +0800 -Subject: [PATCH 106/297] sched/alt: Rework migration disable vs - set_cpus_allowed_ptr() - ---- - kernel/sched/alt_core.c | 16 ++++++++++++++-- - 1 file changed, 14 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index a69c9d449d3d..1f781a4d4103 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1190,8 +1190,12 @@ void migrate_enable(void) - * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). 
- */ - preempt_disable(); -+ /* -+ * Assumption: current should be running on allowed cpu -+ */ -+ WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &p->cpus_mask)); - if (p->cpus_ptr != &p->cpus_mask) -- __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); -+ __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); - /* - * Mustn't clear migration_disabled() until cpus_ptr points back at the - * regular cpus_mask, otherwise things that race (eg. -@@ -1370,7 +1374,7 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 - * - * XXX do further audits, this smells like something putrid. - */ -- if (flags & SCA_MIGRATE_DISABLE) -+ if (flags & (SCA_MIGRATE_DISABLE | SCA_MIGRATE_ENABLE)) - SCHED_WARN_ON(!p->on_cpu); - else - lockdep_assert_held(&p->pi_lock); -@@ -1755,6 +1759,14 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - if (cpumask_test_cpu(task_cpu(p), new_mask)) - goto out; - -+ if (p->migration_disabled) { -+ if (p->cpus_ptr != &p->cpus_mask) -+ __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); -+ p->migration_disabled = 0; -+ /* When p is migrate_disabled, rq->lock should be held */ -+ rq->nr_pinned--; -+ } -+ - if (task_running(p) || p->state == TASK_WAKING) { - struct migration_arg arg = { p, dest_cpu }; - --- -2.37.0 - - -From 0ffb554fdb86e863746b1a3907168c80f3c5f9a2 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 15 Feb 2021 15:53:12 +0800 -Subject: [PATCH 107/297] sched/alt: [Sync] b19a888c1e9b sched/core: Fix typos - in comments - ---- - kernel/sched/alt_core.c | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 1f781a4d4103..b2d4dc2cc6e1 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -203,7 +203,7 @@ static inline struct task_struct *rq_runnable_task(struct rq *rq) - * - * Normal scheduling state is serialized by rq->lock. __schedule() takes the - * local CPU's rq->lock, it optionally removes the task from the runqueue and -- * always looks at the local rq data structures to find the most elegible task -+ * always looks at the local rq data structures to find the most eligible task - * to run next. - * - * Task enqueue is also under rq->lock, possibly taken from another CPU. -@@ -653,7 +653,7 @@ static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) - - /* - * Atomically grab the task, if ->wake_q is !nil already it means -- * its already queued (either by us or someone else) and will get the -+ * it's already queued (either by us or someone else) and will get the - * wakeup due to that. - * - * In order to ensure that a pending wakeup will observe our pending -@@ -2320,7 +2320,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - - /* - * If the owning (remote) CPU is still in the middle of schedule() with -- * this task as prev, wait until its done referencing the task. -+ * this task as prev, wait until it's done referencing the task. - * - * Pairs with the smp_store_release() in finish_task(). - * -@@ -3180,7 +3180,7 @@ unsigned long nr_iowait_cpu(int cpu) - } - - /* -- * IO-wait accounting, and how its mostly bollocks (on SMP). -+ * IO-wait accounting, and how it's mostly bollocks (on SMP). - * - * The idea behind IO-wait account is to account the idle time that we could - * have spend running if it were not for IO. 
That is, if we were to improve the -@@ -4404,7 +4404,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) - * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to - * ensure a task is de-boosted (pi_task is set to NULL) before the - * task is allowed to run again (and can exit). This ensures the pointer -- * points to a blocked task -- which guaratees the task is present. -+ * points to a blocked task -- which guarantees the task is present. - */ - p->pi_top_task = pi_task; - -@@ -4472,7 +4472,7 @@ void set_user_nice(struct task_struct *p, long nice) - /* - * The RT priorities are set via sched_setscheduler(), but we still - * allow the 'normal' nice value to be set - but as expected -- * it wont have any effect on scheduling until the task is -+ * it won't have any effect on scheduling until the task is - * not SCHED_NORMAL/SCHED_BATCH: - */ - if (task_has_rt_policy(p)) -@@ -5507,7 +5507,7 @@ EXPORT_SYMBOL(__cond_resched_lock); - * - * The scheduler is at all times free to pick the calling task as the most - * eligible task to run, if removing the yield() call from your code breaks -- * it, its already broken. -+ * it, it's already broken. - * - * Typical broken usage is: - * --- -2.37.0 - - -From bf49bd4919bd1d6d05c4876ae039ef9a0b6bb9cc Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 15 Feb 2021 23:42:56 +0800 -Subject: [PATCH 108/297] Project-C v5.11-r0 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b2d4dc2cc6e1..cd1b0b85af6d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.10-r2" -+#define ALT_SCHED_VERSION "v5.11-r0" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From d179e9a644963bed5dd386cddf930b6f565f73c6 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 18 Feb 2021 15:36:42 +0800 -Subject: [PATCH 109/297] sched/alt: [Sync] 59a74b1544e1 sched: Fix kernel-doc - markup - ---- - kernel/sched/alt_core.c | 17 ++++++++--------- - 1 file changed, 8 insertions(+), 9 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index cd1b0b85af6d..2daa6605ccdc 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5417,15 +5417,6 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, - return ret; - } - --/** -- * sys_sched_yield - yield the current processor to other threads. -- * -- * This function yields the current CPU to other tasks. It does this by -- * scheduling away the current task. If it still has the earliest deadline -- * it will be scheduled again as the next task. -- * -- * Return: 0. -- */ - static void do_sched_yield(void) - { - struct rq *rq; -@@ -5453,6 +5444,14 @@ static void do_sched_yield(void) - schedule(); - } - -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. If there are no -+ * other threads running on this CPU then this function will return. -+ * -+ * Return: 0. 
-+ */ - SYSCALL_DEFINE0(sched_yield) - { - do_sched_yield(); --- -2.37.0 - - -From dd43a5c9f5117663f447f7e34ff495f46805adba Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 18 Feb 2021 15:58:32 +0800 -Subject: [PATCH 110/297] sched/alt: [Sync] 36c6e17bf169 sched/core: Print out - straggler tasks in sched_cpu_dying() - ---- - kernel/sched/alt_core.c | 26 ++++++++++++++++++++++++-- - kernel/sched/alt_sched.h | 2 +- - 2 files changed, 25 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 2daa6605ccdc..b469a7a9156e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3761,7 +3761,7 @@ inline void alt_sched_debug(void) {} - - #ifdef CONFIG_SMP - --#define SCHED_RQ_NR_MIGRATION (32UL) -+#define SCHED_RQ_NR_MIGRATION (32U) - /* - * Migrate pending tasks in @rq to @dest_cpu - * Will try to migrate mininal of half of @rq nr_running tasks and -@@ -6248,6 +6248,25 @@ static void calc_load_migrate(struct rq *rq) - atomic_long_add(delta, &calc_load_tasks); - } - -+static void dump_rq_tasks(struct rq *rq, const char *loglvl) -+{ -+ struct task_struct *g, *p; -+ int cpu = cpu_of(rq); -+ -+ lockdep_assert_held(&rq->lock); -+ -+ printk("%sCPU%d enqueued tasks (%u total):\n", loglvl, cpu, rq->nr_running); -+ for_each_process_thread(g, p) { -+ if (task_cpu(p) != cpu) -+ continue; -+ -+ if (!task_on_rq_queued(p)) -+ continue; -+ -+ printk("%s\tpid: %d, name: %s\n", loglvl, p->pid, p->comm); -+ } -+} -+ - int sched_cpu_dying(unsigned int cpu) - { - struct rq *rq = cpu_rq(cpu); -@@ -6257,7 +6276,10 @@ int sched_cpu_dying(unsigned int cpu) - sched_tick_stop(cpu); - - raw_spin_lock_irqsave(&rq->lock, flags); -- BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq)); -+ if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) { -+ WARN(true, "Dying CPU not properly vacated!"); -+ dump_rq_tasks(rq, KERN_WARNING); -+ } - raw_spin_unlock_irqrestore(&rq->lock, flags); - - /* -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index cc2739f843af..192586fee177 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -153,7 +153,7 @@ struct rq { - u64 last_ts_switch; - u64 clock_task; - -- unsigned long nr_running; -+ unsigned int nr_running; - unsigned long nr_uninterruptible; - - #ifdef CONFIG_SCHED_HRTICK --- -2.37.0 - - -From e328096eaac153b4ecc56833b04e050dbed9eb0c Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 18 Feb 2021 22:19:45 +0800 -Subject: [PATCH 111/297] sched/alt: [Sync] 5ba2ffba13a1 sched: Fix CPU hotplug - / tighten is_per_cpu_kthread() - ---- - kernel/sched/alt_core.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b469a7a9156e..97ae80161672 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1979,6 +1979,13 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags - - static inline bool ttwu_queue_cond(int cpu, int wake_flags) - { -+ /* -+ * Do not complicate things with the async wake_list while the CPU is -+ * in hotplug state. -+ */ -+ if (!cpu_active(cpu)) -+ return false; -+ - /* - * If the CPU does not share cache, then queue the task on the - * remote rqs wakelist to avoid accessing remote data. -@@ -6165,6 +6172,9 @@ int sched_cpu_deactivate(unsigned int cpu) - * users of this state to go away such that all new such users will - * observe it. - * -+ * Specifically, we rely on ttwu to no longer target this CPU, see -+ * ttwu_queue_cond() and is_cpu_allowed(). 
-+ * - * Do sync before park smpboot threads to take care the rcu boost case. - */ - synchronize_rcu(); --- -2.37.0 - - -From 47e48c16fb86689337883e095638d8051f173655 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 28 Feb 2021 22:16:39 +0800 -Subject: [PATCH 112/297] sched/alt: Implement __cant_migrate() for BMQ/PDS. - ---- - kernel/sched/alt_core.c | 33 +++++++++++++++++++++++++++++++++ - 1 file changed, 33 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 97ae80161672..02d405a8a789 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1146,6 +1146,8 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); - } - -+#define MDF_FORCE_ENABLED 0x80 -+ - static void - __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); - -@@ -1165,6 +1167,7 @@ void migrate_disable(void) - preempt_disable(); - this_rq()->nr_pinned++; - p->migration_disabled = 1; -+ p->migration_flags &= ~MDF_FORCE_ENABLED; - - /* - * Violates locking rules! see comment in __do_set_cpus_allowed(). -@@ -1763,6 +1766,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - if (p->cpus_ptr != &p->cpus_mask) - __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); - p->migration_disabled = 0; -+ p->migration_flags |= MDF_FORCE_ENABLED; - /* When p is migrate_disabled, rq->lock should be held */ - rq->nr_pinned--; - } -@@ -6608,6 +6612,35 @@ EXPORT_SYMBOL_GPL(__cant_sleep); - #ifdef CONFIG_SMP - void __cant_migrate(const char *file, int line) - { -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (is_migration_disabled(current)) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > 0) -+ return; -+ -+ if (current->migration_flags & MDF_FORCE_ENABLED) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ pr_err("BUG: assuming non migratable context at %s:%d\n", file, line); -+ pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), is_migration_disabled(current), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); - } - EXPORT_SYMBOL_GPL(__cant_migrate); - #endif --- -2.37.0 - - -From 2209d91d2a1d835bdf5df0a6fd0055cf3d61c268 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 1 Mar 2021 13:52:16 +0800 -Subject: [PATCH 113/297] Project-C v5.11-r1 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 02d405a8a789..2ca0e6470022 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.11-r0" -+#define ALT_SCHED_VERSION "v5.11-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 82a0930bbf23372c447336576957f2c3f7609ae8 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 7 Mar 2021 10:03:57 +0800 -Subject: [PATCH 114/297] sched/alt: Implement fake cpu.shares for CGROUP - ---- - init/Kconfig | 4 +++- - kernel/sched/alt_core.c | 50 ++++++++++++++++++++++++++++++++++++++++ - kernel/sched/alt_sched.h | 45 ++++++++++++++++++++++++++++++++++++ - 3 files changed, 98 insertions(+), 1 
deletion(-) - -diff --git a/init/Kconfig b/init/Kconfig -index c4acd49edf94..fcdfda2b98f1 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1028,12 +1028,13 @@ menuconfig CGROUP_SCHED - bandwidth allocation to such task groups. It uses cgroups to group - tasks. - --if CGROUP_SCHED && !SCHED_ALT -+if CGROUP_SCHED - config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on CGROUP_SCHED - default CGROUP_SCHED - -+if !SCHED_ALT - config CFS_BANDWIDTH - bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" - depends on FAIR_GROUP_SCHED -@@ -1056,6 +1057,7 @@ config RT_GROUP_SCHED - realtime bandwidth for them. - See Documentation/scheduler/sched-rt-group.rst for more information. - -+endif #!SCHED_ALT - endif #CGROUP_SCHED - - config UCLAMP_TASK_GROUP -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 2ca0e6470022..b470dc451585 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6411,6 +6411,9 @@ struct task_group { - struct task_group *parent; - struct list_head siblings; - struct list_head children; -+#ifdef CONFIG_FAIR_GROUP_SCHED -+ unsigned long shares; -+#endif - }; - - /* -@@ -6829,7 +6832,54 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset) - { - } - -+#ifdef CONFIG_FAIR_GROUP_SCHED -+static DEFINE_MUTEX(shares_mutex); -+ -+int sched_group_set_shares(struct task_group *tg, unsigned long shares) -+{ -+ /* -+ * We can't change the weight of the root cgroup. -+ */ -+ if (&root_task_group == tg) -+ return -EINVAL; -+ -+ shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES)); -+ -+ mutex_lock(&shares_mutex); -+ if (tg->shares == shares) -+ goto done; -+ -+ tg->shares = shares; -+done: -+ mutex_unlock(&shares_mutex); -+ return 0; -+} -+ -+static int cpu_shares_write_u64(struct cgroup_subsys_state *css, -+ struct cftype *cftype, u64 shareval) -+{ -+ if (shareval > scale_load_down(ULONG_MAX)) -+ shareval = MAX_SHARES; -+ return sched_group_set_shares(css_tg(css), scale_load(shareval)); -+} -+ -+static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css, -+ struct cftype *cft) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ return (u64) scale_load_down(tg->shares); -+} -+#endif -+ - static struct cftype cpu_legacy_files[] = { -+#ifdef CONFIG_FAIR_GROUP_SCHED -+ { -+ .name = "shares", -+ .read_u64 = cpu_shares_read_u64, -+ .write_u64 = cpu_shares_write_u64, -+ }, -+#endif - { } /* Terminate */ - }; - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 192586fee177..51f11bf416f4 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -61,6 +61,51 @@ - # define SCHED_WARN_ON(x) ({ (void)(x), 0; }) - #endif - -+/* -+ * Increase resolution of nice-level calculations for 64-bit architectures. -+ * The extra resolution improves shares distribution and load balancing of -+ * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup -+ * hierarchies, especially on larger systems. This is not a user-visible change -+ * and does not change the user-interface for setting shares/weights. -+ * -+ * We increase resolution only if we have enough bits to allow this increased -+ * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit -+ * are pretty high and the returns do not justify the increased costs. -+ * -+ * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to -+ * increase coverage and consistency always enable it on 64-bit platforms. 
-+ */ -+#ifdef CONFIG_64BIT -+# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) -+# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT) -+# define scale_load_down(w) \ -+({ \ -+ unsigned long __w = (w); \ -+ if (__w) \ -+ __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \ -+ __w; \ -+}) -+#else -+# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT) -+# define scale_load(w) (w) -+# define scale_load_down(w) (w) -+#endif -+ -+#ifdef CONFIG_FAIR_GROUP_SCHED -+#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD -+ -+/* -+ * A weight of 0 or 1 can cause arithmetics problems. -+ * A weight of a cfs_rq is the sum of weights of which entities -+ * are queued on this cfs_rq, so a weight of a entity should not be -+ * too large, so as the shares value of a task group. -+ * (The default weight is 1024 - so there's no practical -+ * limitation from this.) -+ */ -+#define MIN_SHARES (1UL << 1) -+#define MAX_SHARES (1UL << 18) -+#endif -+ - /* task_struct::on_rq states: */ - #define TASK_ON_RQ_QUEUED 1 - #define TASK_ON_RQ_MIGRATING 2 --- -2.37.0 - - -From 5e4ca862d9b0b8023e76ebb23538388e553fc492 Mon Sep 17 00:00:00 2001 -From: Piotr Gorski -Date: Sat, 6 Mar 2021 20:38:54 +0100 -Subject: [PATCH 115/297] sched/alt: Backport "sched/core: Allow - try_invoke_on_locked_down_task() with irqs disabled" - -Signed-off-by: Piotr Gorski ---- - kernel/sched/alt_core.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b470dc451585..dbbd8a46e75d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2371,7 +2371,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - - /** - * try_invoke_on_locked_down_task - Invoke a function on task in fixed state -- * @p: Process for which the function is to be invoked. -+ * @p: Process for which the function is to be invoked, can be @current. - * @func: Function to invoke. - * @arg: Argument to function. 
- * -@@ -2389,12 +2389,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - */ - bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) - { -- bool ret = false; - struct rq_flags rf; -+ bool ret = false; - struct rq *rq; - -- lockdep_assert_irqs_enabled(); -- raw_spin_lock_irq(&p->pi_lock); -+ raw_spin_lock_irqsave(&p->pi_lock, rf.flags); - if (p->on_rq) { - rq = __task_rq_lock(p, &rf); - if (task_rq(p) == rq) -@@ -2411,7 +2410,7 @@ bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct t - ret = func(p, arg); - } - } -- raw_spin_unlock_irq(&p->pi_lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); - return ret; - } - --- -2.37.0 - - -From 2e805c59748b8190428907cc188a49a863c1ee56 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 7 Mar 2021 10:27:35 +0800 -Subject: [PATCH 116/297] Project-C v5.11-r2 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index dbbd8a46e75d..cb8a4aabbb21 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.11-r1" -+#define ALT_SCHED_VERSION "v5.11-r2" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 977b3402474428a89feb66df6c9c85aa46bdb055 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 14 Mar 2021 17:28:24 +0800 -Subject: [PATCH 117/297] sched/alt: Fix limited cpufreq for schedutil. - -Here is the first fix for #12, which fix the limited cpufreq for -schedutil. But it still tend to stay at max cpufreq. ---- - kernel/sched/alt_sched.h | 3 ++- - kernel/sched/cpufreq_schedutil.c | 13 +++++++++---- - 2 files changed, 11 insertions(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 51f11bf416f4..7bcd96cc6bed 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -598,7 +598,8 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) - { - struct update_util_data *data; - -- data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); -+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, -+ cpu_of(rq))); - if (data) - data->func(data, rq_clock(rq), flags); - } -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 424b1e360af5..59536ee7c3d5 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -165,11 +165,16 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) - sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max, - FREQUENCY_UTIL, NULL); - } -+ - #else /* CONFIG_SCHED_ALT */ --static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) -+ -+static void sugov_get_util(struct sugov_cpu *sg_cpu) - { -- sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); -- return sg_cpu->max; -+ unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); -+ -+ sg_cpu->max = max; -+ sg_cpu->bw_dl = 0; -+ sg_cpu->util = cpu_rq(sg_cpu->cpu)->nr_running ? 
max:0UL; - } - #endif - -@@ -316,8 +321,8 @@ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) - { - #ifndef CONFIG_SCHED_ALT - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) --#endif - sg_cpu->sg_policy->limits_changed = true; -+#endif - } - - static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, --- -2.37.0 - - -From 1cfd6610f9c105c226e484148e4f8bce236f3638 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 14 Mar 2021 22:23:58 +0800 -Subject: [PATCH 118/297] sched/alt: Add is_migration_disabled() checking in - sg_balance_trigger(). - ---- - kernel/sched/alt_core.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index cb8a4aabbb21..6d56458e71a3 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1183,6 +1183,9 @@ void migrate_enable(void) - { - struct task_struct *p = current; - -+ if (0 == p->migration_disabled) -+ return; -+ - if (p->migration_disabled > 1) { - p->migration_disabled--; - return; -@@ -3389,7 +3392,8 @@ static inline int active_load_balance_cpu_stop(void *data) - rq->active_balance = 0; - /* _something_ may have changed the task, double check again */ - if (task_on_rq_queued(p) && task_rq(p) == rq && -- cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) { -+ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask) && -+ !is_migration_disabled(p)) { - int cpu = cpu_of(rq); - int dcpu = __best_mask_cpu(cpu, &tmp, - per_cpu(sched_cpu_llc_mask, cpu)); -@@ -3417,7 +3421,7 @@ static inline int sg_balance_trigger(const int cpu) - curr = rq->curr; - res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\ - cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\ -- (!rq->active_balance); -+ !is_migration_disabled(curr) && (!rq->active_balance); - - if (res) - rq->active_balance = 1; --- -2.37.0 - - -From 746b3ce0081d3447b924954709fddcfa3f0f0b57 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 25 Mar 2021 09:54:19 +0800 -Subject: [PATCH 119/297] Project-C v5.11-r3 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 6d56458e71a3..be7c185a741d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.11-r2" -+#define ALT_SCHED_VERSION "v5.11-r3" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 805822b2b2210e3e94ff6e24fa699dc2b4597f50 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 26 Mar 2021 14:20:06 +0800 -Subject: [PATCH 120/297] sched/alt: Implement cpu load history and util for - schedutil. - ---- - kernel/sched/alt_core.c | 16 ++++----- - kernel/sched/cpufreq_schedutil.c | 56 ++++++++++++++++++++++++++++++-- - 2 files changed, 61 insertions(+), 11 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index be7c185a741d..29b7d30fd45f 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -558,14 +558,6 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) - #endif - - sched_update_tick_dependency(rq); -- -- /* -- * If in_iowait is set, the code below may not trigger any cpufreq -- * utilization updates, so do it here explicitly with the IOWAIT flag -- * passed. 
-- */ -- if (p->in_iowait) -- cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); - } - - static inline void requeue_task(struct task_struct *p, struct rq *rq) -@@ -1057,7 +1049,13 @@ static void activate_task(struct task_struct *p, struct rq *rq) - { - enqueue_task(p, rq, ENQUEUE_WAKEUP); - p->on_rq = TASK_ON_RQ_QUEUED; -- cpufreq_update_util(rq, 0); -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. -+ */ -+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT * p->in_iowait); - } - - /* -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 59536ee7c3d5..c44d2e33e114 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -50,6 +50,13 @@ struct sugov_cpu { - unsigned long bw_dl; - unsigned long max; - -+#ifdef CONFIG_SCHED_ALT -+ /* For genenal cpu load util */ -+ s32 load_history; -+ u64 load_block; -+ u64 load_stamp; -+#endif -+ - /* The field below is for single-CPU policies only: */ - #ifdef CONFIG_NO_HZ_COMMON - unsigned long saved_idle_calls; -@@ -168,15 +175,52 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) - - #else /* CONFIG_SCHED_ALT */ - -+#define SG_CPU_LOAD_HISTORY_BITS (sizeof(s32) * 8ULL) -+#define SG_CPU_UTIL_SHIFT (8) -+#define SG_CPU_LOAD_HISTORY_SHIFT (SG_CPU_LOAD_HISTORY_BITS - 1 - SG_CPU_UTIL_SHIFT) -+#define SG_CPU_LOAD_HISTORY_TO_UTIL(l) (((l) >> SG_CPU_LOAD_HISTORY_SHIFT) & 0xff) -+ -+#define LOAD_BLOCK(t) ((t) >> 17) -+#define LOAD_HALF_BLOCK(t) ((t) >> 16) -+#define BLOCK_MASK(t) ((t) & ((0x01 << 18) - 1)) -+#define LOAD_BLOCK_BIT(b) (1UL << (SG_CPU_LOAD_HISTORY_BITS - 1 - (b))) -+#define CURRENT_LOAD_BIT LOAD_BLOCK_BIT(0) -+ - static void sugov_get_util(struct sugov_cpu *sg_cpu) - { - unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); - - sg_cpu->max = max; - sg_cpu->bw_dl = 0; -- sg_cpu->util = cpu_rq(sg_cpu->cpu)->nr_running ? max:0UL; -+ sg_cpu->util = SG_CPU_LOAD_HISTORY_TO_UTIL(sg_cpu->load_history) * -+ (max >> SG_CPU_UTIL_SHIFT); - } --#endif -+ -+static inline void sugov_cpu_load_update(struct sugov_cpu *sg_cpu, u64 time) -+{ -+ u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(sg_cpu->load_stamp), -+ SG_CPU_LOAD_HISTORY_BITS - 1); -+ u64 prev = !!(sg_cpu->load_history & CURRENT_LOAD_BIT); -+ u64 curr = !!cpu_rq(sg_cpu->cpu)->nr_running; -+ -+ if (delta) { -+ sg_cpu->load_history = sg_cpu->load_history >> delta; -+ -+ if (delta <= SG_CPU_UTIL_SHIFT) { -+ sg_cpu->load_block += (~BLOCK_MASK(sg_cpu->load_stamp)) * prev; -+ if (!!LOAD_HALF_BLOCK(sg_cpu->load_block) ^ curr) -+ sg_cpu->load_history ^= LOAD_BLOCK_BIT(delta); -+ } -+ -+ sg_cpu->load_block = BLOCK_MASK(time) * prev; -+ } else { -+ sg_cpu->load_block += (time - sg_cpu->load_stamp) * prev; -+ } -+ if (prev ^ curr) -+ sg_cpu->load_history ^= CURRENT_LOAD_BIT; -+ sg_cpu->load_stamp = time; -+} -+#endif /* CONFIG_SCHED_ALT */ - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. 
-@@ -328,6 +372,10 @@ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) - static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, - u64 time, unsigned int flags) - { -+#ifdef CONFIG_SCHED_ALT -+ sugov_cpu_load_update(sg_cpu, time); -+#endif /* CONFIG_SCHED_ALT */ -+ - sugov_iowait_boost(sg_cpu, time, flags); - sg_cpu->last_update = time; - -@@ -454,6 +502,10 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) - - raw_spin_lock(&sg_policy->update_lock); - -+#ifdef CONFIG_SCHED_ALT -+ sugov_cpu_load_update(sg_cpu, time); -+#endif /* CONFIG_SCHED_ALT */ -+ - sugov_iowait_boost(sg_cpu, time, flags); - sg_cpu->last_update = time; - --- -2.37.0 - - -From ca2227b48f1b32b8a21c607efbb96e93511ae698 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 29 Mar 2021 15:35:51 +0800 -Subject: [PATCH 121/297] sched/alt: [Sync] 741ba80f6f9a sched: Relax the - set_cpus_allowed_ptr() semantics - ---- - kernel/sched/alt_core.c | 10 ---------- - 1 file changed, 10 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 29b7d30fd45f..847a1baff835 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1749,16 +1749,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - - __do_set_cpus_allowed(p, new_mask, flags); - -- if (p->flags & PF_KTHREAD) { -- /* -- * For kernel threads that do indeed end up on online && -- * !active we want to ensure they are strict per-CPU threads. -- */ -- WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && -- !cpumask_intersects(new_mask, cpu_active_mask) && -- p->nr_cpus_allowed != 1); -- } -- - /* Can the task run on the task's current CPU? If so, we're done */ - if (cpumask_test_cpu(task_cpu(p), new_mask)) - goto out; --- -2.37.0 - - -From ce639edbb7ef2a9015c33b0affed597d9b118f11 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 30 Mar 2021 10:21:02 +0800 -Subject: [PATCH 122/297] sched/alt: Remove USER_PRIO, TASK_USER_PRIO and - MAX_USER_PRIO and MAX_USER_RT_PRIO - -9d061ba6bc17 sched: Remove USER_PRIO, TASK_USER_PRIO and MAX_USER_PRIO -ae18ad281e82 sched: Remove MAX_USER_RT_PRIO ---- - init/init_task.c | 4 ++-- - kernel/sched/alt_core.c | 6 +++--- - kernel/sched/pds_imp.h | 8 +++++--- - 3 files changed, 10 insertions(+), 8 deletions(-) - -diff --git a/init/init_task.c b/init/init_task.c -index a98a65334c15..b59f5a2fefc8 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -81,9 +81,9 @@ struct task_struct init_task - .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, - #endif - #ifdef CONFIG_SCHED_PDS -- .prio = MAX_USER_RT_PRIO, -+ .prio = MAX_RT_PRIO, - .static_prio = DEFAULT_PRIO, -- .normal_prio = MAX_USER_RT_PRIO, -+ .normal_prio = MAX_RT_PRIO, - #else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 847a1baff835..426af5230831 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4697,11 +4697,11 @@ static int __sched_setscheduler(struct task_struct *p, - - /* - * Valid priorities for SCHED_FIFO and SCHED_RR are -- * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * 1..MAX_RT_PRIO-1, valid priority for SCHED_NORMAL and - * SCHED_BATCH and SCHED_IDLE is 0. 
- */ - if (attr->sched_priority < 0 || -- (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) || -+ (p->mm && attr->sched_priority > MAX_RT_PRIO - 1) || - (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) - return -EINVAL; - if ((SCHED_RR == policy || SCHED_FIFO == policy) != -@@ -5614,7 +5614,7 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy) - switch (policy) { - case SCHED_FIFO: - case SCHED_RR: -- ret = MAX_USER_RT_PRIO-1; -+ ret = MAX_RT_PRIO - 1; - break; - case SCHED_NORMAL: - case SCHED_BATCH: -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index b1ad3d0b0430..0f068a98db87 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -38,7 +38,7 @@ static inline int normal_prio(struct task_struct *p) - if (task_has_rt_policy(p)) - return MAX_RT_PRIO - 1 - p->rt_priority; - -- return MAX_USER_RT_PRIO; -+ return MAX_RT_PRIO; - } - - static inline int -@@ -76,7 +76,8 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - p->time_slice = sched_timeslice_ns; - - if (p->prio >= MAX_RT_PRIO) -- p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; -+ p->deadline = rq->clock + -+ user_prio2deadline[p->static_prio - MAX_RT_PRIO]; - update_task_priodl(p); - - if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -@@ -237,7 +238,8 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) - { - p->sl_level = pds_skiplist_random_level(p); - if (p->prio >= MAX_RT_PRIO) -- p->deadline = rq->clock + user_prio2deadline[TASK_USER_PRIO(p)]; -+ p->deadline = rq->clock + -+ user_prio2deadline[p->static_prio - MAX_RT_PRIO]; - update_task_priodl(p); - } - --- -2.37.0 - - -From 560fe89ae1f42f337401be9dd4649ebeb365a690 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 30 Mar 2021 10:49:16 +0800 -Subject: [PATCH 123/297] sched/alt: [Sync] b965f1ddb47d preempt/dynamic: - Provide cond_resched() and might_resched() static calls - ---- - kernel/sched/alt_core.c | 16 +++++++++++++--- - 1 file changed, 13 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 426af5230831..d8420f2625c8 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5460,17 +5460,27 @@ SYSCALL_DEFINE0(sched_yield) - return 0; - } - --#ifndef CONFIG_PREEMPTION --int __sched _cond_resched(void) -+#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) -+int __sched __cond_resched(void) - { - if (should_resched(0)) { - preempt_schedule_common(); - return 1; - } -+#ifndef CONFIG_PREEMPT_RCU - rcu_all_qs(); -+#endif - return 0; - } --EXPORT_SYMBOL(_cond_resched); -+EXPORT_SYMBOL(__cond_resched); -+#endif -+ -+#ifdef CONFIG_PREEMPT_DYNAMIC -+DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched); -+EXPORT_STATIC_CALL_TRAMP(cond_resched); -+ -+DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched); -+EXPORT_STATIC_CALL_TRAMP(might_resched); - #endif - - /* --- -2.37.0 - - -From 346794d4112dd92eda4985ca64c94d67219f9bb1 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 30 Mar 2021 10:54:18 +0800 -Subject: [PATCH 124/297] sched/alt: [Sync] 2c9a98d3bc80 preempt/dynamic: - Provide preempt_schedule[_notrace]() static calls - ---- - kernel/sched/alt_core.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index d8420f2625c8..739d2860747b 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4257,6 +4257,12 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) - 
NOKPROBE_SYMBOL(preempt_schedule); - EXPORT_SYMBOL(preempt_schedule); - -+#ifdef CONFIG_PREEMPT_DYNAMIC -+DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func); -+EXPORT_STATIC_CALL(preempt_schedule); -+#endif -+ -+ - /** - * preempt_schedule_notrace - preempt_schedule called by tracing - * -@@ -4309,6 +4315,12 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) - } - EXPORT_SYMBOL_GPL(preempt_schedule_notrace); - -+#ifdef CONFIG_PREEMPT_DYNAMIC -+DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); -+EXPORT_STATIC_CALL(preempt_schedule_notrace); -+#endif -+ -+ - #endif /* CONFIG_PREEMPTION */ - - /* --- -2.37.0 - - -From e1344821acfcc9164852671078c5e11b968fe132 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 30 Mar 2021 13:38:47 +0800 -Subject: [PATCH 125/297] sched/alt: [Sync] 826bfeb37bb4 preempt/dynamic: - Support dynamic preempt with preempt= boot option - ---- - kernel/sched/alt_core.c | 185 ++++++++++++++++++++++++++++++++++++++- - kernel/sched/alt_sched.h | 1 + - 2 files changed, 185 insertions(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 739d2860747b..86ef6e3c1bbd 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4320,9 +4320,192 @@ DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); - EXPORT_STATIC_CALL(preempt_schedule_notrace); - #endif - -- - #endif /* CONFIG_PREEMPTION */ - -+#ifdef CONFIG_PREEMPT_DYNAMIC -+ -+#include -+ -+/* -+ * SC:cond_resched -+ * SC:might_resched -+ * SC:preempt_schedule -+ * SC:preempt_schedule_notrace -+ * SC:irqentry_exit_cond_resched -+ * -+ * -+ * NONE: -+ * cond_resched <- __cond_resched -+ * might_resched <- RET0 -+ * preempt_schedule <- NOP -+ * preempt_schedule_notrace <- NOP -+ * irqentry_exit_cond_resched <- NOP -+ * -+ * VOLUNTARY: -+ * cond_resched <- __cond_resched -+ * might_resched <- __cond_resched -+ * preempt_schedule <- NOP -+ * preempt_schedule_notrace <- NOP -+ * irqentry_exit_cond_resched <- NOP -+ * -+ * FULL: -+ * cond_resched <- RET0 -+ * might_resched <- RET0 -+ * preempt_schedule <- preempt_schedule -+ * preempt_schedule_notrace <- preempt_schedule_notrace -+ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched -+ */ -+ -+enum { -+ preempt_dynamic_none = 0, -+ preempt_dynamic_voluntary, -+ preempt_dynamic_full, -+}; -+ -+static int preempt_dynamic_mode = preempt_dynamic_full; -+ -+static int sched_dynamic_mode(const char *str) -+{ -+ if (!strcmp(str, "none")) -+ return 0; -+ -+ if (!strcmp(str, "voluntary")) -+ return 1; -+ -+ if (!strcmp(str, "full")) -+ return 2; -+ -+ return -1; -+} -+ -+static void sched_dynamic_update(int mode) -+{ -+ /* -+ * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in -+ * the ZERO state, which is invalid. 
-+ */ -+ static_call_update(cond_resched, __cond_resched); -+ static_call_update(might_resched, __cond_resched); -+ static_call_update(preempt_schedule, __preempt_schedule_func); -+ static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); -+ static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); -+ -+ switch (mode) { -+ case preempt_dynamic_none: -+ static_call_update(cond_resched, __cond_resched); -+ static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); -+ static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); -+ static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); -+ static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); -+ pr_info("Dynamic Preempt: none\n"); -+ break; -+ -+ case preempt_dynamic_voluntary: -+ static_call_update(cond_resched, __cond_resched); -+ static_call_update(might_resched, __cond_resched); -+ static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); -+ static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); -+ static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); -+ pr_info("Dynamic Preempt: voluntary\n"); -+ break; -+ -+ case preempt_dynamic_full: -+ static_call_update(cond_resched, (typeof(&__cond_resched)) __static_call_return0); -+ static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); -+ static_call_update(preempt_schedule, __preempt_schedule_func); -+ static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); -+ static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); -+ pr_info("Dynamic Preempt: full\n"); -+ break; -+ } -+ -+ preempt_dynamic_mode = mode; -+} -+ -+static int __init setup_preempt_mode(char *str) -+{ -+ int mode = sched_dynamic_mode(str); -+ if (mode < 0) { -+ pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); -+ return 1; -+ } -+ -+ sched_dynamic_update(mode); -+ return 0; -+} -+__setup("preempt=", setup_preempt_mode); -+ -+#ifdef CONFIG_SCHED_DEBUG -+ -+static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, -+ size_t cnt, loff_t *ppos) -+{ -+ char buf[16]; -+ int mode; -+ -+ if (cnt > 15) -+ cnt = 15; -+ -+ if (copy_from_user(&buf, ubuf, cnt)) -+ return -EFAULT; -+ -+ buf[cnt] = 0; -+ mode = sched_dynamic_mode(strstrip(buf)); -+ if (mode < 0) -+ return mode; -+ -+ sched_dynamic_update(mode); -+ -+ *ppos += cnt; -+ -+ return cnt; -+} -+ -+static int sched_dynamic_show(struct seq_file *m, void *v) -+{ -+ static const char * preempt_modes[] = { -+ "none", "voluntary", "full" -+ }; -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { -+ if (preempt_dynamic_mode == i) -+ seq_puts(m, "("); -+ seq_puts(m, preempt_modes[i]); -+ if (preempt_dynamic_mode == i) -+ seq_puts(m, ")"); -+ -+ seq_puts(m, " "); -+ } -+ -+ seq_puts(m, "\n"); -+ return 0; -+} -+ -+static int sched_dynamic_open(struct inode *inode, struct file *filp) -+{ -+ return single_open(filp, sched_dynamic_show, NULL); -+} -+ -+static const struct file_operations sched_dynamic_fops = { -+ .open = sched_dynamic_open, -+ .write = sched_dynamic_write, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = single_release, -+}; -+ -+static __init int sched_init_debug_dynamic(void) -+{ -+ debugfs_create_file("sched_preempt", 0644, NULL, NULL, &sched_dynamic_fops); -+ return 0; -+} -+late_initcall(sched_init_debug_dynamic); -+ -+#endif /* 
CONFIG_SCHED_DEBUG */ -+#endif /* CONFIG_PREEMPT_DYNAMIC */ -+ -+ - /* - * This is the entry point to schedule() from kernel preemption - * off of irq context. -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 7bcd96cc6bed..32b523e2af06 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -26,6 +26,7 @@ - #include - #include - #include -+#include - #include - #include - #include --- -2.37.0 - - -From 4883a757728996d234c848c6aedd3d24c891fa45 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 30 Mar 2021 13:42:07 +0800 -Subject: [PATCH 126/297] sched/alt: [Sync] f3d4b4b1dc1c sched: Add - cond_resched_rwlock - ---- - kernel/sched/alt_core.c | 40 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 86ef6e3c1bbd..e30d174d2ad8 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5706,6 +5706,46 @@ int __cond_resched_lock(spinlock_t *lock) - } - EXPORT_SYMBOL(__cond_resched_lock); - -+int __cond_resched_rwlock_read(rwlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held_read(lock); -+ -+ if (rwlock_needbreak(lock) || resched) { -+ read_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ read_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_rwlock_read); -+ -+int __cond_resched_rwlock_write(rwlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held_write(lock); -+ -+ if (rwlock_needbreak(lock) || resched) { -+ write_unlock(lock); -+ if (resched) -+ preempt_schedule_common(); -+ else -+ cpu_relax(); -+ ret = 1; -+ write_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_rwlock_write); -+ - /** - * yield - yield the current processor to other threads. 
- * --- -2.37.0 - - -From 767e8f222653a8922f0831746c66e109614d0951 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 31 Mar 2021 10:43:38 +0800 -Subject: [PATCH 127/297] sched/alt: [Sync] ef72661e28c6 sched: Harden - PREEMPT_DYNAMIC - ---- - kernel/sched/alt_core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index e30d174d2ad8..ece579819949 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4259,7 +4259,7 @@ EXPORT_SYMBOL(preempt_schedule); - - #ifdef CONFIG_PREEMPT_DYNAMIC - DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func); --EXPORT_STATIC_CALL(preempt_schedule); -+EXPORT_STATIC_CALL_TRAMP(preempt_schedule); - #endif - - -@@ -4317,7 +4317,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace); - - #ifdef CONFIG_PREEMPT_DYNAMIC - DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); --EXPORT_STATIC_CALL(preempt_schedule_notrace); -+EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); - #endif - - #endif /* CONFIG_PREEMPTION */ --- -2.37.0 - - -From 08ee68e325c221b5b5706754aab77c6215dc51dc Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 1 Apr 2021 15:40:39 +0800 -Subject: [PATCH 128/297] sched/alt: Update task_prio() function header - ---- - kernel/sched/bmq_imp.h | 7 +++++-- - kernel/sched/pds_imp.h | 7 +++++-- - 2 files changed, 10 insertions(+), 4 deletions(-) - -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index 13eda4b26b6a..7c71f1141d00 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -170,8 +170,11 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) - * @p: the task in question. - * - * Return: The priority value as seen by users in /proc. -- * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -- * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). -+ * -+ * sched policy return value kernel prio user prio/nice/boost -+ * -+ * normal, batch, idle [0 ... 53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] -+ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] - */ - int task_prio(const struct task_struct *p) - { -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index 0f068a98db87..335ce3a8e3ec 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -248,8 +248,11 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) - * @p: the task in question. - * - * Return: The priority value as seen by users in /proc. -- * RT tasks are offset by -100. Normal tasks are centered around 1, value goes -- * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). -+ * -+ * sched policy return value kernel prio user prio/nice -+ * -+ * normal, batch, idle [0 ... 39] 100 0/[-20 ... 19] -+ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 
99] - */ - int task_prio(const struct task_struct *p) - { --- -2.37.0 - - -From f446239e56f01cc4f6ef4a1d7bd7b53479cb3448 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 1 Apr 2021 22:42:09 +0800 -Subject: [PATCH 129/297] sched/alt: [Sync] 156ec6f42b8d sched/features: Fix - hrtick reprogramming - ---- - kernel/sched/alt_core.c | 7 +++---- - kernel/sched/alt_sched.h | 3 ++- - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index ece579819949..f69ed4d89395 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -937,8 +937,9 @@ static inline int hrtick_enabled(struct rq *rq) - static void __hrtick_restart(struct rq *rq) - { - struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time = rq->hrtick_time; - -- hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); -+ hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD); - } - - /* -@@ -961,7 +962,6 @@ static void __hrtick_start(void *arg) - void hrtick_start(struct rq *rq, u64 delay) - { - struct hrtimer *timer = &rq->hrtick_timer; -- ktime_t time; - s64 delta; - - /* -@@ -969,9 +969,8 @@ void hrtick_start(struct rq *rq, u64 delay) - * doesn't make sense and can cause timer DoS. - */ - delta = max_t(s64, delay, 10000LL); -- time = ktime_add_ns(timer->base->get_time(), delta); - -- hrtimer_set_expires(timer, time); -+ rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta); - - if (rq == this_rq()) - __hrtick_restart(rq); -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 32b523e2af06..2a6a0530fbb7 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -206,7 +206,8 @@ struct rq { - #ifdef CONFIG_SMP - call_single_data_t hrtick_csd; - #endif -- struct hrtimer hrtick_timer; -+ struct hrtimer hrtick_timer; -+ ktime_t hrtick_time; - #endif - - #ifdef CONFIG_SCHEDSTATS --- -2.37.0 - - -From 6c7833013dc88efb0192f1efc4acad3cee745c8e Mon Sep 17 00:00:00 2001 -From: Piotr Gorski -Date: Mon, 26 Apr 2021 13:33:02 +0200 -Subject: [PATCH 130/297] Project-C v5.12-r0 - -Signed-off-by: Piotr Gorski ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f69ed4d89395..02610d086d00 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.11-r3" -+#define ALT_SCHED_VERSION "v5.12-r0" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 9d465ac0e07e2f577469debce71ac5a4348279e5 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 1 Apr 2021 09:57:05 +0800 -Subject: [PATCH 131/297] sched/alt: Cleanup in cpufreq_schedutil.c - ---- - kernel/sched/cpufreq_schedutil.c | 9 ++------- - 1 file changed, 2 insertions(+), 7 deletions(-) - -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index c44d2e33e114..ab803029b7fd 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -905,10 +905,11 @@ struct cpufreq_governor *cpufreq_default_governor(void) - cpufreq_governor_init(schedutil_gov); - - #ifdef CONFIG_ENERGY_MODEL --#ifndef CONFIG_SCHED_ALT - static void rebuild_sd_workfn(struct work_struct *work) - { -+#ifndef CONFIG_SCHED_ALT - rebuild_sched_domains_energy(); -+#endif /* CONFIG_SCHED_ALT */ - } - static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); - -@@ -929,10 
+930,4 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - } - - } --#else /* CONFIG_SCHED_ALT */ --void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -- struct cpufreq_governor *old_gov) --{ --} --#endif - #endif --- -2.37.0 - - -From 0ba2755f9c2aacb23c4b5b832f8f33878d86b2bf Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 26 Mar 2021 13:46:22 +0800 -Subject: [PATCH 132/297] sched/alt: Remove unnecessary CONFIG_SMP macros - usage. - ---- - kernel/sched/alt_core.c | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 02610d086d00..ae37764ced1e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3816,15 +3816,13 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) - - if ((nr_migrated = migrate_pending_tasks(src_rq, rq, cpu))) { - src_rq->nr_running -= nr_migrated; --#ifdef CONFIG_SMP - if (src_rq->nr_running < 2) - cpumask_clear_cpu(i, &sched_rq_pending_mask); --#endif -+ - rq->nr_running += nr_migrated; --#ifdef CONFIG_SMP - if (rq->nr_running > 1) - cpumask_set_cpu(cpu, &sched_rq_pending_mask); --#endif -+ - update_sched_rq_watermark(rq); - cpufreq_update_util(rq, 0); - --- -2.37.0 - - -From 603af480eceb0442af627cb0ea8364f4f61f19dd Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 4 Apr 2021 08:54:29 +0800 -Subject: [PATCH 133/297] sched/alt: Code clean up - ---- - kernel/sched/alt_core.c | 11 +---------- - 1 file changed, 1 insertion(+), 10 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index ae37764ced1e..3e79fdc14152 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1138,20 +1138,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) - __set_task_cpu(p, new_cpu); - } - --static inline bool is_per_cpu_kthread(struct task_struct *p) --{ -- return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); --} -- - #define MDF_FORCE_ENABLED 0x80 - - static void - __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); - --static int __set_cpus_allowed_ptr(struct task_struct *p, -- const struct cpumask *new_mask, -- u32 flags); -- - void migrate_disable(void) - { - struct task_struct *p = current; -@@ -1753,7 +1744,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - goto out; - - if (p->migration_disabled) { -- if (p->cpus_ptr != &p->cpus_mask) -+ if (likely(p->cpus_ptr != &p->cpus_mask)) - __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); - p->migration_disabled = 0; - p->migration_flags |= MDF_FORCE_ENABLED; --- -2.37.0 - - -From 66512fba36f59b1144a9256ec3954bd9cc16d353 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 7 Apr 2021 11:43:30 +0800 -Subject: [PATCH 134/297] sched/alt: Don't migrate_disable() during migration. 
- ---- - kernel/sched/alt_core.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3e79fdc14152..11ffc1cb4528 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1146,6 +1146,7 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 - void migrate_disable(void) - { - struct task_struct *p = current; -+ int cpu; - - if (p->migration_disabled) { - p->migration_disabled++; -@@ -1153,16 +1154,18 @@ void migrate_disable(void) - } - - preempt_disable(); -- this_rq()->nr_pinned++; -- p->migration_disabled = 1; -- p->migration_flags &= ~MDF_FORCE_ENABLED; -- -- /* -- * Violates locking rules! see comment in __do_set_cpus_allowed(). -- */ -- if (p->cpus_ptr == &p->cpus_mask) -- __do_set_cpus_allowed(p, cpumask_of(smp_processor_id()), SCA_MIGRATE_DISABLE); -+ cpu = smp_processor_id(); -+ if (cpumask_test_cpu(cpu, &p->cpus_mask)) { -+ cpu_rq(cpu)->nr_pinned++; -+ p->migration_disabled = 1; -+ p->migration_flags &= ~MDF_FORCE_ENABLED; - -+ /* -+ * Violates locking rules! see comment in __do_set_cpus_allowed(). -+ */ -+ if (p->cpus_ptr == &p->cpus_mask) -+ __do_set_cpus_allowed(p, cpumask_of(cpu), SCA_MIGRATE_DISABLE); -+ } - preempt_enable(); - } - EXPORT_SYMBOL_GPL(migrate_disable); --- -2.37.0 - - -From 98b132c71bd16c65729ef343b36a23f39b3d5c75 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 7 Apr 2021 14:08:18 +0800 -Subject: [PATCH 135/297] sched/alt: migrate disable code clean up - ---- - kernel/sched/alt_core.c | 63 ++++++++++++++++++----------------------- - 1 file changed, 28 insertions(+), 35 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 11ffc1cb4528..4ed1ff9f1aab 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1097,8 +1097,6 @@ static inline bool is_migration_disabled(struct task_struct *p) - } - - #define SCA_CHECK 0x01 --#define SCA_MIGRATE_DISABLE 0x02 --#define SCA_MIGRATE_ENABLE 0x04 - - #ifdef CONFIG_SMP - -@@ -1141,7 +1139,23 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) - #define MDF_FORCE_ENABLED 0x80 - - static void --__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); -+__do_set_cpus_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ /* -+ * This here violates the locking rules for affinity, since we're only -+ * supposed to change these variables while holding both rq->lock and -+ * p->pi_lock. -+ * -+ * HOWEVER, it magically works, because ttwu() is the only code that -+ * accesses these variables under p->pi_lock and only does so after -+ * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() -+ * before finish_task(). -+ * -+ * XXX do further audits, this smells like something putrid. -+ */ -+ SCHED_WARN_ON(!p->on_cpu); -+ p->cpus_ptr = new_mask; -+} - - void migrate_disable(void) - { -@@ -1161,10 +1175,10 @@ void migrate_disable(void) - p->migration_flags &= ~MDF_FORCE_ENABLED; - - /* -- * Violates locking rules! see comment in __do_set_cpus_allowed(). -+ * Violates locking rules! see comment in __do_set_cpus_ptr(). 
- */ - if (p->cpus_ptr == &p->cpus_mask) -- __do_set_cpus_allowed(p, cpumask_of(cpu), SCA_MIGRATE_DISABLE); -+ __do_set_cpus_ptr(p, cpumask_of(cpu)); - } - preempt_enable(); - } -@@ -1192,7 +1206,7 @@ void migrate_enable(void) - */ - WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &p->cpus_mask)); - if (p->cpus_ptr != &p->cpus_mask) -- __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); -+ __do_set_cpus_ptr(p, &p->cpus_mask); - /* - * Mustn't clear migration_disabled() until cpus_ptr points back at the - * regular cpus_mask, otherwise things that race (eg. -@@ -1345,43 +1359,22 @@ static int migration_cpu_stop(void *data) - } - - static inline void --set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) - { -- if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) { -- p->cpus_ptr = new_mask; -- return; -- } -- - cpumask_copy(&p->cpus_mask, new_mask); - p->nr_cpus_allowed = cpumask_weight(new_mask); - } - - static void --__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags) -+__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - { -- /* -- * This here violates the locking rules for affinity, since we're only -- * supposed to change these variables while holding both rq->lock and -- * p->pi_lock. -- * -- * HOWEVER, it magically works, because ttwu() is the only code that -- * accesses these variables under p->pi_lock and only does so after -- * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() -- * before finish_task(). -- * -- * XXX do further audits, this smells like something putrid. -- */ -- if (flags & (SCA_MIGRATE_DISABLE | SCA_MIGRATE_ENABLE)) -- SCHED_WARN_ON(!p->on_cpu); -- else -- lockdep_assert_held(&p->pi_lock); -- -- set_cpus_allowed_common(p, new_mask, flags); -+ lockdep_assert_held(&p->pi_lock); -+ set_cpus_allowed_common(p, new_mask); - } - - void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - { -- __do_set_cpus_allowed(p, new_mask, 0); -+ __do_set_cpus_allowed(p, new_mask); - } - - #endif -@@ -1740,7 +1733,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - goto out; - } - -- __do_set_cpus_allowed(p, new_mask, flags); -+ __do_set_cpus_allowed(p, new_mask); - - /* Can the task run on the task's current CPU? If so, we're done */ - if (cpumask_test_cpu(task_cpu(p), new_mask)) -@@ -1748,7 +1741,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - - if (p->migration_disabled) { - if (likely(p->cpus_ptr != &p->cpus_mask)) -- __do_set_cpus_allowed(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); -+ __do_set_cpus_ptr(p, &p->cpus_mask); - p->migration_disabled = 0; - p->migration_flags |= MDF_FORCE_ENABLED; - /* When p is migrate_disabled, rq->lock should be held */ -@@ -6076,7 +6069,7 @@ void init_idle(struct task_struct *idle, int cpu) - * - * And since this is boot we can forgo the serialisation. - */ -- set_cpus_allowed_common(idle, cpumask_of(cpu), 0); -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); - #endif - - /* Silence PROVE_RCU */ --- -2.37.0 - - -From 6837036676afbba59f7876ae8761ba146c369822 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 19 Apr 2021 10:11:56 +0800 -Subject: [PATCH 136/297] sched/alt: Fix task migratie to dying cpu. 
- -Fix #23 - -WARNING: CPU: 2 PID: 26 at kernel/sched/alt_core.c:6294 -sched_cpu_dying.cold+0xc/0xd2 ---- - kernel/sched/alt_core.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4ed1ff9f1aab..6350afe33985 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1619,7 +1619,7 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) - { - cpumask_t chk_mask, tmp; - -- if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) -+ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_active_mask))) - return select_fallback_rq(task_cpu(p), p); - - if ( -@@ -3420,6 +3420,10 @@ static inline void sg_balance_check(struct rq *rq) - if (cpumask_empty(&sched_sg_idle_mask)) - return; - -+ /* exit when cpu is offline */ -+ if (unlikely(!rq->online)) -+ return; -+ - cpu = cpu_of(rq); - /* - * Only cpu in slibing idle group will do the checking and then --- -2.37.0 - - -From 1716236d1fc190aa7b283ae2323dc38c19f350e1 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 27 Apr 2021 10:37:22 +0800 -Subject: [PATCH 137/297] Project-C v5.12-r1 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 6350afe33985..c85e3ccf9302 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,7 +52,7 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - --#define ALT_SCHED_VERSION "v5.12-r0" -+#define ALT_SCHED_VERSION "v5.12-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 913f33c1caa0c1d396ccc470f275ce987cae3f09 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 7 Jun 2021 14:32:09 +0800 -Subject: [PATCH 138/297] sched/alt: [Sync] 7fae6c8171d2 psi: Use ONCPU state - tracking machinery to detect reclaim - ---- - kernel/sched/alt_core.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c85e3ccf9302..8eea87597c2e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3342,7 +3342,6 @@ void scheduler_tick(void) - - scheduler_task_tick(rq); - calc_global_load_tick(rq); -- psi_task_tick(rq); - - rq->last_tick = rq->clock; - raw_spin_unlock(&rq->lock); --- -2.37.0 - - -From d71e5f775c30c6c0c015beb8b2e82af89afa31f3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 8 Jun 2021 06:33:50 +0800 -Subject: [PATCH 139/297] sched/alt: [Sync] c6f886546cb8 sched/fair: Trigger - the update of blocked load on newly idle cpu - ---- - kernel/sched/alt_sched.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 2a6a0530fbb7..b3436b11ba7c 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -683,4 +683,5 @@ static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); - -+static inline void nohz_run_idle_balance(int cpu) { } - #endif /* ALT_SCHED_H */ --- -2.37.0 - - -From 12e6ae4f0cf744e3ccb7c7f96126eb3bfb563189 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 8 Jun 2021 08:52:49 +0800 -Subject: [PATCH 140/297] sched/alt: [Sync] 9432bbd969c6 static_call: Relax - static_call_update() function argument type - ---- - kernel/sched/alt_core.c | 18 +++++++++--------- - 1 file changed, 9 
insertions(+), 9 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 8eea87597c2e..fc35459d76b0 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4380,25 +4380,25 @@ static void sched_dynamic_update(int mode) - switch (mode) { - case preempt_dynamic_none: - static_call_update(cond_resched, __cond_resched); -- static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); -- static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); -- static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); -- static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); -+ static_call_update(might_resched, (void *)&__static_call_return0); -+ static_call_update(preempt_schedule, NULL); -+ static_call_update(preempt_schedule_notrace, NULL); -+ static_call_update(irqentry_exit_cond_resched, NULL); - pr_info("Dynamic Preempt: none\n"); - break; - - case preempt_dynamic_voluntary: - static_call_update(cond_resched, __cond_resched); - static_call_update(might_resched, __cond_resched); -- static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); -- static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); -- static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); -+ static_call_update(preempt_schedule, NULL); -+ static_call_update(preempt_schedule_notrace, NULL); -+ static_call_update(irqentry_exit_cond_resched, NULL); - pr_info("Dynamic Preempt: voluntary\n"); - break; - - case preempt_dynamic_full: -- static_call_update(cond_resched, (typeof(&__cond_resched)) __static_call_return0); -- static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); -+ static_call_update(cond_resched, (void *)&__static_call_return0); -+ static_call_update(might_resched, (void *)&__static_call_return0); - static_call_update(preempt_schedule, __preempt_schedule_func); - static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); - static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); --- -2.37.0 - - -From e968346110a1ee38f221db41c22703e9893bedac Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 8 Jun 2021 08:55:38 +0800 -Subject: [PATCH 141/297] sched/alt: [Sync] 13c2235b2b28 sched: Remove - unnecessary variable from schedule_tail() - ---- - kernel/sched/alt_core.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index fc35459d76b0..f052be4a3ff5 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3029,8 +3029,6 @@ static struct rq *finish_task_switch(struct task_struct *prev) - asmlinkage __visible void schedule_tail(struct task_struct *prev) - __releases(rq->lock) - { -- struct rq *rq; -- - /* - * New tasks start with FORK_PREEMPT_COUNT, see there and - * finish_task_switch() for details. -@@ -3040,7 +3038,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) - * PREEMPT_COUNT kernels). 
- */ - -- rq = finish_task_switch(prev); -+ finish_task_switch(prev); - preempt_enable(); - - if (current->set_child_tid) --- -2.37.0 - - -From f99530410f74ad9c9040f52306ef4f4e470ddde4 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 8 Jun 2021 08:58:13 +0800 -Subject: [PATCH 142/297] sched/alt: [Sync] 7e1b2eb74928 sched/core: Stop using - magic values in sched_dynamic_mode() - ---- - kernel/sched/alt_core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f052be4a3ff5..b4a42e218b12 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4352,13 +4352,13 @@ static int preempt_dynamic_mode = preempt_dynamic_full; - static int sched_dynamic_mode(const char *str) - { - if (!strcmp(str, "none")) -- return 0; -+ return preempt_dynamic_none; - - if (!strcmp(str, "voluntary")) -- return 1; -+ return preempt_dynamic_voluntary; - - if (!strcmp(str, "full")) -- return 2; -+ return preempt_dynamic_full; - - return -1; - } --- -2.37.0 - - -From 3a32bee45fe806e27213a530bf399c7de997da5b Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 8 Jun 2021 08:59:00 +0800 -Subject: [PATCH 143/297] sched/alt: [Sync] c4681f3f1cfc sched/core: Use - -EINVAL in sched_dynamic_mode() - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b4a42e218b12..6b7136682e39 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4360,7 +4360,7 @@ static int sched_dynamic_mode(const char *str) - if (!strcmp(str, "full")) - return preempt_dynamic_full; - -- return -1; -+ return -EINVAL; - } - - static void sched_dynamic_update(int mode) --- -2.37.0 - - -From 017d061930108f08e64ac9f1fd9af2ae0fca1812 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 8 Jun 2021 09:53:27 +0800 -Subject: [PATCH 144/297] sched/alt: [Sync] b5c4477366fb sched: Use cpu_dying() - to fix balance_push vs hotplug-rollback - ---- - kernel/sched/alt_core.c | 26 ++++++++++++++------------ - kernel/sched/alt_sched.h | 1 - - 2 files changed, 14 insertions(+), 13 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 6b7136682e39..4947e3446124 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1247,7 +1247,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) - return cpu_online(cpu); - - /* Regular kernel threads don't get to stay during offline. */ -- if (cpu_rq(cpu)->balance_push) -+ if (cpu_dying(cpu)) - return false; - - /* But are allowed during online. */ -@@ -6172,7 +6172,8 @@ static int __balance_push_cpu_stop(void *arg) - static DEFINE_PER_CPU(struct cpu_stop_work, push_work); - - /* -- * Ensure we only run per-cpu kthreads once the CPU goes !active. -+ * This is enabled below SCHED_AP_ACTIVE; when !cpu_active(), but only -+ * effective when the hotplug motion is down. - */ - static void balance_push(struct rq *rq) - { -@@ -6180,11 +6181,18 @@ static void balance_push(struct rq *rq) - - lockdep_assert_held(&rq->lock); - SCHED_WARN_ON(rq->cpu != smp_processor_id()); -+ - /* - * Ensure the thing is persistent until balance_push_set(.on = false); - */ - rq->balance_callback = &balance_push_callback; - -+ /* -+ * Only active while going offline. -+ */ -+ if (!cpu_dying(rq->cpu)) -+ return; -+ - /* - * Both the cpu-hotplug and stop task are in this case and are - * required to complete the hotplug process. 
-@@ -6238,7 +6246,6 @@ static void balance_push_set(int cpu, bool on) - struct rq_flags rf; - - rq_lock_irqsave(rq, &rf); -- rq->balance_push = on; - if (on) { - WARN_ON_ONCE(rq->balance_callback); - rq->balance_callback = &balance_push_callback; -@@ -6343,8 +6350,8 @@ int sched_cpu_activate(unsigned int cpu) - unsigned long flags; - - /* -- * Make sure that when the hotplug state machine does a roll-back -- * we clear balance_push. Ideally that would happen earlier... -+ * Clear the balance_push callback and prepare to schedule -+ * regular tasks. - */ - balance_push_set(cpu, false); - -@@ -6517,12 +6524,6 @@ int sched_cpu_dying(unsigned int cpu) - } - raw_spin_unlock_irqrestore(&rq->lock, flags); - -- /* -- * Now that the CPU is offline, make sure we're welcome -- * to new tasks once we come back up. -- */ -- balance_push_set(cpu, false); -- - calc_load_migrate(rq); - hrtick_clear(rq); - return 0; -@@ -6691,7 +6692,7 @@ void __init sched_init(void) - #ifdef CONFIG_NO_HZ_COMMON - INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); - #endif -- rq->balance_callback = NULL; -+ rq->balance_callback = &balance_push_callback; - #ifdef CONFIG_HOTPLUG_CPU - rcuwait_init(&rq->hotplug_wait); - #endif -@@ -6723,6 +6724,7 @@ void __init sched_init(void) - - #ifdef CONFIG_SMP - idle_thread_set_boot_cpu(); -+ balance_push_set(smp_processor_id(), false); - - sched_init_topology_cpumask_early(); - #endif /* SMP */ -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index b3436b11ba7c..6902a2579d73 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -175,7 +175,6 @@ struct rq { - struct cpu_stop_work active_balance_work; - #endif - struct callback_head *balance_callback; -- unsigned char balance_push; - #ifdef CONFIG_HOTPLUG_CPU - struct rcuwait hotplug_wait; - #endif --- -2.37.0 - - -From 07addd5369fb59a247afb899ca93fa47f9ab18c1 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 8 Jun 2021 16:40:51 +0000 -Subject: [PATCH 145/297] sched/alt: Use common code in debug.c - -This commit sync-up following commits -8a99b6833c88 sched: Move SCHED_DEBUG sysctl to debugfs -1011dcce99f8 sched,preempt: Move preempt_dynamic to debug.c ---- - kernel/sched/Makefile | 3 +- - kernel/sched/alt_core.c | 75 ++-------------------------------------- - kernel/sched/alt_sched.h | 11 ++++-- - kernel/sched/debug.c | 10 ++++++ - 4 files changed, 24 insertions(+), 75 deletions(-) - -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 01b041388419..31d587c16ec1 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -29,7 +29,8 @@ endif - # build parallelizes well and finishes roughly at once: - # - ifdef CONFIG_SCHED_ALT --obj-y += alt_core.o alt_debug.o -+obj-y += alt_core.o -+obj-$(CONFIG_SCHED_DEBUG) += alt_debug.o - else - obj-y += core.o - obj-y += fair.o -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4947e3446124..cf4ad8c4203d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4347,9 +4347,9 @@ enum { - preempt_dynamic_full, - }; - --static int preempt_dynamic_mode = preempt_dynamic_full; -+int preempt_dynamic_mode = preempt_dynamic_full; - --static int sched_dynamic_mode(const char *str) -+int sched_dynamic_mode(const char *str) - { - if (!strcmp(str, "none")) - return preempt_dynamic_none; -@@ -4363,7 +4363,7 @@ static int sched_dynamic_mode(const char *str) - return -EINVAL; - } - --static void sched_dynamic_update(int mode) -+void sched_dynamic_update(int mode) - { - /* - * Avoid {NONE,VOLUNTARY} -> FULL 
transitions from ever ending up in -@@ -4420,77 +4420,8 @@ static int __init setup_preempt_mode(char *str) - } - __setup("preempt=", setup_preempt_mode); - --#ifdef CONFIG_SCHED_DEBUG -- --static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, -- size_t cnt, loff_t *ppos) --{ -- char buf[16]; -- int mode; -- -- if (cnt > 15) -- cnt = 15; -- -- if (copy_from_user(&buf, ubuf, cnt)) -- return -EFAULT; -- -- buf[cnt] = 0; -- mode = sched_dynamic_mode(strstrip(buf)); -- if (mode < 0) -- return mode; -- -- sched_dynamic_update(mode); -- -- *ppos += cnt; -- -- return cnt; --} -- --static int sched_dynamic_show(struct seq_file *m, void *v) --{ -- static const char * preempt_modes[] = { -- "none", "voluntary", "full" -- }; -- int i; -- -- for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { -- if (preempt_dynamic_mode == i) -- seq_puts(m, "("); -- seq_puts(m, preempt_modes[i]); -- if (preempt_dynamic_mode == i) -- seq_puts(m, ")"); -- -- seq_puts(m, " "); -- } -- -- seq_puts(m, "\n"); -- return 0; --} -- --static int sched_dynamic_open(struct inode *inode, struct file *filp) --{ -- return single_open(filp, sched_dynamic_show, NULL); --} -- --static const struct file_operations sched_dynamic_fops = { -- .open = sched_dynamic_open, -- .write = sched_dynamic_write, -- .read = seq_read, -- .llseek = seq_lseek, -- .release = single_release, --}; -- --static __init int sched_init_debug_dynamic(void) --{ -- debugfs_create_file("sched_preempt", 0644, NULL, NULL, &sched_dynamic_fops); -- return 0; --} --late_initcall(sched_init_debug_dynamic); -- --#endif /* CONFIG_SCHED_DEBUG */ - #endif /* CONFIG_PREEMPT_DYNAMIC */ - -- - /* - * This is the entry point to schedule() from kernel preemption - * off of irq context. -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 6902a2579d73..cf9ed4480d86 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -60,6 +60,7 @@ - # define SCHED_WARN_ON(x) WARN_ONCE(x, #x) - #else - # define SCHED_WARN_ON(x) ({ (void)(x), 0; }) -+static inline void resched_latency_warn(int cpu, u64 latency) {} - #endif - - /* -@@ -679,8 +680,14 @@ static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) - } - #endif - --void swake_up_all_locked(struct swait_queue_head *q); --void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+extern void swake_up_all_locked(struct swait_queue_head *q); -+extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+#ifdef CONFIG_PREEMPT_DYNAMIC -+extern int preempt_dynamic_mode; -+extern int sched_dynamic_mode(const char *str); -+extern void sched_dynamic_update(int mode); -+#endif - - static inline void nohz_run_idle_balance(int cpu) { } - #endif /* ALT_SCHED_H */ -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index bb3d63bdf4ae..c87c87da2ba5 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -7,6 +7,7 @@ - * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar - */ - -+#ifndef CONFIG_SCHED_ALT - /* - * This allows printing both to /proc/sched_debug and - * to the console -@@ -215,6 +216,7 @@ static const struct file_operations sched_scaling_fops = { - }; - - #endif /* SMP */ -+#endif /* !CONFIG_SCHED_ALT */ - - #ifdef CONFIG_PREEMPT_DYNAMIC - -@@ -278,6 +280,7 @@ static const struct file_operations sched_dynamic_fops = { - - #endif /* CONFIG_PREEMPT_DYNAMIC */ - -+#ifndef CONFIG_SCHED_ALT - __read_mostly bool sched_debug_verbose; - - static const struct seq_operations sched_debug_sops; -@@ -293,6 
+296,7 @@ static const struct file_operations sched_debug_fops = { - .llseek = seq_lseek, - .release = seq_release, - }; -+#endif /* !CONFIG_SCHED_ALT */ - - static struct dentry *debugfs_sched; - -@@ -302,12 +306,15 @@ static __init int sched_init_debug(void) - - debugfs_sched = debugfs_create_dir("sched", NULL); - -+#ifndef CONFIG_SCHED_ALT - debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); - debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose); -+#endif /* !CONFIG_SCHED_ALT */ - #ifdef CONFIG_PREEMPT_DYNAMIC - debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); - #endif - -+#ifndef CONFIG_SCHED_ALT - debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency); - debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity); - debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity); -@@ -336,11 +343,13 @@ static __init int sched_init_debug(void) - #endif - - debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); -+#endif /* !CONFIG_SCHED_ALT */ - - return 0; - } - late_initcall(sched_init_debug); - -+#ifndef CONFIG_SCHED_ALT - #ifdef CONFIG_SMP - - static cpumask_var_t sd_sysctl_cpus; -@@ -1077,3 +1086,4 @@ void resched_latency_warn(int cpu, u64 latency) - "without schedule\n", - cpu, latency, cpu_rq(cpu)->ticks_without_resched); - } -+#endif /* !CONFIG_SCHED_ALT */ --- -2.37.0 - - -From 400ec7dc8745e9cff692c2f01d8e1a00fc98eb30 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 9 Jun 2021 09:48:48 +0000 -Subject: [PATCH 146/297] sched/alt: [Sync] c006fac556e4 sched: Warn on long - periods of pending need_resched - ---- - kernel/sched/alt_core.c | 74 +++++++++++++++++++++++++++++++++++++++- - kernel/sched/alt_sched.h | 6 ++++ - kernel/sched/debug.c | 2 +- - 3 files changed, 80 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index cf4ad8c4203d..345a2b606b65 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -52,6 +52,21 @@ - */ - EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); - -+#ifdef CONFIG_SCHED_DEBUG -+#define sched_feat(x) (1) -+/* -+ * Print a warning if need_resched is set for the given duration (if -+ * LATENCY_WARN is enabled). -+ * -+ * If sysctl_resched_latency_warn_once is set, only one warning will be shown -+ * per boot. 
-+ */ -+__read_mostly int sysctl_resched_latency_warn_ms = 100; -+__read_mostly int sysctl_resched_latency_warn_once = 1; -+#else -+#define sched_feat(x) (0) -+#endif /* CONFIG_SCHED_DEBUG */ -+ - #define ALT_SCHED_VERSION "v5.12-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ -@@ -3323,6 +3338,55 @@ static inline void scheduler_task_tick(struct rq *rq) - set_preempt_need_resched(); - } - -+#ifdef CONFIG_SCHED_DEBUG -+static u64 cpu_resched_latency(struct rq *rq) -+{ -+ int latency_warn_ms = READ_ONCE(sysctl_resched_latency_warn_ms); -+ u64 resched_latency, now = rq_clock(rq); -+ static bool warned_once; -+ -+ if (sysctl_resched_latency_warn_once && warned_once) -+ return 0; -+ -+ if (!need_resched() || !latency_warn_ms) -+ return 0; -+ -+ if (system_state == SYSTEM_BOOTING) -+ return 0; -+ -+ if (!rq->last_seen_need_resched_ns) { -+ rq->last_seen_need_resched_ns = now; -+ rq->ticks_without_resched = 0; -+ return 0; -+ } -+ -+ rq->ticks_without_resched++; -+ resched_latency = now - rq->last_seen_need_resched_ns; -+ if (resched_latency <= latency_warn_ms * NSEC_PER_MSEC) -+ return 0; -+ -+ warned_once = true; -+ -+ return resched_latency; -+} -+ -+static int __init setup_resched_latency_warn_ms(char *str) -+{ -+ long val; -+ -+ if ((kstrtol(str, 0, &val))) { -+ pr_warn("Unable to set resched_latency_warn_ms\n"); -+ return 1; -+ } -+ -+ sysctl_resched_latency_warn_ms = val; -+ return 1; -+} -+__setup("resched_latency_warn_ms=", setup_resched_latency_warn_ms); -+#else -+static inline u64 cpu_resched_latency(struct rq *rq) { return 0; } -+#endif /* CONFIG_SCHED_DEBUG */ -+ - /* - * This function gets called by the timer code, with HZ frequency. - * We call it with interrupts disabled. -@@ -3331,6 +3395,7 @@ void scheduler_tick(void) - { - int cpu __maybe_unused = smp_processor_id(); - struct rq *rq = cpu_rq(cpu); -+ u64 resched_latency; - - arch_scale_freq_tick(); - sched_clock_tick(); -@@ -3339,11 +3404,16 @@ void scheduler_tick(void) - update_rq_clock(rq); - - scheduler_task_tick(rq); -+ if (sched_feat(LATENCY_WARN)) -+ resched_latency = cpu_resched_latency(rq); - calc_global_load_tick(rq); - - rq->last_tick = rq->clock; - raw_spin_unlock(&rq->lock); - -+ if (sched_feat(LATENCY_WARN) && resched_latency) -+ resched_latency_warn(cpu, resched_latency); -+ - perf_event_task_tick(); - } - -@@ -4018,7 +4088,9 @@ static void __sched notrace __schedule(bool preempt) - next = choose_next_task(rq, cpu, prev); - clear_tsk_need_resched(prev); - clear_preempt_need_resched(); -- -+#ifdef CONFIG_SCHED_DEBUG -+ rq->last_seen_need_resched_ns = 0; -+#endif - - if (likely(prev != next)) { - next->last_ran = rq->clock_task; -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index cf9ed4480d86..ac11555ba4f1 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -58,6 +58,7 @@ - - #ifdef CONFIG_SCHED_DEBUG - # define SCHED_WARN_ON(x) WARN_ONCE(x, #x) -+extern void resched_latency_warn(int cpu, u64 latency); - #else - # define SCHED_WARN_ON(x) ({ (void)(x), 0; }) - static inline void resched_latency_warn(int cpu, u64 latency) {} -@@ -155,6 +156,11 @@ struct rq { - - atomic_t nr_iowait; - -+#ifdef CONFIG_SCHED_DEBUG -+ u64 last_seen_need_resched_ns; -+ int ticks_without_resched; -+#endif -+ - #ifdef CONFIG_MEMBARRIER - int membarrier_state; - #endif -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index c87c87da2ba5..4e1680785704 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -1076,6 +1076,7 @@ void proc_sched_set_task(struct 
task_struct *p) - memset(&p->stats, 0, sizeof(p->stats)); - #endif - } -+#endif /* !CONFIG_SCHED_ALT */ - - void resched_latency_warn(int cpu, u64 latency) - { -@@ -1086,4 +1087,3 @@ void resched_latency_warn(int cpu, u64 latency) - "without schedule\n", - cpu, latency, cpu_rq(cpu)->ticks_without_resched); - } --#endif /* !CONFIG_SCHED_ALT */ --- -2.37.0 - - -From 6e43f2c913988e36e242cd28ef52d4a35e05eab8 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 11 Jun 2021 18:22:00 +0000 -Subject: [PATCH 147/297] Project-C v5.13-r0 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 345a2b606b65..2a485c184832 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.12-r1" -+#define ALT_SCHED_VERSION "v5.13-r0" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 47d4d9fce817d9c7ccf27639854e8ecee8936c23 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 15 Jun 2021 09:48:47 +0000 -Subject: [PATCH 148/297] sched/alt: irq save/restore in migration_cpu_stop() - ---- - kernel/sched/alt_core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 2a485c184832..3680162d8d19 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1344,12 +1344,13 @@ static int migration_cpu_stop(void *data) - struct migration_arg *arg = data; - struct task_struct *p = arg->task; - struct rq *rq = this_rq(); -+ unsigned long flags; - - /* - * The original target CPU might have gone down and we might - * be on another CPU but it doesn't matter. - */ -- local_irq_disable(); -+ local_irq_save(flags); - /* - * We need to explicitly wake pending tasks before running - * __migrate_task() such that we will not miss enforcing cpus_ptr -@@ -1367,9 +1368,8 @@ static int migration_cpu_stop(void *data) - if (task_rq(p) == rq && task_on_rq_queued(p)) - rq = __migrate_task(rq, p, arg->dest_cpu); - raw_spin_unlock(&rq->lock); -- raw_spin_unlock(&p->pi_lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); - -- local_irq_enable(); - return 0; - } - --- -2.37.0 - - -From 923b01e019bbce8d8b400c7b2dfc51f5ecd7945f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 17 May 2021 10:25:28 +0000 -Subject: [PATCH 149/297] sched/pds: PDS improvement. - -PDS uses bitmap queue as queue data structure. Rename maybe needed after -all improvement are done. 
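
(Illustration only, not part of the patch: a minimal userspace sketch of the bitmap-queue idea PDS moves to in this commit — one list per priority level plus a bitmap of non-empty levels, so picking the next task is a find-first-set over the bitmap instead of a skip-list walk. All names here, toy_queue / toy_task / toy_pick, are invented for the sketch; the real code below uses DECLARE_BITMAP/find_first_bit and struct list_head.)

#include <stdint.h>
#include <stdio.h>

#define TOY_LEVELS 64

struct toy_task {
	int prio;			/* 0 = highest priority */
	struct toy_task *next;
};

struct toy_queue {
	uint64_t bitmap;		/* bit n set => heads[n] is non-empty */
	struct toy_task *heads[TOY_LEVELS];
};

/* append to the tail of its priority list and mark that level busy */
static void toy_enqueue(struct toy_queue *q, struct toy_task *t)
{
	struct toy_task **pp = &q->heads[t->prio];

	while (*pp)
		pp = &(*pp)->next;
	t->next = NULL;
	*pp = t;
	q->bitmap |= 1ULL << t->prio;
}

/* pick: find-first-set on the bitmap, then pop the head of that list */
static struct toy_task *toy_pick(struct toy_queue *q)
{
	struct toy_task *t;
	int lvl;

	if (!q->bitmap)
		return NULL;
	lvl = __builtin_ctzll(q->bitmap);	/* lowest set bit = highest prio */
	t = q->heads[lvl];
	q->heads[lvl] = t->next;
	if (!q->heads[lvl])
		q->bitmap &= ~(1ULL << lvl);
	return t;
}

int main(void)
{
	struct toy_queue q = { 0 };
	struct toy_task a = { .prio = 10 }, b = { .prio = 3 };

	toy_enqueue(&q, &a);
	toy_enqueue(&q, &b);
	printf("first pick prio %d\n", toy_pick(&q)->prio);	/* prints 3 */
	return 0;
}
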
---- - include/linux/sched.h | 7 +- - include/linux/skip_list.h | 175 ------------------------ - init/init_task.c | 3 +- - kernel/sched/alt_core.c | 11 +- - kernel/sched/alt_sched.h | 3 +- - kernel/sched/pds.h | 5 + - kernel/sched/pds_imp.h | 281 ++++++++++++++++++++++---------------- - 7 files changed, 183 insertions(+), 302 deletions(-) - delete mode 100644 include/linux/skip_list.h - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 2bbe9f377c6e..948c6d84757a 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -34,7 +34,6 @@ - #include - #include - #include --#include - #include - - /* task_struct member predeclarations (sorted alphabetically): */ -@@ -791,10 +790,8 @@ struct task_struct { - #ifdef CONFIG_SCHED_PDS - u64 deadline; - u64 priodl; -- /* skip list level */ -- int sl_level; -- /* skip list node */ -- struct skiplist_node sl_node; -+ int sq_idx; -+ struct list_head sq_node; - #endif /* CONFIG_SCHED_PDS */ - /* sched_clock time spent running */ - u64 sched_time; -diff --git a/include/linux/skip_list.h b/include/linux/skip_list.h -deleted file mode 100644 -index 637c83ecbd6b..000000000000 ---- a/include/linux/skip_list.h -+++ /dev/null -@@ -1,175 +0,0 @@ --/* -- * Copyright (C) 2016 Alfred Chen. -- * -- * Code based on Con Kolivas's skip list implementation for BFS, and -- * which is based on example originally by William Pugh. -- * -- * Skip Lists are a probabilistic alternative to balanced trees, as -- * described in the June 1990 issue of CACM and were invented by -- * William Pugh in 1987. -- * -- * A couple of comments about this implementation: -- * -- * This file only provides a infrastructure of skip list. -- * -- * skiplist_node is embedded into container data structure, to get rid -- * the dependency of kmalloc/kfree operation in scheduler code. -- * -- * A customized search function should be defined using DEFINE_SKIPLIST_INSERT -- * macro and be used for skip list insert operation. -- * -- * Random Level is also not defined in this file, instead, it should be -- * customized implemented and set to node->level then pass to the customized -- * skiplist_insert function. -- * -- * Levels start at zero and go up to (NUM_SKIPLIST_LEVEL -1) -- * -- * NUM_SKIPLIST_LEVEL in this implementation is 8 instead of origin 16, -- * considering that there will be 256 entries to enable the top level when using -- * random level p=0.5, and that number is more than enough for a run queue usage -- * in a scheduler usage. And it also help to reduce the memory usage of the -- * embedded skip list node in task_struct to about 50%. -- * -- * The insertion routine has been implemented so as to use the -- * dirty hack described in the CACM paper: if a random level is -- * generated that is more than the current maximum level, the -- * current maximum level plus one is used instead. -- * -- * BFS Notes: In this implementation of skiplists, there are bidirectional -- * next/prev pointers and the insert function returns a pointer to the actual -- * node the value is stored. The key here is chosen by the scheduler so as to -- * sort tasks according to the priority list requirements and is no longer used -- * by the scheduler after insertion. The scheduler lookup, however, occurs in -- * O(1) time because it is always the first item in the level 0 linked list. 
-- * Since the task struct stores a copy of the node pointer upon skiplist_insert, -- * it can also remove it much faster than the original implementation with the -- * aid of prev<->next pointer manipulation and no searching. -- */ --#ifndef _LINUX_SKIP_LIST_H --#define _LINUX_SKIP_LIST_H -- --#include -- --#define NUM_SKIPLIST_LEVEL (4) -- --struct skiplist_node { -- int level; /* Levels in this node */ -- struct skiplist_node *next[NUM_SKIPLIST_LEVEL]; -- struct skiplist_node *prev[NUM_SKIPLIST_LEVEL]; --}; -- --#define SKIPLIST_NODE_INIT(name) { 0,\ -- {&name, &name, &name, &name},\ -- {&name, &name, &name, &name},\ -- } -- --/** -- * INIT_SKIPLIST_NODE -- init a skiplist_node, expecially for header -- * @node: the skip list node to be inited. -- */ --static inline void INIT_SKIPLIST_NODE(struct skiplist_node *node) --{ -- int i; -- -- node->level = 0; -- for (i = 0; i < NUM_SKIPLIST_LEVEL; i++) { -- WRITE_ONCE(node->next[i], node); -- node->prev[i] = node; -- } --} -- --/** -- * skiplist_entry - get the struct for this entry -- * @ptr: the &struct skiplist_node pointer. -- * @type: the type of the struct this is embedded in. -- * @member: the name of the skiplist_node within the struct. -- */ --#define skiplist_entry(ptr, type, member) \ -- container_of(ptr, type, member) -- --/** -- * DEFINE_SKIPLIST_INSERT_FUNC -- macro to define a customized skip list insert -- * function, which takes two parameters, first one is the header node of the -- * skip list, second one is the skip list node to be inserted -- * @func_name: the customized skip list insert function name -- * @search_func: the search function to be used, which takes two parameters, -- * 1st one is the itrator of skiplist_node in the list, the 2nd is the skip list -- * node to be inserted, the function should return true if search should be -- * continued, otherwise return false. -- * Returns 1 if @node is inserted as the first item of skip list at level zero, -- * otherwise 0 -- */ --#define DEFINE_SKIPLIST_INSERT_FUNC(func_name, search_func)\ --static inline int func_name(struct skiplist_node *head, struct skiplist_node *node)\ --{\ -- struct skiplist_node *p, *q;\ -- unsigned int k = head->level;\ -- unsigned int l = node->level;\ --\ -- p = head;\ -- if (l > k) {\ -- l = node->level = ++head->level;\ --\ -- node->next[l] = head;\ -- node->prev[l] = head;\ -- head->next[l] = node;\ -- head->prev[l] = node;\ --\ -- do {\ -- while (q = p->next[k], q != head && search_func(q, node))\ -- p = q;\ --\ -- node->prev[k] = p;\ -- node->next[k] = q;\ -- q->prev[k] = node;\ -- p->next[k] = node;\ -- } while (k--);\ --\ -- return (p == head);\ -- }\ --\ -- while (k > l) {\ -- while (q = p->next[k], q != head && search_func(q, node))\ -- p = q;\ -- k--;\ -- }\ --\ -- do {\ -- while (q = p->next[k], q != head && search_func(q, node))\ -- p = q;\ --\ -- node->prev[k] = p;\ -- node->next[k] = q;\ -- q->prev[k] = node;\ -- p->next[k] = node;\ -- } while (k--);\ --\ -- return (p == head);\ --} -- --/** -- * skiplist_del_init -- delete skip list node from a skip list and reset it's -- * init state -- * @head: the header node of the skip list to be deleted from. -- * @node: the skip list node to be deleted, the caller need to ensure @node is -- * in skip list which @head represent. 
-- * Returns 1 if @node is the first item of skip level at level zero, otherwise 0 -- */ --static inline int --skiplist_del_init(struct skiplist_node *head, struct skiplist_node *node) --{ -- unsigned int i, level = node->level; -- -- for (i = 0; i <= level; i++) { -- node->prev[i]->next[i] = node->next[i]; -- node->next[i]->prev[i] = node->prev[i]; -- } -- if (level == head->level && level) { -- while (head->next[level] == head && level) -- level--; -- head->level = level; -- } -- -- return (node->prev[0] == head); --} --#endif /* _LINUX_SKIP_LIST_H */ -diff --git a/init/init_task.c b/init/init_task.c -index b59f5a2fefc8..0374297c45b9 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -107,8 +107,7 @@ struct task_struct init_task - #endif - #ifdef CONFIG_SCHED_PDS - .deadline = 0, -- .sl_level = 0, -- .sl_node = SKIPLIST_NODE_INIT(init_task.sl_node), -+ .sq_node = LIST_HEAD_INIT(init_task.sq_node), - #endif - .time_slice = HZ, - #else -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3680162d8d19..01abbf28670f 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -509,6 +509,7 @@ static inline void update_rq_clock(struct rq *rq) - if (unlikely(delta <= 0)) - return; - rq->clock += delta; -+ update_rq_time_edge(rq); - update_rq_clock_task(rq, delta); - } - -@@ -3815,7 +3816,15 @@ void alt_sched_debug(void) - sched_sg_idle_mask.bits[0]); - } - #else --inline void alt_sched_debug(void) {} -+int alt_debug[20]; -+ -+inline void alt_sched_debug(void) -+{ -+ int i; -+ -+ for (i = 0; i < 3; i++) -+ printk(KERN_INFO "sched: %d\n", alt_debug[i]); -+} - #endif - - #ifdef CONFIG_SMP -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index ac11555ba4f1..cfb4669dfbbf 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -147,7 +147,8 @@ struct rq { - struct bmq queue; - #endif - #ifdef CONFIG_SCHED_PDS -- struct skiplist_node sl_header; -+ struct sched_queue queue; -+ u64 time_edge; - #endif - unsigned long watermark; - -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 623908cf4380..3afc6fd7a27f 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -6,4 +6,9 @@ - #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) - #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) - -+struct sched_queue { -+ DECLARE_BITMAP(bitmap, SCHED_BITS); -+ struct list_head heads[SCHED_BITS]; -+}; -+ - #endif -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index 335ce3a8e3ec..35886852c71a 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -11,26 +11,7 @@ static const u64 user_prio2deadline[NICE_WIDTH] = { - /* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 - }; - --static const unsigned char dl_level_map[] = { --/* 0 4 8 12 */ -- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, --/* 16 20 24 28 */ -- 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, --/* 32 36 40 44 */ -- 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, --/* 48 52 56 60 */ -- 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, --/* 64 68 72 76 */ -- 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 7, 6, 5, 4, 3, 2, --/* 80 84 88 92 */ -- 1, 0 --}; -- --/* DEFAULT_SCHED_PRIO: -- * dl_level_map[(user_prio2deadline[39] - user_prio2deadline[0]) >> 21] = -- * dl_level_map[68] = -- * 10 -- */ -+#define SCHED_PRIO_SLOT (4ULL << 20) - #define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) - - static inline int normal_prio(struct task_struct *p) -@@ -41,21 +22,46 @@ static inline int 
normal_prio(struct task_struct *p) - return MAX_RT_PRIO; - } - -+extern int alt_debug[20]; -+ - static inline int --task_sched_prio(const struct task_struct *p, const struct rq *rq) -+task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { -- size_t delta; -+ int delta; -+ -+ delta = rq->time_edge + 20 - (p->deadline >> 23); -+ if (delta < 0) { -+ delta = 0; -+ alt_debug[0]++; -+ } -+ delta = 19 - min(delta, 19); -+ -+ return delta; -+} - -+static inline int -+task_sched_prio(const struct task_struct *p, const struct rq *rq) -+{ - if (p == rq->idle) - return IDLE_TASK_SCHED_PRIO; - - if (p->prio < MAX_RT_PRIO) - return p->prio; - -- delta = (rq->clock + user_prio2deadline[39] - p->deadline) >> 21; -- delta = min((size_t)delta, ARRAY_SIZE(dl_level_map) - 1); -+ return MAX_RT_PRIO + task_sched_prio_normal(p, rq); -+} -+ -+static inline int -+task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) -+{ -+ if (p == rq->idle) -+ return IDLE_TASK_SCHED_PRIO; - -- return MAX_RT_PRIO + dl_level_map[delta]; -+ if (p->prio < MAX_RT_PRIO) -+ return p->prio; -+ -+ return MAX_RT_PRIO + -+ (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; - } - - int task_running_nice(struct task_struct *p) -@@ -68,6 +74,53 @@ static inline void update_task_priodl(struct task_struct *p) - p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); - } - -+ -+DECLARE_BITMAP(normal_mask, SCHED_BITS); -+ -+static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) -+{ -+ DECLARE_BITMAP(normal, SCHED_BITS); -+ -+ bitmap_and(normal, mask, normal_mask, SCHED_BITS); -+ bitmap_shift_right(normal, normal, shift, SCHED_BITS); -+ bitmap_and(normal, normal, normal_mask, SCHED_BITS); -+ -+ bitmap_andnot(mask, mask, normal_mask, SCHED_BITS); -+ bitmap_or(mask, mask, normal, SCHED_BITS); -+} -+ -+static inline void update_rq_time_edge(struct rq *rq) -+{ -+ struct list_head head; -+ u64 old = rq->time_edge; -+ u64 now = rq->clock >> 23; -+ u64 prio, delta = min(20ULL, now - old); -+ -+ if (now == old) -+ return; -+ -+ INIT_LIST_HEAD(&head); -+ -+ prio = MAX_RT_PRIO; -+ for_each_set_bit_from(prio, rq->queue.bitmap, MAX_RT_PRIO + delta) { -+ u64 idx; -+ -+ idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + rq->time_edge) % 20; -+ list_splice_tail_init(rq->queue.heads + idx, &head); -+ } -+ sched_shift_normal_bitmap(rq->queue.bitmap, delta); -+ rq->time_edge = now; -+ if (!list_empty(&head)) { -+ struct task_struct *p; -+ -+ list_for_each_entry(p, &head, sq_node) -+ p->sq_idx = MAX_RT_PRIO + now % 20; -+ -+ list_splice(&head, rq->queue.heads + MAX_RT_PRIO + now % 20); -+ set_bit(MAX_RT_PRIO, rq->queue.bitmap); -+ } -+} -+ - static inline void requeue_task(struct task_struct *p, struct rq *rq); - - static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -@@ -77,40 +130,25 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - - if (p->prio >= MAX_RT_PRIO) - p->deadline = rq->clock + -- user_prio2deadline[p->static_prio - MAX_RT_PRIO]; -+ SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); - update_task_priodl(p); - - if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) - requeue_task(p, rq); - } - --/* -- * pds_skiplist_task_search -- search function used in PDS run queue skip list -- * node insert operation. -- * @it: iterator pointer to the node in the skip list -- * @node: pointer to the skiplist_node to be inserted -- * -- * Returns true if key of @it is less or equal to key value of @node, otherwise -- * false. 
-- */ --static inline bool --pds_skiplist_task_search(struct skiplist_node *it, struct skiplist_node *node) --{ -- return (skiplist_entry(it, struct task_struct, sl_node)->priodl <= -- skiplist_entry(node, struct task_struct, sl_node)->priodl); --} -- --/* -- * Define the skip list insert function for PDS -- */ --DEFINE_SKIPLIST_INSERT_FUNC(pds_skiplist_insert, pds_skiplist_task_search); -- - /* - * Init the queue structure in rq - */ - static inline void sched_queue_init(struct rq *rq) - { -- INIT_SKIPLIST_NODE(&rq->sl_header); -+ struct sched_queue *q = &rq->queue; -+ int i; -+ -+ bitmap_set(normal_mask, MAX_RT_PRIO, 20); -+ bitmap_zero(q->bitmap, SCHED_BITS); -+ for(i = 0; i < SCHED_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); - } - - /* -@@ -119,19 +157,33 @@ static inline void sched_queue_init(struct rq *rq) - */ - static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) - { -+ struct sched_queue *q = &rq->queue; - /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ -- int default_prio = idle->prio; - -- idle->prio = MAX_PRIO; -- idle->deadline = 0ULL; -- update_task_priodl(idle); -+ idle->sq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -+ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -+ set_bit(idle->sq_idx, q->bitmap); -+} - -- INIT_SKIPLIST_NODE(&rq->sl_header); -+static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) -+{ -+ if (IDLE_TASK_SCHED_PRIO == idx || -+ idx < MAX_RT_PRIO) -+ return idx; - -- idle->sl_node.level = idle->sl_level; -- pds_skiplist_insert(&rq->sl_header, &idle->sl_node); -+ return MAX_RT_PRIO + -+ ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; -+} - -- idle->prio = default_prio; -+static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) -+{ -+ if (IDLE_TASK_SCHED_PRIO == idx || -+ idx < MAX_RT_PRIO) -+ return idx; -+ -+ return MAX_RT_PRIO + -+ ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; - } - - /* -@@ -139,107 +191,99 @@ static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle - */ - static inline struct task_struct *sched_rq_first_task(struct rq *rq) - { -- struct skiplist_node *node = rq->sl_header.next[0]; -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; - -- BUG_ON(node == &rq->sl_header); -- return skiplist_entry(node, struct task_struct, sl_node); -+ /* -+ if (list_empty(head)) { -+ pr_err("BUG: cpu%d(time_edge%llu) prio%lu idx%lu mismatched\n", -+ rq->cpu, rq->time_edge, idx, sched_prio2idx(idx, rq)); -+ BUG(); -+ }*/ -+ return list_first_entry(head, struct task_struct, sq_node); - } - - static inline struct task_struct * - sched_rq_next_task(struct task_struct *p, struct rq *rq) - { -- struct skiplist_node *next = p->sl_node.next[0]; -+ unsigned long idx = p->sq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->sq_node, head)) { -+ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, -+ sched_idx2prio(idx, rq) + 1); -+ head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -+ -+ return list_first_entry(head, struct task_struct, sq_node); -+ } - -- BUG_ON(next == &rq->sl_header); -- return skiplist_entry(next, struct task_struct, sl_node); -+ return list_next_entry(p, sq_node); - } - - static inline unsigned long sched_queue_watermark(struct rq *rq) - { -- return task_sched_prio(sched_rq_first_task(rq), rq); -+ return find_first_bit(rq->queue.bitmap, SCHED_BITS); - } - - #define 
__SCHED_DEQUEUE_TASK(p, rq, flags, func) \ - psi_dequeue(p, flags & DEQUEUE_SLEEP); \ - sched_info_dequeued(rq, p); \ - \ -- if (skiplist_del_init(&rq->sl_header, &p->sl_node)) { \ -+ list_del(&p->sq_node); \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -+ clear_bit(sched_idx2prio(p->sq_idx, rq), \ -+ rq->queue.bitmap); \ - func; \ -- } -+ } \ -+ /*\ -+ pr_info("-->: cpu%d(time_edge%llu) prio%lu idx%u\n", \ -+ rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ -+ */ - - #define __SCHED_ENQUEUE_TASK(p, rq, flags) \ - sched_info_queued(rq, p); \ - psi_enqueue(p, flags); \ - \ -- p->sl_node.level = p->sl_level; \ -- pds_skiplist_insert(&rq->sl_header, &p->sl_node) -+ p->sq_idx = task_sched_prio_idx(p, rq); \ -+ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ -+ /*\ -+ pr_info("<--: cpu%d(time_edge%llu) prio%lu idx%u\n", \ -+ rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ -+ */ - - /* - * Requeue a task @p to @rq - */ - #define __SCHED_REQUEUE_TASK(p, rq, func) \ - {\ -- bool b_first = skiplist_del_init(&rq->sl_header, &p->sl_node); \ -+ int idx = task_sched_prio_idx(p, rq); \ - \ -- p->sl_node.level = p->sl_level; \ -- if (pds_skiplist_insert(&rq->sl_header, &p->sl_node) || b_first) { \ -+ list_del(&p->sq_node); \ -+ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ -+ if (idx != p->sq_idx) { \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) \ -+ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ -+ p->sq_idx = idx; \ -+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ - func; \ -+ /*\ -+ pr_info("<->: cpu%d(time_edge%llu) prio%lu idx%u\n", \ -+ rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ -+ */\ - } \ - } - - static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - { -- struct skiplist_node *node; -- -- node = p->sl_node.prev[0]; -- if (node != &rq->sl_header && -- skiplist_entry(node, struct task_struct, sl_node)->priodl > p->priodl) -- return true; -- -- node = p->sl_node.next[0]; -- if (node != &rq->sl_header && -- skiplist_entry(node, struct task_struct, sl_node)->priodl < p->priodl) -- return true; -- -- return false; --} -- --/* -- * pds_skiplist_random_level -- Returns a pseudo-random level number for skip -- * list node which is used in PDS run queue. -- * -- * __ffs() is used to satisfy p = 0.5 between each levels, and there should be -- * platform instruction(known as ctz/clz) for acceleration. -- * -- * The skiplist level for a task is populated when task is created and doesn't -- * change in task's life time. When task is being inserted into run queue, this -- * skiplist level is set to task's sl_node->level, the skiplist insert function -- * may change it based on current level of the skip lsit. -- */ --static inline int pds_skiplist_random_level(const struct task_struct *p) --{ -- /* -- * 1. Some architectures don't have better than microsecond resolution -- * so mask out ~microseconds as a factor of the random seed for skiplist -- * insertion. -- * 2. Use address of task structure pointer as another factor of the -- * random seed for task burst forking scenario. 
-- */ -- unsigned long randseed = (task_rq(p)->clock ^ (unsigned long)p) >> 10; -- -- randseed &= __GENMASK(NUM_SKIPLIST_LEVEL - 1, 0); -- if (randseed) -- return __ffs(randseed); -- -- return (NUM_SKIPLIST_LEVEL - 1); -+ return (task_sched_prio_idx(p, rq) != p->sq_idx); - } - - static void sched_task_fork(struct task_struct *p, struct rq *rq) - { -- p->sl_level = pds_skiplist_random_level(p); - if (p->prio >= MAX_RT_PRIO) - p->deadline = rq->clock + -- user_prio2deadline[p->static_prio - MAX_RT_PRIO]; -+ SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); - update_task_priodl(p); - } - -@@ -261,9 +305,10 @@ int task_prio(const struct task_struct *p) - if (p->prio < MAX_RT_PRIO) - return (p->prio - MAX_RT_PRIO); - -- preempt_disable(); -- ret = task_sched_prio(p, this_rq()) - MAX_RT_PRIO; -- preempt_enable(); -+ /*preempt_disable(); -+ ret = task_sched_prio(p, task_rq(p)) - MAX_RT_PRIO;*/ -+ ret = p->static_prio - MAX_RT_PRIO; -+ /*preempt_enable();*/ - - return ret; - } --- -2.37.0 - - -From 41ea91a3c9688bca5332d74d25bcf199e9733a0b Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 17 May 2021 10:38:00 +0000 -Subject: [PATCH 150/297] sched/pds: Remove unused priodl in task structure - ---- - include/linux/sched.h | 1 - - include/linux/sched/deadline.h | 2 +- - kernel/sched/alt_core.c | 3 --- - kernel/sched/pds_imp.h | 8 -------- - 4 files changed, 1 insertion(+), 13 deletions(-) - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 948c6d84757a..41bf98528037 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -789,7 +789,6 @@ struct task_struct { - #endif /* CONFIG_SCHED_BMQ */ - #ifdef CONFIG_SCHED_PDS - u64 deadline; -- u64 priodl; - int sq_idx; - struct list_head sq_node; - #endif /* CONFIG_SCHED_PDS */ -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 9516a98cf160..20c59b190b1a 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -12,7 +12,7 @@ static inline int dl_task(struct task_struct *p) - #endif - - #ifdef CONFIG_SCHED_PDS --#define __tsk_deadline(p) ((p)->priodl) -+#define __tsk_deadline(p) ((((u64) ((p)->prio))<<56) | (((p)->deadline)>>8)) - #endif - - #else -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 01abbf28670f..cbca3a54f912 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4628,7 +4628,6 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) - - trace_sched_pi_setprio(p, pi_task); - p->prio = prio; -- update_task_priodl(p); - - check_task_changed(rq, p); - out_unlock: -@@ -4673,7 +4672,6 @@ void set_user_nice(struct task_struct *p, long nice) - goto out_unlock; - - p->prio = effective_prio(p); -- update_task_priodl(p); - - check_task_changed(rq, p); - out_unlock: -@@ -4823,7 +4821,6 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, - p->prio = normal_prio(p); - if (keep_boost) - p->prio = rt_effective_prio(p, p->prio); -- update_task_priodl(p); - } - - /* -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index 35886852c71a..c9ab90f8d5c6 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -69,12 +69,6 @@ int task_running_nice(struct task_struct *p) - return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; - } - --static inline void update_task_priodl(struct task_struct *p) --{ -- p->priodl = (((u64) (p->prio))<<56) | ((p->deadline)>>8); --} -- -- - DECLARE_BITMAP(normal_mask, SCHED_BITS); - - static inline void 
sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) -@@ -131,7 +125,6 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - if (p->prio >= MAX_RT_PRIO) - p->deadline = rq->clock + - SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); -- update_task_priodl(p); - - if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) - requeue_task(p, rq); -@@ -284,7 +277,6 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) - if (p->prio >= MAX_RT_PRIO) - p->deadline = rq->clock + - SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); -- update_task_priodl(p); - } - - /** --- -2.37.0 - - -From 94accf4e15e13fbcd0792595f818b046acf69da5 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 17 May 2021 12:49:04 +0000 -Subject: [PATCH 151/297] sched/pds: Code clean up. - ---- - kernel/sched/pds_imp.h | 42 +++++++++++++----------------------------- - 1 file changed, 13 insertions(+), 29 deletions(-) - -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -index c9ab90f8d5c6..8cc656a7cc48 100644 ---- a/kernel/sched/pds_imp.h -+++ b/kernel/sched/pds_imp.h -@@ -88,11 +88,12 @@ static inline void update_rq_time_edge(struct rq *rq) - struct list_head head; - u64 old = rq->time_edge; - u64 now = rq->clock >> 23; -- u64 prio, delta = min(20ULL, now - old); -+ u64 prio, delta; - - if (now == old) - return; - -+ delta = min(20ULL, now - old); - INIT_LIST_HEAD(&head); - - prio = MAX_RT_PRIO; -@@ -115,17 +116,20 @@ static inline void update_rq_time_edge(struct rq *rq) - } - } - -+static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) -+{ -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + -+ SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); -+} -+ - static inline void requeue_task(struct task_struct *p, struct rq *rq); - - static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - { - /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ - p->time_slice = sched_timeslice_ns; -- -- if (p->prio >= MAX_RT_PRIO) -- p->deadline = rq->clock + -- SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); -- -+ sched_renew_deadline(p, rq); - if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) - requeue_task(p, rq); - } -@@ -187,12 +191,6 @@ static inline struct task_struct *sched_rq_first_task(struct rq *rq) - unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); - const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; - -- /* -- if (list_empty(head)) { -- pr_err("BUG: cpu%d(time_edge%llu) prio%lu idx%lu mismatched\n", -- rq->cpu, rq->time_edge, idx, sched_prio2idx(idx, rq)); -- BUG(); -- }*/ - return list_first_entry(head, struct task_struct, sq_node); - } - -@@ -227,11 +225,7 @@ static inline unsigned long sched_queue_watermark(struct rq *rq) - clear_bit(sched_idx2prio(p->sq_idx, rq), \ - rq->queue.bitmap); \ - func; \ -- } \ -- /*\ -- pr_info("-->: cpu%d(time_edge%llu) prio%lu idx%u\n", \ -- rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ -- */ -+ } - - #define __SCHED_ENQUEUE_TASK(p, rq, flags) \ - sched_info_queued(rq, p); \ -@@ -239,11 +233,7 @@ static inline unsigned long sched_queue_watermark(struct rq *rq) - \ - p->sq_idx = task_sched_prio_idx(p, rq); \ - list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ -- /*\ -- pr_info("<--: cpu%d(time_edge%llu) prio%lu idx%u\n", \ -- rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ -- */ -+ 
set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); - - /* - * Requeue a task @p to @rq -@@ -260,10 +250,6 @@ static inline unsigned long sched_queue_watermark(struct rq *rq) - p->sq_idx = idx; \ - set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ - func; \ -- /*\ -- pr_info("<->: cpu%d(time_edge%llu) prio%lu idx%u\n", \ -- rq->cpu, rq->time_edge, sched_idx2prio(p->sq_idx, rq), p->sq_idx); \ -- */\ - } \ - } - -@@ -274,9 +260,7 @@ static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - - static void sched_task_fork(struct task_struct *p, struct rq *rq) - { -- if (p->prio >= MAX_RT_PRIO) -- p->deadline = rq->clock + -- SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); -+ sched_renew_deadline(p, rq); - } - - /** --- -2.37.0 - - -From 8635be47b8820c0fb240378109c80b5c92e29837 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 17 May 2021 13:48:11 +0000 -Subject: [PATCH 152/297] sched/alt: BMQ&PDS share same name in data structure - -sq_idx and sq_node are shared in task_struct. -queue is shared in rq. ---- - include/linux/sched.h | 6 ++--- - init/init_task.c | 5 ++-- - kernel/sched/alt_sched.h | 10 +++++--- - kernel/sched/bmq.h | 5 ---- - kernel/sched/bmq_imp.h | 54 ++++++++++++++++++++-------------------- - kernel/sched/pds.h | 5 ---- - 6 files changed, 37 insertions(+), 48 deletions(-) - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 41bf98528037..2423bc6713eb 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -782,15 +782,13 @@ struct task_struct { - #ifdef CONFIG_SCHED_ALT - u64 last_ran; - s64 time_slice; -+ int sq_idx; -+ struct list_head sq_node; - #ifdef CONFIG_SCHED_BMQ - int boost_prio; -- int bmq_idx; -- struct list_head bmq_node; - #endif /* CONFIG_SCHED_BMQ */ - #ifdef CONFIG_SCHED_PDS - u64 deadline; -- int sq_idx; -- struct list_head sq_node; - #endif /* CONFIG_SCHED_PDS */ - /* sched_clock time spent running */ - u64 sched_time; -diff --git a/init/init_task.c b/init/init_task.c -index 0374297c45b9..579d99864d49 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -100,14 +100,13 @@ struct task_struct init_task - .fn = do_no_restart_syscall, - }, - #ifdef CONFIG_SCHED_ALT -+ .sq_node = LIST_HEAD_INIT(init_task.sq_node), - #ifdef CONFIG_SCHED_BMQ - .boost_prio = 0, -- .bmq_idx = 15, -- .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), -+ .sq_idx = 15, - #endif - #ifdef CONFIG_SCHED_PDS - .deadline = 0, -- .sq_node = LIST_HEAD_INIT(init_task.sq_node), - #endif - .time_slice = HZ, - #else -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index cfb4669dfbbf..21f359102fbc 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -131,6 +131,11 @@ static inline int task_on_rq_migrating(struct task_struct *p) - #define WF_MIGRATED 0x04 /* internal use, task got migrated */ - #define WF_ON_CPU 0x08 /* Wakee is on_rq */ - -+struct sched_queue { -+ DECLARE_BITMAP(bitmap, SCHED_BITS); -+ struct list_head heads[SCHED_BITS]; -+}; -+ - /* - * This is the main, per-CPU runqueue data structure. - * This data should only be modified by the local cpu. 
-@@ -143,11 +148,8 @@ struct rq { - struct task_struct *idle, *stop, *skip; - struct mm_struct *prev_mm; - --#ifdef CONFIG_SCHED_BMQ -- struct bmq queue; --#endif --#ifdef CONFIG_SCHED_PDS - struct sched_queue queue; -+#ifdef CONFIG_SCHED_PDS - u64 time_edge; - #endif - unsigned long watermark; -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index 4ce30c30bd3e..2ae022def9f7 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -6,9 +6,4 @@ - #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH + 2 * MAX_PRIORITY_ADJ + 1) - #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) - --struct bmq { -- DECLARE_BITMAP(bitmap, SCHED_BITS); -- struct list_head heads[SCHED_BITS]; --}; -- - #endif -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -index 7c71f1141d00..f6bd3421b95c 100644 ---- a/kernel/sched/bmq_imp.h -+++ b/kernel/sched/bmq_imp.h -@@ -67,8 +67,6 @@ inline int task_running_nice(struct task_struct *p) - return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); - } - --static inline void update_task_priodl(struct task_struct *p) {} -- - static inline unsigned long sched_queue_watermark(struct rq *rq) - { - return find_first_bit(rq->queue.bitmap, SCHED_BITS); -@@ -76,7 +74,7 @@ static inline unsigned long sched_queue_watermark(struct rq *rq) - - static inline void sched_queue_init(struct rq *rq) - { -- struct bmq *q = &rq->queue; -+ struct sched_queue *q = &rq->queue; - int i; - - bitmap_zero(q->bitmap, SCHED_BITS); -@@ -86,12 +84,12 @@ static inline void sched_queue_init(struct rq *rq) - - static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) - { -- struct bmq *q = &rq->queue; -+ struct sched_queue *q = &rq->queue; - -- idle->bmq_idx = IDLE_TASK_SCHED_PRIO; -- INIT_LIST_HEAD(&q->heads[idle->bmq_idx]); -- list_add(&idle->bmq_node, &q->heads[idle->bmq_idx]); -- set_bit(idle->bmq_idx, q->bitmap); -+ idle->sq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -+ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -+ set_bit(idle->sq_idx, q->bitmap); - } - - /* -@@ -102,32 +100,32 @@ static inline struct task_struct *sched_rq_first_task(struct rq *rq) - unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); - const struct list_head *head = &rq->queue.heads[idx]; - -- return list_first_entry(head, struct task_struct, bmq_node); -+ return list_first_entry(head, struct task_struct, sq_node); - } - - static inline struct task_struct * - sched_rq_next_task(struct task_struct *p, struct rq *rq) - { -- unsigned long idx = p->bmq_idx; -+ unsigned long idx = p->sq_idx; - struct list_head *head = &rq->queue.heads[idx]; - -- if (list_is_last(&p->bmq_node, head)) { -+ if (list_is_last(&p->sq_node, head)) { - idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); - head = &rq->queue.heads[idx]; - -- return list_first_entry(head, struct task_struct, bmq_node); -+ return list_first_entry(head, struct task_struct, sq_node); - } - -- return list_next_entry(p, bmq_node); -+ return list_next_entry(p, sq_node); - } - - #define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ - psi_dequeue(p, flags & DEQUEUE_SLEEP); \ - sched_info_dequeued(rq, p); \ - \ -- list_del(&p->bmq_node); \ -- if (list_empty(&rq->queue.heads[p->bmq_idx])) { \ -- clear_bit(p->bmq_idx, rq->queue.bitmap);\ -+ list_del(&p->sq_node); \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -+ clear_bit(p->sq_idx, rq->queue.bitmap);\ - func; \ - } - -@@ -135,28 +133,28 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) - sched_info_queued(rq, p); \ - 
psi_enqueue(p, flags); \ - \ -- p->bmq_idx = task_sched_prio(p, rq); \ -- list_add_tail(&p->bmq_node, &rq->queue.heads[p->bmq_idx]); \ -- set_bit(p->bmq_idx, rq->queue.bitmap) -+ p->sq_idx = task_sched_prio(p, rq); \ -+ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -+ set_bit(p->sq_idx, rq->queue.bitmap) - - #define __SCHED_REQUEUE_TASK(p, rq, func) \ - { \ - int idx = task_sched_prio(p, rq); \ - \ -- list_del(&p->bmq_node); \ -- list_add_tail(&p->bmq_node, &rq->queue.heads[idx]); \ -- if (idx != p->bmq_idx) { \ -- if (list_empty(&rq->queue.heads[p->bmq_idx])) \ -- clear_bit(p->bmq_idx, rq->queue.bitmap); \ -- p->bmq_idx = idx; \ -- set_bit(p->bmq_idx, rq->queue.bitmap); \ -+ list_del(&p->sq_node); \ -+ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ -+ if (idx != p->sq_idx) { \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) \ -+ clear_bit(p->sq_idx, rq->queue.bitmap); \ -+ p->sq_idx = idx; \ -+ set_bit(p->sq_idx, rq->queue.bitmap); \ - func; \ - } \ - } - - static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - { -- return (task_sched_prio(p, rq) != p->bmq_idx); -+ return (task_sched_prio(p, rq) != p->sq_idx); - } - - static void sched_task_fork(struct task_struct *p, struct rq *rq) -@@ -201,3 +199,5 @@ static void sched_task_deactivate(struct task_struct *p, struct rq *rq) - if (rq_switch_time(rq) < boost_threshold(p)) - boost_task(p); - } -+ -+static inline void update_rq_time_edge(struct rq *rq) {} -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 3afc6fd7a27f..623908cf4380 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -6,9 +6,4 @@ - #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) - #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) - --struct sched_queue { -- DECLARE_BITMAP(bitmap, SCHED_BITS); -- struct list_head heads[SCHED_BITS]; --}; -- - #endif --- -2.37.0 - - -From 15abc98ac279821cd496765da1cd6f1536f6a40a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 17 May 2021 14:17:37 +0000 -Subject: [PATCH 153/297] sched/alt: [Sync] 163dd7fa459f kthread: Fix - PF_KTHREAD vs to_kthread() race - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index cbca3a54f912..b1c17ff1642c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6210,7 +6210,7 @@ static void balance_push(struct rq *rq) - * histerical raisins. - */ - if (rq->idle == push_task || -- ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || -+ kthread_is_per_cpu(push_task) || - is_migration_disabled(push_task)) { - - /* --- -2.37.0 - - -From d2156be52fe37185c10f22d238a9d0dc70b36ee9 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 17 May 2021 14:49:46 +0000 -Subject: [PATCH 154/297] sched/alt: Rename BMQ&PDS implement files. 
- ---- - kernel/sched/alt_core.c | 4 +- - kernel/sched/alt_sched.h | 9 +- - kernel/sched/bmq.h | 206 +++++++++++++++++++++++++- - kernel/sched/bmq_imp.h | 203 -------------------------- - kernel/sched/pds.h | 303 ++++++++++++++++++++++++++++++++++++++- - kernel/sched/pds_imp.h | 300 -------------------------------------- - 6 files changed, 506 insertions(+), 519 deletions(-) - delete mode 100644 kernel/sched/bmq_imp.h - delete mode 100644 kernel/sched/pds_imp.h - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b1c17ff1642c..9ade1b64aa9c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -143,10 +143,10 @@ static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; - static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; - - #ifdef CONFIG_SCHED_BMQ --#include "bmq_imp.h" -+#include "bmq.h" - #endif - #ifdef CONFIG_SCHED_PDS --#include "pds_imp.h" -+#include "pds.h" - #endif - - static inline void update_sched_rq_watermark(struct rq *rq) -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 21f359102fbc..58ff6212b446 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -50,12 +50,17 @@ - #include - - #ifdef CONFIG_SCHED_BMQ --#include "bmq.h" -+/* bits: -+ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) - #endif - #ifdef CONFIG_SCHED_PDS --#include "pds.h" -+/* bits: RT(0-99), nice width / 2, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) - #endif - -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+ - #ifdef CONFIG_SCHED_DEBUG - # define SCHED_WARN_ON(x) WARN_ONCE(x, #x) - extern void resched_latency_warn(int cpu, u64 latency); -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index 2ae022def9f7..f6bd3421b95c 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -1,9 +1,203 @@ --#ifndef BMQ_H --#define BMQ_H -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" - --/* bits: -- * RT(0-99), Low prio adj range, nice width, high prio adj range, cpu idle task */ --#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH + 2 * MAX_PRIORITY_ADJ + 1) --#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+/* -+ * BMQ only routines -+ */ -+#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) -+#define boost_threshold(p) (sched_timeslice_ns >>\ -+ (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) - -+static inline void boost_task(struct task_struct *p) -+{ -+ int limit; -+ -+ switch (p->policy) { -+ case SCHED_NORMAL: -+ limit = -MAX_PRIORITY_ADJ; -+ break; -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ limit = 0; -+ break; -+ default: -+ return; -+ } -+ -+ if (p->boost_prio > limit) -+ p->boost_prio--; -+} -+ -+static inline void deboost_task(struct task_struct *p) -+{ -+ if (p->boost_prio < MAX_PRIORITY_ADJ) -+ p->boost_prio++; -+} -+ -+/* -+ * Common interfaces -+ */ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return p->static_prio + MAX_PRIORITY_ADJ; -+} -+ -+static inline int task_sched_prio(struct task_struct *p, struct rq *rq) -+{ -+ return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = sched_timeslice_ns; -+ -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -+ if (SCHED_RR != p->policy) -+ deboost_task(p); -+ requeue_task(p, rq); -+ } -+} -+ -+inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -+} -+ -+static inline unsigned long sched_queue_watermark(struct rq *rq) -+{ -+ return find_first_bit(rq->queue.bitmap, SCHED_BITS); -+} -+ -+static inline void sched_queue_init(struct rq *rq) -+{ -+ struct sched_queue *q = &rq->queue; -+ int i; -+ -+ bitmap_zero(q->bitmap, SCHED_BITS); -+ for(i = 0; i < SCHED_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); -+} -+ -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) -+{ -+ struct sched_queue *q = &rq->queue; -+ -+ idle->sq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -+ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -+ set_bit(idle->sq_idx, q->bitmap); -+} -+ -+/* -+ * This routine used in bmq scheduler only which assume the idle task in the bmq -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ const struct list_head *head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, sq_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ unsigned long idx = p->sq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->sq_node, head)) { -+ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); -+ head = &rq->queue.heads[idx]; -+ -+ return list_first_entry(head, struct task_struct, sq_node); -+ } -+ -+ return list_next_entry(p, sq_node); -+} -+ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ list_del(&p->sq_node); \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -+ clear_bit(p->sq_idx, rq->queue.bitmap);\ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->sq_idx = task_sched_prio(p, rq); \ -+ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -+ set_bit(p->sq_idx, rq->queue.bitmap) -+ -+#define __SCHED_REQUEUE_TASK(p, rq, func) \ -+{ \ -+ int idx = task_sched_prio(p, rq); \ -+\ -+ list_del(&p->sq_node); \ -+ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ -+ if (idx != p->sq_idx) { \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) \ -+ clear_bit(p->sq_idx, rq->queue.bitmap); \ -+ p->sq_idx = idx; \ -+ set_bit(p->sq_idx, rq->queue.bitmap); \ -+ func; \ -+ } \ -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) -+{ -+ return (task_sched_prio(p, rq) != p->sq_idx); -+} -+ -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = (p->boost_prio < 0) ? -+ p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -+} -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * -+ * sched policy return value kernel prio user prio/nice/boost -+ * -+ * normal, batch, idle [0 ... 
53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] -+ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ return (p->prio - MAX_RT_PRIO + p->boost_prio); -+} -+ -+static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = MAX_PRIORITY_ADJ; -+} -+ -+#ifdef CONFIG_SMP -+static void sched_task_ttwu(struct task_struct *p) -+{ -+ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) -+ boost_task(p); -+} - #endif -+ -+static void sched_task_deactivate(struct task_struct *p, struct rq *rq) -+{ -+ if (rq_switch_time(rq) < boost_threshold(p)) -+ boost_task(p); -+} -+ -+static inline void update_rq_time_edge(struct rq *rq) {} -diff --git a/kernel/sched/bmq_imp.h b/kernel/sched/bmq_imp.h -deleted file mode 100644 -index f6bd3421b95c..000000000000 ---- a/kernel/sched/bmq_imp.h -+++ /dev/null -@@ -1,203 +0,0 @@ --#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -- --/* -- * BMQ only routines -- */ --#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) --#define boost_threshold(p) (sched_timeslice_ns >>\ -- (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) -- --static inline void boost_task(struct task_struct *p) --{ -- int limit; -- -- switch (p->policy) { -- case SCHED_NORMAL: -- limit = -MAX_PRIORITY_ADJ; -- break; -- case SCHED_BATCH: -- case SCHED_IDLE: -- limit = 0; -- break; -- default: -- return; -- } -- -- if (p->boost_prio > limit) -- p->boost_prio--; --} -- --static inline void deboost_task(struct task_struct *p) --{ -- if (p->boost_prio < MAX_PRIORITY_ADJ) -- p->boost_prio++; --} -- --/* -- * Common interfaces -- */ --static inline int normal_prio(struct task_struct *p) --{ -- if (task_has_rt_policy(p)) -- return MAX_RT_PRIO - 1 - p->rt_priority; -- -- return p->static_prio + MAX_PRIORITY_ADJ; --} -- --static inline int task_sched_prio(struct task_struct *p, struct rq *rq) --{ -- return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; --} -- --static inline void requeue_task(struct task_struct *p, struct rq *rq); -- --static inline void time_slice_expired(struct task_struct *p, struct rq *rq) --{ -- p->time_slice = sched_timeslice_ns; -- -- if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -- if (SCHED_RR != p->policy) -- deboost_task(p); -- requeue_task(p, rq); -- } --} -- --inline int task_running_nice(struct task_struct *p) --{ -- return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); --} -- --static inline unsigned long sched_queue_watermark(struct rq *rq) --{ -- return find_first_bit(rq->queue.bitmap, SCHED_BITS); --} -- --static inline void sched_queue_init(struct rq *rq) --{ -- struct sched_queue *q = &rq->queue; -- int i; -- -- bitmap_zero(q->bitmap, SCHED_BITS); -- for(i = 0; i < SCHED_BITS; i++) -- INIT_LIST_HEAD(&q->heads[i]); --} -- --static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) --{ -- struct sched_queue *q = &rq->queue; -- -- idle->sq_idx = IDLE_TASK_SCHED_PRIO; -- INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -- list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -- set_bit(idle->sq_idx, q->bitmap); --} -- --/* -- * This routine used in bmq scheduler only which assume the idle task in the bmq -- */ --static inline struct task_struct *sched_rq_first_task(struct rq *rq) --{ -- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -- const struct list_head *head = &rq->queue.heads[idx]; -- -- return list_first_entry(head, struct task_struct, sq_node); --} -- --static inline struct task_struct * --sched_rq_next_task(struct task_struct *p, struct rq *rq) --{ -- unsigned long idx = p->sq_idx; -- struct list_head *head = &rq->queue.heads[idx]; -- -- if (list_is_last(&p->sq_node, head)) { -- idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); -- head = &rq->queue.heads[idx]; -- -- return list_first_entry(head, struct task_struct, sq_node); -- } -- -- return list_next_entry(p, sq_node); --} -- --#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -- sched_info_dequeued(rq, p); \ -- \ -- list_del(&p->sq_node); \ -- if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -- clear_bit(p->sq_idx, rq->queue.bitmap);\ -- func; \ -- } -- --#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -- sched_info_queued(rq, p); \ -- psi_enqueue(p, flags); \ -- \ -- p->sq_idx = task_sched_prio(p, rq); \ -- list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -- set_bit(p->sq_idx, rq->queue.bitmap) -- --#define __SCHED_REQUEUE_TASK(p, rq, func) \ --{ \ -- int idx = task_sched_prio(p, rq); \ --\ -- list_del(&p->sq_node); \ -- list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ -- if (idx != p->sq_idx) { \ -- if (list_empty(&rq->queue.heads[p->sq_idx])) \ -- clear_bit(p->sq_idx, rq->queue.bitmap); \ -- p->sq_idx = idx; \ -- set_bit(p->sq_idx, rq->queue.bitmap); \ -- func; \ -- } \ --} -- --static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) --{ -- return (task_sched_prio(p, rq) != p->sq_idx); --} -- --static void sched_task_fork(struct task_struct *p, struct rq *rq) --{ -- p->boost_prio = (p->boost_prio < 0) ? -- p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; --} -- --/** -- * task_prio - return the priority value of a given task. -- * @p: the task in question. -- * -- * Return: The priority value as seen by users in /proc. -- * -- * sched policy return value kernel prio user prio/nice/boost -- * -- * normal, batch, idle [0 ... 
53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] -- * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] -- */ --int task_prio(const struct task_struct *p) --{ -- if (p->prio < MAX_RT_PRIO) -- return (p->prio - MAX_RT_PRIO); -- return (p->prio - MAX_RT_PRIO + p->boost_prio); --} -- --static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) --{ -- p->boost_prio = MAX_PRIORITY_ADJ; --} -- --#ifdef CONFIG_SMP --static void sched_task_ttwu(struct task_struct *p) --{ -- if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) -- boost_task(p); --} --#endif -- --static void sched_task_deactivate(struct task_struct *p, struct rq *rq) --{ -- if (rq_switch_time(rq) < boost_threshold(p)) -- boost_task(p); --} -- --static inline void update_rq_time_edge(struct rq *rq) {} -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 623908cf4380..8cc656a7cc48 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -1,9 +1,300 @@ --#ifndef PDS_H --#define PDS_H -+#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" - --/* bits: -- * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ --#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) --#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+static const u64 user_prio2deadline[NICE_WIDTH] = { -+/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, -+/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, -+/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, -+/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, -+/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, -+/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, -+/* 10 */ 73188017, 80506818, 88557499, 97413248, 107154572, -+/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 -+}; - -+#define SCHED_PRIO_SLOT (4ULL << 20) -+#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) -+ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return MAX_RT_PRIO; -+} -+ -+extern int alt_debug[20]; -+ -+static inline int -+task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) -+{ -+ int delta; -+ -+ delta = rq->time_edge + 20 - (p->deadline >> 23); -+ if (delta < 0) { -+ delta = 0; -+ alt_debug[0]++; -+ } -+ delta = 19 - min(delta, 19); -+ -+ return delta; -+} -+ -+static inline int -+task_sched_prio(const struct task_struct *p, const struct rq *rq) -+{ -+ if (p == rq->idle) -+ return IDLE_TASK_SCHED_PRIO; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return p->prio; -+ -+ return MAX_RT_PRIO + task_sched_prio_normal(p, rq); -+} -+ -+static inline int -+task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) -+{ -+ if (p == rq->idle) -+ return IDLE_TASK_SCHED_PRIO; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return p->prio; -+ -+ return MAX_RT_PRIO + -+ (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; -+} -+ -+int task_running_nice(struct task_struct *p) -+{ -+ return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; -+} -+ -+DECLARE_BITMAP(normal_mask, SCHED_BITS); -+ -+static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) -+{ -+ DECLARE_BITMAP(normal, SCHED_BITS); -+ -+ bitmap_and(normal, mask, normal_mask, SCHED_BITS); -+ bitmap_shift_right(normal, normal, shift, SCHED_BITS); -+ bitmap_and(normal, normal, normal_mask, SCHED_BITS); -+ -+ bitmap_andnot(mask, mask, normal_mask, SCHED_BITS); -+ bitmap_or(mask, mask, normal, SCHED_BITS); -+} -+ -+static 
inline void update_rq_time_edge(struct rq *rq) -+{ -+ struct list_head head; -+ u64 old = rq->time_edge; -+ u64 now = rq->clock >> 23; -+ u64 prio, delta; -+ -+ if (now == old) -+ return; -+ -+ delta = min(20ULL, now - old); -+ INIT_LIST_HEAD(&head); -+ -+ prio = MAX_RT_PRIO; -+ for_each_set_bit_from(prio, rq->queue.bitmap, MAX_RT_PRIO + delta) { -+ u64 idx; -+ -+ idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + rq->time_edge) % 20; -+ list_splice_tail_init(rq->queue.heads + idx, &head); -+ } -+ sched_shift_normal_bitmap(rq->queue.bitmap, delta); -+ rq->time_edge = now; -+ if (!list_empty(&head)) { -+ struct task_struct *p; -+ -+ list_for_each_entry(p, &head, sq_node) -+ p->sq_idx = MAX_RT_PRIO + now % 20; -+ -+ list_splice(&head, rq->queue.heads + MAX_RT_PRIO + now % 20); -+ set_bit(MAX_RT_PRIO, rq->queue.bitmap); -+ } -+} -+ -+static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) -+{ -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + -+ SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ -+ p->time_slice = sched_timeslice_ns; -+ sched_renew_deadline(p, rq); -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -+ requeue_task(p, rq); -+} -+ -+/* -+ * Init the queue structure in rq -+ */ -+static inline void sched_queue_init(struct rq *rq) -+{ -+ struct sched_queue *q = &rq->queue; -+ int i; -+ -+ bitmap_set(normal_mask, MAX_RT_PRIO, 20); -+ bitmap_zero(q->bitmap, SCHED_BITS); -+ for(i = 0; i < SCHED_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); -+} -+ -+/* -+ * Init idle task and put into queue structure of rq -+ * IMPORTANT: may be called multiple times for a single cpu -+ */ -+static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) -+{ -+ struct sched_queue *q = &rq->queue; -+ /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ -+ -+ idle->sq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -+ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -+ set_bit(idle->sq_idx, q->bitmap); -+} -+ -+static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) -+{ -+ if (IDLE_TASK_SCHED_PRIO == idx || -+ idx < MAX_RT_PRIO) -+ return idx; -+ -+ return MAX_RT_PRIO + -+ ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; -+} -+ -+static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) -+{ -+ if (IDLE_TASK_SCHED_PRIO == idx || -+ idx < MAX_RT_PRIO) -+ return idx; -+ -+ return MAX_RT_PRIO + -+ ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; -+} -+ -+/* -+ * This routine assume that the idle task always in queue -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -+ -+ return list_first_entry(head, struct task_struct, sq_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ unsigned long idx = p->sq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->sq_node, head)) { -+ idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, -+ sched_idx2prio(idx, rq) + 1); -+ head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -+ -+ return list_first_entry(head, struct task_struct, 
sq_node); -+ } -+ -+ return list_next_entry(p, sq_node); -+} -+ -+static inline unsigned long sched_queue_watermark(struct rq *rq) -+{ -+ return find_first_bit(rq->queue.bitmap, SCHED_BITS); -+} -+ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ list_del(&p->sq_node); \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -+ clear_bit(sched_idx2prio(p->sq_idx, rq), \ -+ rq->queue.bitmap); \ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->sq_idx = task_sched_prio_idx(p, rq); \ -+ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); -+ -+/* -+ * Requeue a task @p to @rq -+ */ -+#define __SCHED_REQUEUE_TASK(p, rq, func) \ -+{\ -+ int idx = task_sched_prio_idx(p, rq); \ -+\ -+ list_del(&p->sq_node); \ -+ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ -+ if (idx != p->sq_idx) { \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) \ -+ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ -+ p->sq_idx = idx; \ -+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ -+ func; \ -+ } \ -+} -+ -+static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) -+{ -+ return (task_sched_prio_idx(p, rq) != p->sq_idx); -+} -+ -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ sched_renew_deadline(p, rq); -+} -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * -+ * sched policy return value kernel prio user prio/nice -+ * -+ * normal, batch, idle [0 ... 39] 100 0/[-20 ... 19] -+ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 
99] -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ int ret; -+ -+ if (p->prio < MAX_RT_PRIO) -+ return (p->prio - MAX_RT_PRIO); -+ -+ /*preempt_disable(); -+ ret = task_sched_prio(p, task_rq(p)) - MAX_RT_PRIO;*/ -+ ret = p->static_prio - MAX_RT_PRIO; -+ /*preempt_enable();*/ -+ -+ return ret; -+} -+ -+static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ time_slice_expired(p, rq); -+} -+ -+#ifdef CONFIG_SMP -+static void sched_task_ttwu(struct task_struct *p) {} - #endif -+static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} -diff --git a/kernel/sched/pds_imp.h b/kernel/sched/pds_imp.h -deleted file mode 100644 -index 8cc656a7cc48..000000000000 ---- a/kernel/sched/pds_imp.h -+++ /dev/null -@@ -1,300 +0,0 @@ --#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -- --static const u64 user_prio2deadline[NICE_WIDTH] = { --/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, --/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, --/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, --/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, --/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, --/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, --/* 10 */ 73188017, 80506818, 88557499, 97413248, 107154572, --/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 --}; -- --#define SCHED_PRIO_SLOT (4ULL << 20) --#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) -- --static inline int normal_prio(struct task_struct *p) --{ -- if (task_has_rt_policy(p)) -- return MAX_RT_PRIO - 1 - p->rt_priority; -- -- return MAX_RT_PRIO; --} -- --extern int alt_debug[20]; -- --static inline int --task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) --{ -- int delta; -- -- delta = rq->time_edge + 20 - (p->deadline >> 23); -- if (delta < 0) { -- delta = 0; -- alt_debug[0]++; -- } -- delta = 19 - min(delta, 19); -- -- return delta; --} -- --static inline int --task_sched_prio(const struct task_struct *p, const struct rq *rq) --{ -- if (p == rq->idle) -- return IDLE_TASK_SCHED_PRIO; -- -- if (p->prio < MAX_RT_PRIO) -- return p->prio; -- -- return MAX_RT_PRIO + task_sched_prio_normal(p, rq); --} -- --static inline int --task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) --{ -- if (p == rq->idle) -- return IDLE_TASK_SCHED_PRIO; -- -- if (p->prio < MAX_RT_PRIO) -- return p->prio; -- -- return MAX_RT_PRIO + -- (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; --} -- --int task_running_nice(struct task_struct *p) --{ -- return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; --} -- --DECLARE_BITMAP(normal_mask, SCHED_BITS); -- --static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) --{ -- DECLARE_BITMAP(normal, SCHED_BITS); -- -- bitmap_and(normal, mask, normal_mask, SCHED_BITS); -- bitmap_shift_right(normal, normal, shift, SCHED_BITS); -- bitmap_and(normal, normal, normal_mask, SCHED_BITS); -- -- bitmap_andnot(mask, mask, normal_mask, SCHED_BITS); -- bitmap_or(mask, mask, normal, SCHED_BITS); --} -- --static inline void update_rq_time_edge(struct rq *rq) --{ -- struct list_head head; -- u64 old = rq->time_edge; -- u64 now = rq->clock >> 23; -- u64 prio, delta; -- -- if (now == old) -- return; -- -- delta = min(20ULL, now - old); -- INIT_LIST_HEAD(&head); -- -- prio = MAX_RT_PRIO; -- for_each_set_bit_from(prio, rq->queue.bitmap, MAX_RT_PRIO + delta) { -- u64 idx; -- -- idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + 
rq->time_edge) % 20; -- list_splice_tail_init(rq->queue.heads + idx, &head); -- } -- sched_shift_normal_bitmap(rq->queue.bitmap, delta); -- rq->time_edge = now; -- if (!list_empty(&head)) { -- struct task_struct *p; -- -- list_for_each_entry(p, &head, sq_node) -- p->sq_idx = MAX_RT_PRIO + now % 20; -- -- list_splice(&head, rq->queue.heads + MAX_RT_PRIO + now % 20); -- set_bit(MAX_RT_PRIO, rq->queue.bitmap); -- } --} -- --static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) --{ -- if (p->prio >= MAX_RT_PRIO) -- p->deadline = rq->clock + -- SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); --} -- --static inline void requeue_task(struct task_struct *p, struct rq *rq); -- --static inline void time_slice_expired(struct task_struct *p, struct rq *rq) --{ -- /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ -- p->time_slice = sched_timeslice_ns; -- sched_renew_deadline(p, rq); -- if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -- requeue_task(p, rq); --} -- --/* -- * Init the queue structure in rq -- */ --static inline void sched_queue_init(struct rq *rq) --{ -- struct sched_queue *q = &rq->queue; -- int i; -- -- bitmap_set(normal_mask, MAX_RT_PRIO, 20); -- bitmap_zero(q->bitmap, SCHED_BITS); -- for(i = 0; i < SCHED_BITS; i++) -- INIT_LIST_HEAD(&q->heads[i]); --} -- --/* -- * Init idle task and put into queue structure of rq -- * IMPORTANT: may be called multiple times for a single cpu -- */ --static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) --{ -- struct sched_queue *q = &rq->queue; -- /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ -- -- idle->sq_idx = IDLE_TASK_SCHED_PRIO; -- INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -- list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -- set_bit(idle->sq_idx, q->bitmap); --} -- --static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) --{ -- if (IDLE_TASK_SCHED_PRIO == idx || -- idx < MAX_RT_PRIO) -- return idx; -- -- return MAX_RT_PRIO + -- ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; --} -- --static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) --{ -- if (IDLE_TASK_SCHED_PRIO == idx || -- idx < MAX_RT_PRIO) -- return idx; -- -- return MAX_RT_PRIO + -- ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; --} -- --/* -- * This routine assume that the idle task always in queue -- */ --static inline struct task_struct *sched_rq_first_task(struct rq *rq) --{ -- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -- const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -- -- return list_first_entry(head, struct task_struct, sq_node); --} -- --static inline struct task_struct * --sched_rq_next_task(struct task_struct *p, struct rq *rq) --{ -- unsigned long idx = p->sq_idx; -- struct list_head *head = &rq->queue.heads[idx]; -- -- if (list_is_last(&p->sq_node, head)) { -- idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, -- sched_idx2prio(idx, rq) + 1); -- head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -- -- return list_first_entry(head, struct task_struct, sq_node); -- } -- -- return list_next_entry(p, sq_node); --} -- --static inline unsigned long sched_queue_watermark(struct rq *rq) --{ -- return find_first_bit(rq->queue.bitmap, SCHED_BITS); --} -- --#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -- sched_info_dequeued(rq, p); \ -- \ -- list_del(&p->sq_node); \ -- if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -- 
clear_bit(sched_idx2prio(p->sq_idx, rq), \ -- rq->queue.bitmap); \ -- func; \ -- } -- --#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -- sched_info_queued(rq, p); \ -- psi_enqueue(p, flags); \ -- \ -- p->sq_idx = task_sched_prio_idx(p, rq); \ -- list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); -- --/* -- * Requeue a task @p to @rq -- */ --#define __SCHED_REQUEUE_TASK(p, rq, func) \ --{\ -- int idx = task_sched_prio_idx(p, rq); \ --\ -- list_del(&p->sq_node); \ -- list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ -- if (idx != p->sq_idx) { \ -- if (list_empty(&rq->queue.heads[p->sq_idx])) \ -- clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ -- p->sq_idx = idx; \ -- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ -- func; \ -- } \ --} -- --static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) --{ -- return (task_sched_prio_idx(p, rq) != p->sq_idx); --} -- --static void sched_task_fork(struct task_struct *p, struct rq *rq) --{ -- sched_renew_deadline(p, rq); --} -- --/** -- * task_prio - return the priority value of a given task. -- * @p: the task in question. -- * -- * Return: The priority value as seen by users in /proc. -- * -- * sched policy return value kernel prio user prio/nice -- * -- * normal, batch, idle [0 ... 39] 100 0/[-20 ... 19] -- * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] -- */ --int task_prio(const struct task_struct *p) --{ -- int ret; -- -- if (p->prio < MAX_RT_PRIO) -- return (p->prio - MAX_RT_PRIO); -- -- /*preempt_disable(); -- ret = task_sched_prio(p, task_rq(p)) - MAX_RT_PRIO;*/ -- ret = p->static_prio - MAX_RT_PRIO; -- /*preempt_enable();*/ -- -- return ret; --} -- --static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) --{ -- time_slice_expired(p, rq); --} -- --#ifdef CONFIG_SMP --static void sched_task_ttwu(struct task_struct *p) {} --#endif --static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} --- -2.37.0 - - -From b9d74857f996912e63dd264edf8807d79d118ecd Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 17 May 2021 16:55:30 +0000 -Subject: [PATCH 155/297] sched/alt: sched_queue_init_idle() share common code - ---- - kernel/sched/alt_core.c | 15 ++++++++++++++- - kernel/sched/bmq.h | 10 ---------- - kernel/sched/pds.h | 15 --------------- - 3 files changed, 14 insertions(+), 26 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 9ade1b64aa9c..407d5d441298 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6045,6 +6045,19 @@ void dump_cpu_task(int cpu) - sched_show_task(cpu_curr(cpu)); - } - -+/* -+ * Init idle task and put into queue structure of rq -+ * IMPORTANT: may be called multiple times for a single cpu -+ */ -+static inline void sched_queue_init_idle(struct sched_queue *q, -+ struct task_struct *idle) -+{ -+ idle->sq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -+ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -+ set_bit(idle->sq_idx, q->bitmap); -+} -+ - /** - * init_idle - set up an idle thread for a given CPU - * @idle: task in question -@@ -6067,7 +6080,7 @@ void init_idle(struct task_struct *idle, int cpu) - idle->last_ran = rq->clock_task; - idle->state = TASK_RUNNING; - idle->flags |= PF_IDLE; -- sched_queue_init_idle(rq, idle); -+ sched_queue_init_idle(&rq->queue, idle); - - scs_task_reset(idle); - kasan_unpoison_task_stack(idle); -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h 
-index f6bd3421b95c..f5bd651a7666 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -82,16 +82,6 @@ static inline void sched_queue_init(struct rq *rq) - INIT_LIST_HEAD(&q->heads[i]); - } - --static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) --{ -- struct sched_queue *q = &rq->queue; -- -- idle->sq_idx = IDLE_TASK_SCHED_PRIO; -- INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -- list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -- set_bit(idle->sq_idx, q->bitmap); --} -- - /* - * This routine used in bmq scheduler only which assume the idle task in the bmq - */ -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 8cc656a7cc48..c29122334bda 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -148,21 +148,6 @@ static inline void sched_queue_init(struct rq *rq) - INIT_LIST_HEAD(&q->heads[i]); - } - --/* -- * Init idle task and put into queue structure of rq -- * IMPORTANT: may be called multiple times for a single cpu -- */ --static inline void sched_queue_init_idle(struct rq *rq, struct task_struct *idle) --{ -- struct sched_queue *q = &rq->queue; -- /*printk(KERN_INFO "sched: init(%d) - %px\n", cpu_of(rq), idle);*/ -- -- idle->sq_idx = IDLE_TASK_SCHED_PRIO; -- INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -- list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -- set_bit(idle->sq_idx, q->bitmap); --} -- - static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) - { - if (IDLE_TASK_SCHED_PRIO == idx || --- -2.37.0 - - -From 3355290529f7bc9fa4f788818de2898074e43b90 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 18 May 2021 10:40:43 +0000 -Subject: [PATCH 156/297] sched/alt: Merge BMQ&PDS common code. - ---- - kernel/sched/alt_core.c | 43 ++++++++++++------- - kernel/sched/bmq.h | 17 +------- - kernel/sched/pds.h | 91 ++++++++++++++++++----------------------- - 3 files changed, 70 insertions(+), 81 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 407d5d441298..c81a9fc6a140 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -149,9 +149,34 @@ static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; - #include "pds.h" - #endif - -+/* sched_queue related functions */ -+static inline void sched_queue_init(struct sched_queue *q) -+{ -+ int i; -+ -+ bitmap_zero(q->bitmap, SCHED_BITS); -+ for(i = 0; i < SCHED_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); -+} -+ -+/* -+ * Init idle task and put into queue structure of rq -+ * IMPORTANT: may be called multiple times for a single cpu -+ */ -+static inline void sched_queue_init_idle(struct sched_queue *q, -+ struct task_struct *idle) -+{ -+ idle->sq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -+ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -+ set_bit(idle->sq_idx, q->bitmap); -+} -+ -+ -+/* water mark related functions*/ - static inline void update_sched_rq_watermark(struct rq *rq) - { -- unsigned long watermark = sched_queue_watermark(rq); -+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_BITS); - unsigned long last_wm = rq->watermark; - unsigned long i; - int cpu; -@@ -6045,19 +6070,6 @@ void dump_cpu_task(int cpu) - sched_show_task(cpu_curr(cpu)); - } - --/* -- * Init idle task and put into queue structure of rq -- * IMPORTANT: may be called multiple times for a single cpu -- */ --static inline void sched_queue_init_idle(struct sched_queue *q, -- struct task_struct *idle) --{ -- idle->sq_idx = IDLE_TASK_SCHED_PRIO; -- 
INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -- list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -- set_bit(idle->sq_idx, q->bitmap); --} -- - /** - * init_idle - set up an idle thread for a given CPU - * @idle: task in question -@@ -6677,6 +6689,7 @@ void __init sched_init(void) - struct rq *rq; - - printk(KERN_INFO ALT_SCHED_VERSION_MSG); -+ sched_imp_init(); - - wait_bit_init(); - -@@ -6695,7 +6708,7 @@ void __init sched_init(void) - for_each_possible_cpu(i) { - rq = cpu_rq(i); - -- sched_queue_init(rq); -+ sched_queue_init(&rq->queue); - rq->watermark = IDLE_WM; - rq->skip = NULL; - -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index f5bd651a7666..7858ac1185ce 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -62,26 +62,13 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - } - } - -+static inline void sched_imp_init(void) {} -+ - inline int task_running_nice(struct task_struct *p) - { - return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); - } - --static inline unsigned long sched_queue_watermark(struct rq *rq) --{ -- return find_first_bit(rq->queue.bitmap, SCHED_BITS); --} -- --static inline void sched_queue_init(struct rq *rq) --{ -- struct sched_queue *q = &rq->queue; -- int i; -- -- bitmap_zero(q->bitmap, SCHED_BITS); -- for(i = 0; i < SCHED_BITS; i++) -- INIT_LIST_HEAD(&q->heads[i]); --} -- - /* - * This routine used in bmq scheduler only which assume the idle task in the bmq - */ -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index c29122334bda..64631b2770fe 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -14,13 +14,7 @@ static const u64 user_prio2deadline[NICE_WIDTH] = { - #define SCHED_PRIO_SLOT (4ULL << 20) - #define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) - --static inline int normal_prio(struct task_struct *p) --{ -- if (task_has_rt_policy(p)) -- return MAX_RT_PRIO - 1 - p->rt_priority; -- -- return MAX_RT_PRIO; --} -+DECLARE_BITMAP(normal_mask, SCHED_BITS); - - extern int alt_debug[20]; - -@@ -64,13 +58,49 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) - (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; - } - -+static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) -+{ -+ if (IDLE_TASK_SCHED_PRIO == idx || -+ idx < MAX_RT_PRIO) -+ return idx; -+ -+ return MAX_RT_PRIO + -+ ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; -+} -+ -+static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) -+{ -+ if (IDLE_TASK_SCHED_PRIO == idx || -+ idx < MAX_RT_PRIO) -+ return idx; -+ -+ return MAX_RT_PRIO + -+ ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; -+} -+ -+static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) -+{ -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = rq->clock + -+ SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); -+} -+ -+/* -+ * Common interfaces -+ */ -+static inline int normal_prio(struct task_struct *p) -+{ -+ if (task_has_rt_policy(p)) -+ return MAX_RT_PRIO - 1 - p->rt_priority; -+ -+ return MAX_RT_PRIO; -+} -+ - int task_running_nice(struct task_struct *p) - { - return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; - } - --DECLARE_BITMAP(normal_mask, SCHED_BITS); -- - static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) - { - DECLARE_BITMAP(normal, SCHED_BITS); -@@ -116,13 +146,6 @@ static inline void update_rq_time_edge(struct rq *rq) - } - } - --static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) --{ -- if 
(p->prio >= MAX_RT_PRIO) -- p->deadline = rq->clock + -- SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); --} -- - static inline void requeue_task(struct task_struct *p, struct rq *rq); - - static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -@@ -134,38 +157,9 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - requeue_task(p, rq); - } - --/* -- * Init the queue structure in rq -- */ --static inline void sched_queue_init(struct rq *rq) -+static inline void sched_imp_init(void) - { -- struct sched_queue *q = &rq->queue; -- int i; -- - bitmap_set(normal_mask, MAX_RT_PRIO, 20); -- bitmap_zero(q->bitmap, SCHED_BITS); -- for(i = 0; i < SCHED_BITS; i++) -- INIT_LIST_HEAD(&q->heads[i]); --} -- --static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) --{ -- if (IDLE_TASK_SCHED_PRIO == idx || -- idx < MAX_RT_PRIO) -- return idx; -- -- return MAX_RT_PRIO + -- ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; --} -- --static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) --{ -- if (IDLE_TASK_SCHED_PRIO == idx || -- idx < MAX_RT_PRIO) -- return idx; -- -- return MAX_RT_PRIO + -- ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; - } - - /* -@@ -196,11 +190,6 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) - return list_next_entry(p, sq_node); - } - --static inline unsigned long sched_queue_watermark(struct rq *rq) --{ -- return find_first_bit(rq->queue.bitmap, SCHED_BITS); --} -- - #define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ - psi_dequeue(p, flags & DEQUEUE_SLEEP); \ - sched_info_dequeued(rq, p); \ --- -2.37.0 - - -From d7e78ce37e84fa1b89a567d655d2ab923b055caa Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 19 May 2021 10:56:37 +0000 -Subject: [PATCH 157/297] sched/pds: Refine task_sched_prio() and - task_sched_prio_idx() - -idle task should never be queued/dequued/requeued or be woken. 
---- - kernel/sched/pds.h | 20 +++++++------------- - 1 file changed, 7 insertions(+), 13 deletions(-) - -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 64631b2770fe..62b5ab738876 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -36,26 +36,20 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - static inline int - task_sched_prio(const struct task_struct *p, const struct rq *rq) - { -- if (p == rq->idle) -- return IDLE_TASK_SCHED_PRIO; -- -- if (p->prio < MAX_RT_PRIO) -- return p->prio; -+ if (p->prio >= MAX_RT_PRIO) -+ return MAX_RT_PRIO + task_sched_prio_normal(p, rq); - -- return MAX_RT_PRIO + task_sched_prio_normal(p, rq); -+ return p->prio; - } - - static inline int - task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) - { -- if (p == rq->idle) -- return IDLE_TASK_SCHED_PRIO; -- -- if (p->prio < MAX_RT_PRIO) -- return p->prio; -+ if (p->prio >= MAX_RT_PRIO) -+ return MAX_RT_PRIO + -+ (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; - -- return MAX_RT_PRIO + -- (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; -+ return p->prio; - } - - static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) --- -2.37.0 - - -From acd958d1847d946f3f4c2a47ca4aa951babcd42c Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 24 May 2021 21:46:42 +0000 -Subject: [PATCH 158/297] sched/pds: Fix unexpected larger delta in - task_sched_prio_normal() - ---- - kernel/sched/alt_core.c | 23 ++++++++++++----------- - kernel/sched/bmq.h | 10 ++++++---- - kernel/sched/pds.h | 39 +++++++++++++++++++++------------------ - 3 files changed, 39 insertions(+), 33 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c81a9fc6a140..21dc24e855eb 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1328,6 +1328,7 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int - - raw_spin_lock(&rq->lock); - BUG_ON(task_cpu(p) != new_cpu); -+ sched_task_sanity_check(p, rq); - enqueue_task(p, rq, 0); - p->on_rq = TASK_ON_RQ_QUEUED; - check_preempt_curr(rq); -@@ -1656,7 +1657,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) - return dest_cpu; - } - --static inline int select_task_rq(struct task_struct *p, struct rq *rq) -+static inline int select_task_rq(struct task_struct *p) - { - cpumask_t chk_mask, tmp; - -@@ -1669,7 +1670,7 @@ static inline int select_task_rq(struct task_struct *p, struct rq *rq) - #endif - cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || - cpumask_and(&tmp, &chk_mask, -- &sched_rq_watermark[task_sched_prio(p, rq) + 1])) -+ &sched_rq_watermark[task_sched_prio(p) + 1])) - return best_mask_cpu(task_cpu(p), &tmp); - - return best_mask_cpu(task_cpu(p), &chk_mask); -@@ -1823,7 +1824,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); - - #else /* CONFIG_SMP */ - --static inline int select_task_rq(struct task_struct *p, struct rq *rq) -+static inline int select_task_rq(struct task_struct *p) - { - return 0; - } -@@ -2360,7 +2361,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - - sched_task_ttwu(p); - -- cpu = select_task_rq(p, this_rq()); -+ cpu = select_task_rq(p); - - if (cpu != task_cpu(p)) { - if (p->in_iowait) { -@@ -2662,7 +2663,7 @@ void wake_up_new_task(struct task_struct *p) - - p->state = TASK_RUNNING; - -- rq = cpu_rq(select_task_rq(p, this_rq())); -+ rq = cpu_rq(select_task_rq(p)); - #ifdef CONFIG_SMP - rseq_migrate(p); - /* -@@ -3265,7 +3266,7 @@ void sched_exec(void) - if (rq != 
task_rq(p) || rq->nr_running < 2) - goto unlock; - -- dest_cpu = select_task_rq(p, task_rq(p)); -+ dest_cpu = select_task_rq(p); - if (dest_cpu == smp_processor_id()) - goto unlock; - -@@ -3847,7 +3848,7 @@ inline void alt_sched_debug(void) - { - int i; - -- for (i = 0; i < 3; i++) -+ for (i = 0; i < 6; i++) - printk(KERN_INFO "sched: %d\n", alt_debug[i]); - } - #endif -@@ -4562,7 +4563,7 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag - } - EXPORT_SYMBOL(default_wake_function); - --static inline void check_task_changed(struct rq *rq, struct task_struct *p) -+static inline void check_task_changed(struct task_struct *p, struct rq *rq) - { - /* Trigger resched if task sched_prio has been modified. */ - if (task_on_rq_queued(p) && sched_task_need_requeue(p, rq)) { -@@ -4654,7 +4655,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) - trace_sched_pi_setprio(p, pi_task); - p->prio = prio; - -- check_task_changed(rq, p); -+ check_task_changed(p, rq); - out_unlock: - /* Avoid rq from going away on us: */ - preempt_disable(); -@@ -4698,7 +4699,7 @@ void set_user_nice(struct task_struct *p, long nice) - - p->prio = effective_prio(p); - -- check_task_changed(rq, p); -+ check_task_changed(p, rq); - out_unlock: - __task_access_unlock(p, lock); - raw_spin_unlock_irqrestore(&p->pi_lock, flags); -@@ -5027,7 +5028,7 @@ static int __sched_setscheduler(struct task_struct *p, - - __setscheduler(rq, p, attr, pi); - -- check_task_changed(rq, p); -+ check_task_changed(p, rq); - - /* Avoid rq from going away on us: */ - preempt_disable(); -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index 7858ac1185ce..eea8cb31ca1a 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -44,7 +44,7 @@ static inline int normal_prio(struct task_struct *p) - return p->static_prio + MAX_PRIORITY_ADJ; - } - --static inline int task_sched_prio(struct task_struct *p, struct rq *rq) -+static inline int task_sched_prio(struct task_struct *p) - { - return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; - } -@@ -62,6 +62,8 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - } - } - -+static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) {} -+ - static inline void sched_imp_init(void) {} - - inline int task_running_nice(struct task_struct *p) -@@ -110,13 +112,13 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) - sched_info_queued(rq, p); \ - psi_enqueue(p, flags); \ - \ -- p->sq_idx = task_sched_prio(p, rq); \ -+ p->sq_idx = task_sched_prio(p); \ - list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ - set_bit(p->sq_idx, rq->queue.bitmap) - - #define __SCHED_REQUEUE_TASK(p, rq, func) \ - { \ -- int idx = task_sched_prio(p, rq); \ -+ int idx = task_sched_prio(p); \ - \ - list_del(&p->sq_node); \ - list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ -@@ -131,7 +133,7 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) - - static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) - { -- return (task_sched_prio(p, rq) != p->sq_idx); -+ return (task_sched_prio(p) != p->sq_idx); - } - - static void sched_task_fork(struct task_struct *p, struct rq *rq) -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 62b5ab738876..7eac80b83fb3 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -21,23 +21,22 @@ extern int alt_debug[20]; - static inline int - task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { -- int delta; -+ int delta = (p->deadline >> 23) - rq->time_edge - 1; - -- delta = rq->time_edge + 20 - (p->deadline >> 23); -- if (delta < 0) { -- delta = 0; -- alt_debug[0]++; -+ if (unlikely(delta > 19)) { -+ pr_info("pds: task_sched_prio_normal delta %d, deadline %llu(%llu), time_edge %llu\n", -+ delta, p->deadline, p->deadline >> 23, rq->time_edge); -+ delta = 19; - } -- delta = 19 - min(delta, 19); - -- return delta; -+ return (delta < 0)? 
0:delta; - } - - static inline int --task_sched_prio(const struct task_struct *p, const struct rq *rq) -+task_sched_prio(const struct task_struct *p) - { - if (p->prio >= MAX_RT_PRIO) -- return MAX_RT_PRIO + task_sched_prio_normal(p, rq); -+ return MAX_RT_PRIO + task_sched_prio_normal(p, task_rq(p)); - - return p->prio; - } -@@ -92,7 +91,7 @@ static inline int normal_prio(struct task_struct *p) - - int task_running_nice(struct task_struct *p) - { -- return task_sched_prio(p, task_rq(p)) > DEFAULT_SCHED_PRIO; -+ return task_sched_prio(p) > DEFAULT_SCHED_PRIO; - } - - static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) -@@ -117,7 +116,7 @@ static inline void update_rq_time_edge(struct rq *rq) - if (now == old) - return; - -- delta = min(20ULL, now - old); -+ delta = min_t(u64, 20, now - old); - INIT_LIST_HEAD(&head); - - prio = MAX_RT_PRIO; -@@ -151,6 +150,12 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - requeue_task(p, rq); - } - -+static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) -+{ -+ if (unlikely(p->deadline > rq->clock + 40 * SCHED_PRIO_SLOT)) -+ p->deadline = rq->clock + 40 * SCHED_PRIO_SLOT; -+} -+ - static inline void sched_imp_init(void) - { - bitmap_set(normal_mask, MAX_RT_PRIO, 20); -@@ -212,11 +217,12 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) - \ - list_del(&p->sq_node); \ - list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ -- if (idx != p->sq_idx) { \ -+ if (idx != p->sq_idx) { \ - if (list_empty(&rq->queue.heads[p->sq_idx])) \ -- clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ -+ clear_bit(sched_idx2prio(p->sq_idx, rq), \ -+ rq->queue.bitmap); \ - p->sq_idx = idx; \ -- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ -+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ - func; \ - } \ - } -@@ -249,10 +255,7 @@ int task_prio(const struct task_struct *p) - if (p->prio < MAX_RT_PRIO) - return (p->prio - MAX_RT_PRIO); - -- /*preempt_disable(); -- ret = task_sched_prio(p, task_rq(p)) - MAX_RT_PRIO;*/ -- ret = p->static_prio - MAX_RT_PRIO; -- /*preempt_enable();*/ -+ ret = task_sched_prio(p) - MAX_RT_PRIO; - - return ret; - } --- -2.37.0 - - -From 1466e72c04940a3afcab4240de77724c2c444bd4 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 25 May 2021 09:56:33 +0000 -Subject: [PATCH 159/297] sched/pds: Rewrite - task_sched_prio/task_sched_prio_idx/sched_prio2idx/sched_idx2prio - ---- - kernel/sched/pds.h | 27 +++++++-------------------- - 1 file changed, 7 insertions(+), 20 deletions(-) - -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 7eac80b83fb3..8a1841e52e91 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -35,39 +35,26 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - static inline int - task_sched_prio(const struct task_struct *p) - { -- if (p->prio >= MAX_RT_PRIO) -- return MAX_RT_PRIO + task_sched_prio_normal(p, task_rq(p)); -- -- return p->prio; -+ return (p->prio < MAX_RT_PRIO) ? p->prio : -+ MAX_RT_PRIO + task_sched_prio_normal(p, task_rq(p)); - } - - static inline int - task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) - { -- if (p->prio >= MAX_RT_PRIO) -- return MAX_RT_PRIO + -- (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; -- -- return p->prio; -+ return (p->prio < MAX_RT_PRIO) ? 
p->prio : MAX_RT_PRIO + -+ (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; - } - - static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) - { -- if (IDLE_TASK_SCHED_PRIO == idx || -- idx < MAX_RT_PRIO) -- return idx; -- -- return MAX_RT_PRIO + -- ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; -+ return (IDLE_TASK_SCHED_PRIO == idx || idx < MAX_RT_PRIO) ? idx : -+ MAX_RT_PRIO + ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; - } - - static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) - { -- if (IDLE_TASK_SCHED_PRIO == idx || -- idx < MAX_RT_PRIO) -- return idx; -- -- return MAX_RT_PRIO + -+ return (idx < MAX_RT_PRIO) ? idx : MAX_RT_PRIO + - ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; - } - --- -2.37.0 - - -From 77bcd8ac6bacddd905ed63b0c6756389c65a510e Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 25 May 2021 10:51:11 +0000 -Subject: [PATCH 160/297] sched/alt: One less bit for sched_queue.bitmap - ---- - kernel/sched/alt_core.c | 3 +-- - kernel/sched/alt_sched.h | 4 +++- - kernel/sched/bmq.h | 10 +++++----- - kernel/sched/pds.h | 4 ++-- - 4 files changed, 11 insertions(+), 10 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 21dc24e855eb..8fd6fd9ec2ea 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -169,14 +169,13 @@ static inline void sched_queue_init_idle(struct sched_queue *q, - idle->sq_idx = IDLE_TASK_SCHED_PRIO; - INIT_LIST_HEAD(&q->heads[idle->sq_idx]); - list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -- set_bit(idle->sq_idx, q->bitmap); - } - - - /* water mark related functions*/ - static inline void update_sched_rq_watermark(struct rq *rq) - { -- unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); - unsigned long last_wm = rq->watermark; - unsigned long i; - int cpu; -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 58ff6212b446..76ec6f9c737b 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -136,8 +136,10 @@ static inline int task_on_rq_migrating(struct task_struct *p) - #define WF_MIGRATED 0x04 /* internal use, task got migrated */ - #define WF_ON_CPU 0x08 /* Wakee is on_rq */ - -+#define SCHED_QUEUE_BITS (SCHED_BITS - 1) -+ - struct sched_queue { -- DECLARE_BITMAP(bitmap, SCHED_BITS); -+ DECLARE_BITMAP(bitmap, SCHED_QUEUE_BITS); - struct list_head heads[SCHED_BITS]; - }; - -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index eea8cb31ca1a..85e4c477eda8 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -76,7 +76,7 @@ inline int task_running_nice(struct task_struct *p) - */ - static inline struct task_struct *sched_rq_first_task(struct rq *rq) - { -- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); - const struct list_head *head = &rq->queue.heads[idx]; - - return list_first_entry(head, struct task_struct, sq_node); -@@ -89,7 +89,7 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) - struct list_head *head = &rq->queue.heads[idx]; - - if (list_is_last(&p->sq_node, head)) { -- idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, idx + 1); -+ idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, idx + 1); - head = &rq->queue.heads[idx]; - - return list_first_entry(head, struct task_struct, sq_node); -@@ -104,7 +104,7 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) - \ - 
list_del(&p->sq_node); \ - if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -- clear_bit(p->sq_idx, rq->queue.bitmap);\ -+ clear_bit(p->sq_idx, rq->queue.bitmap); \ - func; \ - } - -@@ -122,9 +122,9 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) - \ - list_del(&p->sq_node); \ - list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ -- if (idx != p->sq_idx) { \ -+ if (idx != p->sq_idx) { \ - if (list_empty(&rq->queue.heads[p->sq_idx])) \ -- clear_bit(p->sq_idx, rq->queue.bitmap); \ -+ clear_bit(p->sq_idx, rq->queue.bitmap); \ - p->sq_idx = idx; \ - set_bit(p->sq_idx, rq->queue.bitmap); \ - func; \ -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 8a1841e52e91..ee3d5cfac781 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -153,7 +153,7 @@ static inline void sched_imp_init(void) - */ - static inline struct task_struct *sched_rq_first_task(struct rq *rq) - { -- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_BITS); -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); - const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; - - return list_first_entry(head, struct task_struct, sq_node); -@@ -166,7 +166,7 @@ sched_rq_next_task(struct task_struct *p, struct rq *rq) - struct list_head *head = &rq->queue.heads[idx]; - - if (list_is_last(&p->sq_node, head)) { -- idx = find_next_bit(rq->queue.bitmap, SCHED_BITS, -+ idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, - sched_idx2prio(idx, rq) + 1); - head = &rq->queue.heads[sched_prio2idx(idx, rq)]; - --- -2.37.0 - - -From 723d5c879541fa1022d4a769fef554aca300c9f4 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 25 May 2021 14:08:11 +0000 -Subject: [PATCH 161/297] sched/pds: Introduce SCHED_NORMAL_PRIO_NUM - ---- - kernel/sched/alt_sched.h | 3 ++- - kernel/sched/pds.h | 30 ++++++++++++++++++------------ - 2 files changed, 20 insertions(+), 13 deletions(-) - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 76ec6f9c737b..1a579536fd30 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -55,8 +55,9 @@ - #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) - #endif - #ifdef CONFIG_SCHED_PDS -+#define SCHED_NORMAL_PRIO_NUM (NICE_WIDTH / 2) - /* bits: RT(0-99), nice width / 2, cpu idle task */ --#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + 1) -+#define SCHED_BITS (MAX_RT_PRIO + SCHED_NORMAL_PRIO_NUM + 1) - #endif - - #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index ee3d5cfac781..effd38a024d1 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -21,12 +21,12 @@ extern int alt_debug[20]; - static inline int - task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { -- int delta = (p->deadline >> 23) - rq->time_edge - 1; -+ s64 delta = (p->deadline >> 23) - rq->time_edge - 1; - -- if (unlikely(delta > 19)) { -- pr_info("pds: task_sched_prio_normal delta %d, deadline %llu(%llu), time_edge %llu\n", -+ if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { -+ pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", - delta, p->deadline, p->deadline >> 23, rq->time_edge); -- delta = 19; -+ delta = SCHED_NORMAL_PRIO_NUM - 1ULL; - } - - return (delta < 0)? 0:delta; -@@ -43,19 +43,23 @@ static inline int - task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) - { - return (p->prio < MAX_RT_PRIO) ? 
p->prio : MAX_RT_PRIO + -- (task_sched_prio_normal(p, rq) + rq->time_edge) % 20; -+ (task_sched_prio_normal(p, rq) + rq->time_edge) % -+ SCHED_NORMAL_PRIO_NUM; - } - - static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) - { - return (IDLE_TASK_SCHED_PRIO == idx || idx < MAX_RT_PRIO) ? idx : -- MAX_RT_PRIO + ((idx - MAX_RT_PRIO) + rq->time_edge) % 20; -+ MAX_RT_PRIO + ((idx - MAX_RT_PRIO) + rq->time_edge) % -+ SCHED_NORMAL_PRIO_NUM; - } - - static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) - { - return (idx < MAX_RT_PRIO) ? idx : MAX_RT_PRIO + -- ((idx - MAX_RT_PRIO) + 20 - rq->time_edge % 20) % 20; -+ ((idx - MAX_RT_PRIO) + SCHED_NORMAL_PRIO_NUM - -+ rq->time_edge % SCHED_NORMAL_PRIO_NUM) % -+ SCHED_NORMAL_PRIO_NUM; - } - - static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) -@@ -103,25 +107,27 @@ static inline void update_rq_time_edge(struct rq *rq) - if (now == old) - return; - -- delta = min_t(u64, 20, now - old); -+ delta = min_t(u64, SCHED_NORMAL_PRIO_NUM, now - old); - INIT_LIST_HEAD(&head); - - prio = MAX_RT_PRIO; - for_each_set_bit_from(prio, rq->queue.bitmap, MAX_RT_PRIO + delta) { - u64 idx; - -- idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + rq->time_edge) % 20; -+ idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + rq->time_edge) % -+ SCHED_NORMAL_PRIO_NUM; - list_splice_tail_init(rq->queue.heads + idx, &head); - } - sched_shift_normal_bitmap(rq->queue.bitmap, delta); - rq->time_edge = now; - if (!list_empty(&head)) { - struct task_struct *p; -+ u64 new_idx = MAX_RT_PRIO + now % SCHED_NORMAL_PRIO_NUM; - - list_for_each_entry(p, &head, sq_node) -- p->sq_idx = MAX_RT_PRIO + now % 20; -+ p->sq_idx = new_idx; - -- list_splice(&head, rq->queue.heads + MAX_RT_PRIO + now % 20); -+ list_splice(&head, rq->queue.heads + new_idx); - set_bit(MAX_RT_PRIO, rq->queue.bitmap); - } - } -@@ -145,7 +151,7 @@ static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) - - static inline void sched_imp_init(void) - { -- bitmap_set(normal_mask, MAX_RT_PRIO, 20); -+ bitmap_set(normal_mask, MAX_RT_PRIO, SCHED_NORMAL_PRIO_NUM); - } - - /* --- -2.37.0 - - -From 8951168051627eeb110c2a3ee161c7f5993d8de6 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 25 May 2021 15:00:58 +0000 -Subject: [PATCH 162/297] sched/pds: SCHED_NORMAL_PRIO_NUM to 40 - ---- - kernel/sched/alt_sched.h | 2 +- - kernel/sched/pds.h | 8 ++++---- - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 1a579536fd30..eb5e8d31686c 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -55,7 +55,7 @@ - #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) - #endif - #ifdef CONFIG_SCHED_PDS --#define SCHED_NORMAL_PRIO_NUM (NICE_WIDTH / 2) -+#define SCHED_NORMAL_PRIO_NUM (NICE_WIDTH) - /* bits: RT(0-99), nice width / 2, cpu idle task */ - #define SCHED_BITS (MAX_RT_PRIO + SCHED_NORMAL_PRIO_NUM + 1) - #endif -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index effd38a024d1..b1ea68e43ba7 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -12,7 +12,7 @@ static const u64 user_prio2deadline[NICE_WIDTH] = { - }; - - #define SCHED_PRIO_SLOT (4ULL << 20) --#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + 10) -+#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + SCHED_NORMAL_PRIO_NUM / 2) - - DECLARE_BITMAP(normal_mask, SCHED_BITS); - -@@ -21,11 +21,11 @@ extern int alt_debug[20]; - static inline int - task_sched_prio_normal(const struct 
task_struct *p, const struct rq *rq) - { -- s64 delta = (p->deadline >> 23) - rq->time_edge - 1; -+ s64 delta = (p->deadline >> 22) - rq->time_edge - 1; - - if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { - pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", -- delta, p->deadline, p->deadline >> 23, rq->time_edge); -+ delta, p->deadline, p->deadline >> 22, rq->time_edge); - delta = SCHED_NORMAL_PRIO_NUM - 1ULL; - } - -@@ -101,7 +101,7 @@ static inline void update_rq_time_edge(struct rq *rq) - { - struct list_head head; - u64 old = rq->time_edge; -- u64 now = rq->clock >> 23; -+ u64 now = rq->clock >> 22; - u64 prio, delta; - - if (now == old) --- -2.37.0 - - -From a24585db80f4a0816bee71006bac92dcecc35c03 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 27 May 2021 14:34:44 +0000 -Subject: [PATCH 163/297] sched/pds: Change MIN_NORMAL_PRIO to 128 - ---- - include/linux/sched/prio.h | 17 +++++++++++++++++ - kernel/sched/alt_sched.h | 7 ++++--- - kernel/sched/pds.h | 31 +++++++++++++++++-------------- - 3 files changed, 38 insertions(+), 17 deletions(-) - -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index 4d4f92bffeea..a191f253771b 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -18,14 +18,31 @@ - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -+#ifdef CONFIG_SCHED_ALT -+ -+/* Undefine MAX_PRIO and DEFAULT_PRIO */ -+#undef MAX_PRIO -+#undef DEFAULT_PRIO -+ - /* +/- priority levels from the base priority */ - #ifdef CONFIG_SCHED_BMQ - #define MAX_PRIORITY_ADJ 7 -+ -+#define MIN_NORMAL_PRIO (MAX_RT_PRIO) -+#define MAX_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH) -+#define DEFAULT_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH / 2) - #endif -+ - #ifdef CONFIG_SCHED_PDS - #define MAX_PRIORITY_ADJ 0 -+ -+#define MIN_NORMAL_PRIO (128) -+#define MAX_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH) -+#define DEFAULT_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH / 2) - #endif - -+#endif /* CONFIG_SCHED_ALT */ -+ - /* - * Convert user-nice values [ -20 ... 0 ... 19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index eb5e8d31686c..52e1baa4f5da 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -54,11 +54,12 @@ - * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ - #define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) - #endif -+ - #ifdef CONFIG_SCHED_PDS - #define SCHED_NORMAL_PRIO_NUM (NICE_WIDTH) --/* bits: RT(0-99), nice width / 2, cpu idle task */ --#define SCHED_BITS (MAX_RT_PRIO + SCHED_NORMAL_PRIO_NUM + 1) --#endif -+/* bits: RT(0-99), reserved(100-127), SCHED_NORMAL_PRIO_NUM, cpu idle task */ -+#define SCHED_BITS (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM + 1) -+#endif /* CONFIG_SCHED_PDS */ - - #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) - -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index b1ea68e43ba7..4a181e6ed52f 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -12,7 +12,7 @@ static const u64 user_prio2deadline[NICE_WIDTH] = { - }; - - #define SCHED_PRIO_SLOT (4ULL << 20) --#define DEFAULT_SCHED_PRIO (MAX_RT_PRIO + SCHED_NORMAL_PRIO_NUM / 2) -+#define DEFAULT_SCHED_PRIO (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM / 2) - - DECLARE_BITMAP(normal_mask, SCHED_BITS); - -@@ -36,13 +36,13 @@ static inline int - task_sched_prio(const struct task_struct *p) - { - return (p->prio < MAX_RT_PRIO) ? 
p->prio : -- MAX_RT_PRIO + task_sched_prio_normal(p, task_rq(p)); -+ MIN_NORMAL_PRIO + task_sched_prio_normal(p, task_rq(p)); - } - - static inline int - task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) - { -- return (p->prio < MAX_RT_PRIO) ? p->prio : MAX_RT_PRIO + -+ return (p->prio < MAX_RT_PRIO) ? p->prio : MIN_NORMAL_PRIO + - (task_sched_prio_normal(p, rq) + rq->time_edge) % - SCHED_NORMAL_PRIO_NUM; - } -@@ -50,14 +50,15 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) - static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) - { - return (IDLE_TASK_SCHED_PRIO == idx || idx < MAX_RT_PRIO) ? idx : -- MAX_RT_PRIO + ((idx - MAX_RT_PRIO) + rq->time_edge) % -+ MIN_NORMAL_PRIO + -+ ((idx - MIN_NORMAL_PRIO) + rq->time_edge) % - SCHED_NORMAL_PRIO_NUM; - } - - static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) - { -- return (idx < MAX_RT_PRIO) ? idx : MAX_RT_PRIO + -- ((idx - MAX_RT_PRIO) + SCHED_NORMAL_PRIO_NUM - -+ return (idx < MAX_RT_PRIO) ? idx : MIN_NORMAL_PRIO + -+ ((idx - MIN_NORMAL_PRIO) + SCHED_NORMAL_PRIO_NUM - - rq->time_edge % SCHED_NORMAL_PRIO_NUM) % - SCHED_NORMAL_PRIO_NUM; - } -@@ -66,7 +67,8 @@ static inline void sched_renew_deadline(struct task_struct *p, const struct rq * - { - if (p->prio >= MAX_RT_PRIO) - p->deadline = rq->clock + -- SCHED_PRIO_SLOT * (p->static_prio - MAX_RT_PRIO + 1); -+ SCHED_PRIO_SLOT * -+ (p->static_prio - MIN_NORMAL_PRIO + 1); - } - - /* -@@ -110,11 +112,12 @@ static inline void update_rq_time_edge(struct rq *rq) - delta = min_t(u64, SCHED_NORMAL_PRIO_NUM, now - old); - INIT_LIST_HEAD(&head); - -- prio = MAX_RT_PRIO; -- for_each_set_bit_from(prio, rq->queue.bitmap, MAX_RT_PRIO + delta) { -+ prio = MIN_NORMAL_PRIO; -+ for_each_set_bit_from(prio, rq->queue.bitmap, MIN_NORMAL_PRIO + delta) { - u64 idx; - -- idx = MAX_RT_PRIO + ((prio - MAX_RT_PRIO) + rq->time_edge) % -+ idx = MIN_NORMAL_PRIO + -+ ((prio - MIN_NORMAL_PRIO) + rq->time_edge) % - SCHED_NORMAL_PRIO_NUM; - list_splice_tail_init(rq->queue.heads + idx, &head); - } -@@ -122,13 +125,13 @@ static inline void update_rq_time_edge(struct rq *rq) - rq->time_edge = now; - if (!list_empty(&head)) { - struct task_struct *p; -- u64 new_idx = MAX_RT_PRIO + now % SCHED_NORMAL_PRIO_NUM; -+ u64 new_idx = MIN_NORMAL_PRIO + now % SCHED_NORMAL_PRIO_NUM; - - list_for_each_entry(p, &head, sq_node) - p->sq_idx = new_idx; - - list_splice(&head, rq->queue.heads + new_idx); -- set_bit(MAX_RT_PRIO, rq->queue.bitmap); -+ set_bit(MIN_NORMAL_PRIO, rq->queue.bitmap); - } - } - -@@ -151,7 +154,7 @@ static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) - - static inline void sched_imp_init(void) - { -- bitmap_set(normal_mask, MAX_RT_PRIO, SCHED_NORMAL_PRIO_NUM); -+ bitmap_set(normal_mask, MIN_NORMAL_PRIO, SCHED_NORMAL_PRIO_NUM); - } - - /* -@@ -248,7 +251,7 @@ int task_prio(const struct task_struct *p) - if (p->prio < MAX_RT_PRIO) - return (p->prio - MAX_RT_PRIO); - -- ret = task_sched_prio(p) - MAX_RT_PRIO; -+ ret = task_sched_prio(p) - MIN_NORMAL_PRIO; - - return ret; - } --- -2.37.0 - - -From 2205d2e629a8d04a821e6710936b708b8889bb94 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 27 May 2021 15:19:51 +0000 -Subject: [PATCH 164/297] sched/pds: Optimization for MIN_NORMAL_PRIO=128 - ---- - kernel/sched/alt_core.c | 1 - - kernel/sched/bmq.h | 2 -- - kernel/sched/pds.h | 34 ++++++---------------------------- - 3 files changed, 6 insertions(+), 31 deletions(-) - -diff --git a/kernel/sched/alt_core.c 
b/kernel/sched/alt_core.c -index 8fd6fd9ec2ea..56c527cbcff5 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6689,7 +6689,6 @@ void __init sched_init(void) - struct rq *rq; - - printk(KERN_INFO ALT_SCHED_VERSION_MSG); -- sched_imp_init(); - - wait_bit_init(); - -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index 85e4c477eda8..ed6995865d81 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -64,8 +64,6 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - - static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) {} - --static inline void sched_imp_init(void) {} -- - inline int task_running_nice(struct task_struct *p) - { - return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 4a181e6ed52f..79121046e892 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -14,8 +14,6 @@ static const u64 user_prio2deadline[NICE_WIDTH] = { - #define SCHED_PRIO_SLOT (4ULL << 20) - #define DEFAULT_SCHED_PRIO (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM / 2) - --DECLARE_BITMAP(normal_mask, SCHED_BITS); -- - extern int alt_debug[20]; - - static inline int -@@ -66,8 +64,7 @@ static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) - static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) - { - if (p->prio >= MAX_RT_PRIO) -- p->deadline = rq->clock + -- SCHED_PRIO_SLOT * -+ p->deadline = rq->clock + SCHED_PRIO_SLOT * - (p->static_prio - MIN_NORMAL_PRIO + 1); - } - -@@ -87,18 +84,6 @@ int task_running_nice(struct task_struct *p) - return task_sched_prio(p) > DEFAULT_SCHED_PRIO; - } - --static inline void sched_shift_normal_bitmap(unsigned long *mask, unsigned int shift) --{ -- DECLARE_BITMAP(normal, SCHED_BITS); -- -- bitmap_and(normal, mask, normal_mask, SCHED_BITS); -- bitmap_shift_right(normal, normal, shift, SCHED_BITS); -- bitmap_and(normal, normal, normal_mask, SCHED_BITS); -- -- bitmap_andnot(mask, mask, normal_mask, SCHED_BITS); -- bitmap_or(mask, mask, normal, SCHED_BITS); --} -- - static inline void update_rq_time_edge(struct rq *rq) - { - struct list_head head; -@@ -112,26 +97,24 @@ static inline void update_rq_time_edge(struct rq *rq) - delta = min_t(u64, SCHED_NORMAL_PRIO_NUM, now - old); - INIT_LIST_HEAD(&head); - -- prio = MIN_NORMAL_PRIO; -- for_each_set_bit_from(prio, rq->queue.bitmap, MIN_NORMAL_PRIO + delta) { -+ for_each_set_bit(prio, &rq->queue.bitmap[2], delta) { - u64 idx; - - idx = MIN_NORMAL_PRIO + -- ((prio - MIN_NORMAL_PRIO) + rq->time_edge) % -- SCHED_NORMAL_PRIO_NUM; -+ (prio + rq->time_edge) % SCHED_NORMAL_PRIO_NUM; - list_splice_tail_init(rq->queue.heads + idx, &head); - } -- sched_shift_normal_bitmap(rq->queue.bitmap, delta); -+ rq->queue.bitmap[2] >>= delta; - rq->time_edge = now; - if (!list_empty(&head)) { -- struct task_struct *p; - u64 new_idx = MIN_NORMAL_PRIO + now % SCHED_NORMAL_PRIO_NUM; -+ struct task_struct *p; - - list_for_each_entry(p, &head, sq_node) - p->sq_idx = new_idx; - - list_splice(&head, rq->queue.heads + new_idx); -- set_bit(MIN_NORMAL_PRIO, rq->queue.bitmap); -+ rq->queue.bitmap[2] |= 1UL; - } - } - -@@ -152,11 +135,6 @@ static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) - p->deadline = rq->clock + 40 * SCHED_PRIO_SLOT; - } - --static inline void sched_imp_init(void) --{ -- bitmap_set(normal_mask, MIN_NORMAL_PRIO, SCHED_NORMAL_PRIO_NUM); --} -- - /* - * This routine assume that the idle task always in queue - 
*/ --- -2.37.0 - - -From 8a244645ca795a11c1a7f06b64ed6790bfb48e5a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 28 May 2021 10:13:57 +0000 -Subject: [PATCH 165/297] sched/alt: Machine friendly time slice value - ---- - kernel/sched/alt_core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 56c527cbcff5..b553f5fa60dd 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -77,7 +77,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define STOP_PRIO (MAX_RT_PRIO - 1) - - /* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ --u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000); -+u64 sched_timeslice_ns __read_mostly = (4 << 20); - - static int __init sched_timeslice(char *str) - { -@@ -85,14 +85,14 @@ static int __init sched_timeslice(char *str) - - get_option(&str, ×lice_us); - if (timeslice_us >= 1000) -- sched_timeslice_ns = timeslice_us * 1000; -+ sched_timeslice_ns = (timeslice_us / 1000) << 20; - - return 0; - } - early_param("sched_timeslice", sched_timeslice); - - /* Reschedule if less than this many μs left */ --#define RESCHED_NS (100 * 1000) -+#define RESCHED_NS (100 << 10) - - /** - * sched_yield_type - Choose what sort of yield sched_yield will perform. --- -2.37.0 - - -From 0b3e9d9f56ab201fc165896356f8e0d38ea42c71 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 28 May 2021 14:47:49 +0000 -Subject: [PATCH 166/297] sched/pds: Default 2ms time slice - ---- - kernel/sched/alt_core.c | 3 ++- - kernel/sched/bmq.h | 2 ++ - kernel/sched/pds.h | 51 ++++++++++++++++++----------------------- - 3 files changed, 26 insertions(+), 30 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b553f5fa60dd..db8f5b24089d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -77,7 +77,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define STOP_PRIO (MAX_RT_PRIO - 1) - - /* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ --u64 sched_timeslice_ns __read_mostly = (4 << 20); -+u64 sched_timeslice_ns __read_mostly = (2 << 20); - - static int __init sched_timeslice(char *str) - { -@@ -6689,6 +6689,7 @@ void __init sched_init(void) - struct rq *rq; - - printk(KERN_INFO ALT_SCHED_VERSION_MSG); -+ sched_imp_init(); - - wait_bit_init(); - -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index ed6995865d81..7299b5cc9a87 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -36,6 +36,8 @@ static inline void deboost_task(struct task_struct *p) - /* - * Common interfaces - */ -+static inline void sched_imp_init(void) {} -+ - static inline int normal_prio(struct task_struct *p) - { - if (task_has_rt_policy(p)) -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 79121046e892..6bba054465d3 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -1,17 +1,7 @@ - #define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" - --static const u64 user_prio2deadline[NICE_WIDTH] = { --/* -20 */ 4194304, 4613734, 5075107, 5582617, 6140878, --/* -15 */ 6754965, 7430461, 8173507, 8990857, 9889942, --/* -10 */ 10878936, 11966829, 13163511, 14479862, 15927848, --/* -5 */ 17520632, 19272695, 21199964, 23319960, 25651956, --/* 0 */ 28217151, 31038866, 34142752, 37557027, 41312729, --/* 5 */ 45444001, 49988401, 54987241, 60485965, 66534561, --/* 10 */ 73188017, 80506818, 88557499, 97413248, 
107154572, --/* 15 */ 117870029, 129657031, 142622734, 156885007, 172573507 --}; -- --#define SCHED_PRIO_SLOT (4ULL << 20) -+static u64 user_prio2deadline[NICE_WIDTH]; -+ - #define DEFAULT_SCHED_PRIO (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM / 2) - - extern int alt_debug[20]; -@@ -19,11 +9,11 @@ extern int alt_debug[20]; - static inline int - task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { -- s64 delta = (p->deadline >> 22) - rq->time_edge - 1; -+ s64 delta = (p->deadline >> 21) - rq->time_edge - 1; - - if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { - pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", -- delta, p->deadline, p->deadline >> 22, rq->time_edge); -+ delta, p->deadline, p->deadline >> 21, rq->time_edge); - delta = SCHED_NORMAL_PRIO_NUM - 1ULL; - } - -@@ -48,8 +38,7 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) - static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) - { - return (IDLE_TASK_SCHED_PRIO == idx || idx < MAX_RT_PRIO) ? idx : -- MIN_NORMAL_PRIO + -- ((idx - MIN_NORMAL_PRIO) + rq->time_edge) % -+ MIN_NORMAL_PRIO + ((idx - MIN_NORMAL_PRIO) + rq->time_edge) % - SCHED_NORMAL_PRIO_NUM; - } - -@@ -64,13 +53,23 @@ static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) - static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) - { - if (p->prio >= MAX_RT_PRIO) -- p->deadline = rq->clock + SCHED_PRIO_SLOT * -- (p->static_prio - MIN_NORMAL_PRIO + 1); -+ p->deadline = rq->clock + -+ user_prio2deadline[p->static_prio - MIN_NORMAL_PRIO]; - } - - /* - * Common interfaces - */ -+static inline void sched_imp_init(void) -+{ -+ int i; -+ -+ user_prio2deadline[0] = sched_timeslice_ns; -+ for (i = 1; i < NICE_WIDTH; i++) -+ user_prio2deadline[i] = -+ user_prio2deadline[i - 1] + sched_timeslice_ns; -+} -+ - static inline int normal_prio(struct task_struct *p) - { - if (task_has_rt_policy(p)) -@@ -88,7 +87,7 @@ static inline void update_rq_time_edge(struct rq *rq) - { - struct list_head head; - u64 old = rq->time_edge; -- u64 now = rq->clock >> 22; -+ u64 now = rq->clock >> 21; - u64 prio, delta; - - if (now == old) -@@ -131,8 +130,8 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - - static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) - { -- if (unlikely(p->deadline > rq->clock + 40 * SCHED_PRIO_SLOT)) -- p->deadline = rq->clock + 40 * SCHED_PRIO_SLOT; -+ if (unlikely(p->deadline > rq->clock + user_prio2deadline[NICE_WIDTH - 1])) -+ p->deadline = rq->clock + user_prio2deadline[NICE_WIDTH - 1]; - } - - /* -@@ -224,14 +223,8 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) - */ - int task_prio(const struct task_struct *p) - { -- int ret; -- -- if (p->prio < MAX_RT_PRIO) -- return (p->prio - MAX_RT_PRIO); -- -- ret = task_sched_prio(p) - MIN_NORMAL_PRIO; -- -- return ret; -+ return (p->prio < MAX_RT_PRIO) ? 
p->prio - MAX_RT_PRIO : -+ task_sched_prio(p) - MIN_NORMAL_PRIO; - } - - static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) --- -2.37.0 - - -From 646019ef9049053285adc0bb1b5b0329db8fcd7e Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 1 Jun 2021 11:29:16 +0000 -Subject: [PATCH 167/297] sched/pds: Code clean up - ---- - kernel/sched/pds.h | 14 +++++--------- - 1 file changed, 5 insertions(+), 9 deletions(-) - -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 6bba054465d3..d7f772401b3e 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -2,8 +2,6 @@ - - static u64 user_prio2deadline[NICE_WIDTH]; - --#define DEFAULT_SCHED_PRIO (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM / 2) -- - extern int alt_debug[20]; - - static inline int -@@ -14,10 +12,10 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { - pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", - delta, p->deadline, p->deadline >> 21, rq->time_edge); -- delta = SCHED_NORMAL_PRIO_NUM - 1ULL; -+ return SCHED_NORMAL_PRIO_NUM - 1ULL; - } - -- return (delta < 0)? 0:delta; -+ return (delta < 0) ? 0 : delta; - } - - static inline int -@@ -72,15 +70,13 @@ static inline void sched_imp_init(void) - - static inline int normal_prio(struct task_struct *p) - { -- if (task_has_rt_policy(p)) -- return MAX_RT_PRIO - 1 - p->rt_priority; -- -- return MAX_RT_PRIO; -+ return task_has_rt_policy(p) ? (MAX_RT_PRIO - 1 - p->rt_priority) : -+ MAX_RT_PRIO; - } - - int task_running_nice(struct task_struct *p) - { -- return task_sched_prio(p) > DEFAULT_SCHED_PRIO; -+ return task_sched_prio(p) > DEFAULT_PRIO; - } - - static inline void update_rq_time_edge(struct rq *rq) --- -2.37.0 - - -From b7ff88eacb7024a2aa67ca5254aa086443a195be Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 2 Jun 2021 10:25:26 +0000 -Subject: [PATCH 168/297] sched/pds: SCHED_NORMAL_PRIO_NUM to 64 - ---- - include/linux/sched/prio.h | 9 +++++---- - kernel/sched/alt_sched.h | 2 +- - kernel/sched/pds.h | 12 ++++++------ - 3 files changed, 12 insertions(+), 11 deletions(-) - -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index a191f253771b..6af9ae681116 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -26,7 +26,7 @@ - - /* +/- priority levels from the base priority */ - #ifdef CONFIG_SCHED_BMQ --#define MAX_PRIORITY_ADJ 7 -+#define MAX_PRIORITY_ADJ (7) - - #define MIN_NORMAL_PRIO (MAX_RT_PRIO) - #define MAX_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH) -@@ -34,11 +34,12 @@ - #endif - - #ifdef CONFIG_SCHED_PDS --#define MAX_PRIORITY_ADJ 0 -+#define MAX_PRIORITY_ADJ (0) - - #define MIN_NORMAL_PRIO (128) --#define MAX_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH) --#define DEFAULT_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH / 2) -+#define NORMAL_PRIO_NUM (64) -+#define MAX_PRIO (MIN_NORMAL_PRIO + NORMAL_PRIO_NUM) -+#define DEFAULT_PRIO (MAX_PRIO - NICE_WIDTH / 2) - #endif - - #endif /* CONFIG_SCHED_ALT */ -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 52e1baa4f5da..db89d3d3be63 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -56,7 +56,7 @@ - #endif - - #ifdef CONFIG_SCHED_PDS --#define SCHED_NORMAL_PRIO_NUM (NICE_WIDTH) -+#define SCHED_NORMAL_PRIO_NUM (NORMAL_PRIO_NUM) - /* bits: RT(0-99), reserved(100-127), SCHED_NORMAL_PRIO_NUM, cpu idle task */ - #define SCHED_BITS (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM + 1) - #endif /* CONFIG_SCHED_PDS */ -diff --git 
a/kernel/sched/pds.h b/kernel/sched/pds.h -index d7f772401b3e..5abc6a9d0b9b 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -7,7 +7,8 @@ extern int alt_debug[20]; - static inline int - task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { -- s64 delta = (p->deadline >> 21) - rq->time_edge - 1; -+ s64 delta = (p->deadline >> 21) - rq->time_edge + -+ SCHED_NORMAL_PRIO_NUM - NICE_WIDTH - 1; - - if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { - pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", -@@ -51,8 +52,8 @@ static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) - static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) - { - if (p->prio >= MAX_RT_PRIO) -- p->deadline = rq->clock + -- user_prio2deadline[p->static_prio - MIN_NORMAL_PRIO]; -+ p->deadline = rq->clock + user_prio2deadline[p->static_prio - -+ (MAX_PRIO - NICE_WIDTH)]; - } - - /* -@@ -95,8 +96,7 @@ static inline void update_rq_time_edge(struct rq *rq) - for_each_set_bit(prio, &rq->queue.bitmap[2], delta) { - u64 idx; - -- idx = MIN_NORMAL_PRIO + -- (prio + rq->time_edge) % SCHED_NORMAL_PRIO_NUM; -+ idx = MIN_NORMAL_PRIO + (prio + old) % SCHED_NORMAL_PRIO_NUM; - list_splice_tail_init(rq->queue.heads + idx, &head); - } - rq->queue.bitmap[2] >>= delta; -@@ -220,7 +220,7 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) - int task_prio(const struct task_struct *p) - { - return (p->prio < MAX_RT_PRIO) ? p->prio - MAX_RT_PRIO : -- task_sched_prio(p) - MIN_NORMAL_PRIO; -+ task_sched_prio_normal(p, task_rq(p)); - } - - static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) --- -2.37.0 - - -From 933843a21fb8b21518c42735ef8dd02f99551444 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 3 Jun 2021 09:31:01 +0000 -Subject: [PATCH 169/297] sched/pds: Optimize MOD operation when - NORMAL_PRIO_NUM==64 - ---- - kernel/sched/alt_sched.h | 5 ++--- - kernel/sched/pds.h | 33 +++++++++++++++++---------------- - 2 files changed, 19 insertions(+), 19 deletions(-) - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index db89d3d3be63..f9f79422bf0e 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -56,9 +56,8 @@ - #endif - - #ifdef CONFIG_SCHED_PDS --#define SCHED_NORMAL_PRIO_NUM (NORMAL_PRIO_NUM) --/* bits: RT(0-99), reserved(100-127), SCHED_NORMAL_PRIO_NUM, cpu idle task */ --#define SCHED_BITS (MIN_NORMAL_PRIO + SCHED_NORMAL_PRIO_NUM + 1) -+/* bits: RT(0-99), reserved(100-127), NORMAL_PRIO_NUM, cpu idle task */ -+#define SCHED_BITS (MIN_NORMAL_PRIO + NORMAL_PRIO_NUM + 1) - #endif /* CONFIG_SCHED_PDS */ - - #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 5abc6a9d0b9b..41e9873d8cd7 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -4,16 +4,18 @@ static u64 user_prio2deadline[NICE_WIDTH]; - - extern int alt_debug[20]; - -+#define NORMAL_PRIO_MOD(x) ((x) & (NORMAL_PRIO_NUM - 1)) -+ - static inline int - task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { - s64 delta = (p->deadline >> 21) - rq->time_edge + -- SCHED_NORMAL_PRIO_NUM - NICE_WIDTH - 1; -+ NORMAL_PRIO_NUM - NICE_WIDTH - 1; - -- if (unlikely(delta > SCHED_NORMAL_PRIO_NUM - 1)) { -+ if (unlikely(delta > NORMAL_PRIO_NUM - 1)) { - pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", - delta, p->deadline, p->deadline >> 21, rq->time_edge); -- return 
SCHED_NORMAL_PRIO_NUM - 1ULL; -+ return NORMAL_PRIO_NUM - 1; - } - - return (delta < 0) ? 0 : delta; -@@ -30,23 +32,21 @@ static inline int - task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) - { - return (p->prio < MAX_RT_PRIO) ? p->prio : MIN_NORMAL_PRIO + -- (task_sched_prio_normal(p, rq) + rq->time_edge) % -- SCHED_NORMAL_PRIO_NUM; -+ NORMAL_PRIO_MOD(task_sched_prio_normal(p, rq) + rq->time_edge); - } - --static inline unsigned long sched_prio2idx(unsigned long idx, struct rq *rq) -+static inline unsigned long sched_prio2idx(unsigned long prio, struct rq *rq) - { -- return (IDLE_TASK_SCHED_PRIO == idx || idx < MAX_RT_PRIO) ? idx : -- MIN_NORMAL_PRIO + ((idx - MIN_NORMAL_PRIO) + rq->time_edge) % -- SCHED_NORMAL_PRIO_NUM; -+ return (IDLE_TASK_SCHED_PRIO == prio || prio < MAX_RT_PRIO) ? prio : -+ MIN_NORMAL_PRIO + NORMAL_PRIO_MOD((prio - MIN_NORMAL_PRIO) + -+ rq->time_edge); - } - - static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) - { - return (idx < MAX_RT_PRIO) ? idx : MIN_NORMAL_PRIO + -- ((idx - MIN_NORMAL_PRIO) + SCHED_NORMAL_PRIO_NUM - -- rq->time_edge % SCHED_NORMAL_PRIO_NUM) % -- SCHED_NORMAL_PRIO_NUM; -+ NORMAL_PRIO_MOD((idx - MIN_NORMAL_PRIO) + NORMAL_PRIO_NUM - -+ NORMAL_PRIO_MOD(rq->time_edge)); - } - - static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) -@@ -90,19 +90,20 @@ static inline void update_rq_time_edge(struct rq *rq) - if (now == old) - return; - -- delta = min_t(u64, SCHED_NORMAL_PRIO_NUM, now - old); -+ delta = min_t(u64, NORMAL_PRIO_NUM, now - old); - INIT_LIST_HEAD(&head); - - for_each_set_bit(prio, &rq->queue.bitmap[2], delta) { - u64 idx; - -- idx = MIN_NORMAL_PRIO + (prio + old) % SCHED_NORMAL_PRIO_NUM; -+ idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(prio + old); - list_splice_tail_init(rq->queue.heads + idx, &head); - } -- rq->queue.bitmap[2] >>= delta; -+ rq->queue.bitmap[2] = (NORMAL_PRIO_NUM == delta) ? 0UL : -+ rq->queue.bitmap[2] >> delta; - rq->time_edge = now; - if (!list_empty(&head)) { -- u64 new_idx = MIN_NORMAL_PRIO + now % SCHED_NORMAL_PRIO_NUM; -+ u64 new_idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); - struct task_struct *p; - - list_for_each_entry(p, &head, sq_node) --- -2.37.0 - - -From 6c682a2c0b8a680256765402c3294e9b4d55cc5d Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 5 Jun 2021 21:28:50 +0000 -Subject: [PATCH 170/297] sched/pds: Code clean up - ---- - kernel/sched/pds.h | 18 +++++++----------- - 1 file changed, 7 insertions(+), 11 deletions(-) - -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 41e9873d8cd7..5ce0a16eb454 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -21,8 +21,7 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - return (delta < 0) ? 0 : delta; - } - --static inline int --task_sched_prio(const struct task_struct *p) -+static inline int task_sched_prio(const struct task_struct *p) - { - return (p->prio < MAX_RT_PRIO) ? 
p->prio : - MIN_NORMAL_PRIO + task_sched_prio_normal(p, task_rq(p)); -@@ -93,23 +92,21 @@ static inline void update_rq_time_edge(struct rq *rq) - delta = min_t(u64, NORMAL_PRIO_NUM, now - old); - INIT_LIST_HEAD(&head); - -- for_each_set_bit(prio, &rq->queue.bitmap[2], delta) { -- u64 idx; -+ for_each_set_bit(prio, &rq->queue.bitmap[2], delta) -+ list_splice_tail_init(rq->queue.heads + MIN_NORMAL_PRIO + -+ NORMAL_PRIO_MOD(prio + old), &head); - -- idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(prio + old); -- list_splice_tail_init(rq->queue.heads + idx, &head); -- } - rq->queue.bitmap[2] = (NORMAL_PRIO_NUM == delta) ? 0UL : - rq->queue.bitmap[2] >> delta; - rq->time_edge = now; - if (!list_empty(&head)) { -- u64 new_idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); -+ u64 idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); - struct task_struct *p; - - list_for_each_entry(p, &head, sq_node) -- p->sq_idx = new_idx; -+ p->sq_idx = idx; - -- list_splice(&head, rq->queue.heads + new_idx); -+ list_splice(&head, rq->queue.heads + idx); - rq->queue.bitmap[2] |= 1UL; - } - } -@@ -118,7 +115,6 @@ static inline void requeue_task(struct task_struct *p, struct rq *rq); - - static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - { -- /*printk(KERN_INFO "sched: time_slice_expired(%d) - %px\n", cpu_of(rq), p);*/ - p->time_slice = sched_timeslice_ns; - sched_renew_deadline(p, rq); - if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) --- -2.37.0 - - -From 197fcf258831095d311a7cf3ea262c24f4406487 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 6 Jun 2021 09:32:26 +0000 -Subject: [PATCH 171/297] sched/alt: Merge BMQ&PDS common code (II) - ---- - kernel/sched/alt_core.c | 89 +++++++++++++++++++++++++++--- - kernel/sched/bmq.h | 117 ++++++++++------------------------------ - kernel/sched/pds.h | 96 ++------------------------------- - 3 files changed, 113 insertions(+), 189 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index db8f5b24089d..626bd8d20c4f 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -142,6 +142,8 @@ static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; - #endif - static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; - -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ - #ifdef CONFIG_SCHED_BMQ - #include "bmq.h" - #endif -@@ -171,8 +173,7 @@ static inline void sched_queue_init_idle(struct sched_queue *q, - list_add(&idle->sq_node, &q->heads[idle->sq_idx]); - } - -- --/* water mark related functions*/ -+/* water mark related functions */ - static inline void update_sched_rq_watermark(struct rq *rq) - { - unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); -@@ -180,8 +181,6 @@ static inline void update_sched_rq_watermark(struct rq *rq) - unsigned long i; - int cpu; - -- /*printk(KERN_INFO "sched: watermark(%d) %d, last %d\n", -- cpu_of(rq), watermark, last_wm);*/ - if (watermark == last_wm) - return; - -@@ -216,6 +215,34 @@ static inline void update_sched_rq_watermark(struct rq *rq) - #endif - } - -+/* -+ * This routine assume that the idle task always in queue -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); -+ const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -+ -+ return list_first_entry(head, struct task_struct, sq_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, 
struct rq *rq) -+{ -+ unsigned long idx = p->sq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->sq_node, head)) { -+ idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, -+ sched_idx2prio(idx, rq) + 1); -+ head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -+ -+ return list_first_entry(head, struct task_struct, sq_node); -+ } -+ -+ return list_next_entry(p, sq_node); -+} -+ - static inline struct task_struct *rq_runnable_task(struct rq *rq) - { - struct task_struct *next = sched_rq_first_task(rq); -@@ -563,6 +590,25 @@ static inline void sched_update_tick_dependency(struct rq *rq) { } - * Add/Remove/Requeue task to/from the runqueue routines - * Context: rq->lock - */ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeued(rq, p); \ -+ \ -+ list_del(&p->sq_node); \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -+ clear_bit(sched_idx2prio(p->sq_idx, rq), \ -+ rq->queue.bitmap); \ -+ func; \ -+ } -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_queued(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->sq_idx = task_sched_prio_idx(p, rq); \ -+ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); -+ - static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) - { - lockdep_assert_held(&rq->lock); -@@ -602,12 +648,25 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) - - static inline void requeue_task(struct task_struct *p, struct rq *rq) - { -+ int idx; -+ - lockdep_assert_held(&rq->lock); - /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ - WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", - cpu_of(rq), task_cpu(p)); - -- __SCHED_REQUEUE_TASK(p, rq, update_sched_rq_watermark(rq)); -+ idx = task_sched_prio_idx(p, rq); -+ -+ list_del(&p->sq_node); -+ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); -+ if (idx != p->sq_idx) { -+ if (list_empty(&rq->queue.heads[p->sq_idx])) -+ clear_bit(sched_idx2prio(p->sq_idx, rq), -+ rq->queue.bitmap); -+ p->sq_idx = idx; -+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); -+ update_sched_rq_watermark(rq); -+ } - } - - /* -@@ -4565,7 +4624,7 @@ EXPORT_SYMBOL(default_wake_function); - static inline void check_task_changed(struct task_struct *p, struct rq *rq) - { - /* Trigger resched if task sched_prio has been modified. */ -- if (task_on_rq_queued(p) && sched_task_need_requeue(p, rq)) { -+ if (task_on_rq_queued(p) && task_sched_prio_idx(p, rq) != p->sq_idx) { - requeue_task(p, rq); - check_preempt_curr(rq); - } -@@ -4755,6 +4814,24 @@ SYSCALL_DEFINE1(nice, int, increment) - - #endif - -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * -+ * sched policy return value kernel prio user prio/nice -+ * -+ * (BMQ)normal, batch, idle[0 ... 53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] -+ * (PDS)normal, batch, idle[0 ... 39] 100 0/[-20 ... 19] -+ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ return (p->prio < MAX_RT_PRIO) ? p->prio - MAX_RT_PRIO : -+ task_sched_prio_normal(p, task_rq(p)); -+} -+ - /** - * idle_cpu - is a given CPU idle currently? - * @cpu: the processor in question. 
-diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index 7299b5cc9a87..840173f29e42 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -36,6 +36,33 @@ static inline void deboost_task(struct task_struct *p) - /* - * Common interfaces - */ -+static inline int -+task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) -+{ -+ return p->prio + p->boost_prio - MAX_RT_PRIO; -+} -+ -+static inline int task_sched_prio(const struct task_struct *p) -+{ -+ return (p->prio < MAX_RT_PRIO)? p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; -+} -+ -+static inline int -+task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) -+{ -+ return task_sched_prio(p); -+} -+ -+static inline unsigned long sched_prio2idx(unsigned long prio, struct rq *rq) -+{ -+ return prio; -+} -+ -+static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) -+{ -+ return idx; -+} -+ - static inline void sched_imp_init(void) {} - - static inline int normal_prio(struct task_struct *p) -@@ -46,13 +73,6 @@ static inline int normal_prio(struct task_struct *p) - return p->static_prio + MAX_PRIORITY_ADJ; - } - --static inline int task_sched_prio(struct task_struct *p) --{ -- return (p->prio < MAX_RT_PRIO)? p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; --} -- --static inline void requeue_task(struct task_struct *p, struct rq *rq); -- - static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - { - p->time_slice = sched_timeslice_ns; -@@ -71,95 +91,12 @@ inline int task_running_nice(struct task_struct *p) - return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); - } - --/* -- * This routine used in bmq scheduler only which assume the idle task in the bmq -- */ --static inline struct task_struct *sched_rq_first_task(struct rq *rq) --{ -- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); -- const struct list_head *head = &rq->queue.heads[idx]; -- -- return list_first_entry(head, struct task_struct, sq_node); --} -- --static inline struct task_struct * --sched_rq_next_task(struct task_struct *p, struct rq *rq) --{ -- unsigned long idx = p->sq_idx; -- struct list_head *head = &rq->queue.heads[idx]; -- -- if (list_is_last(&p->sq_node, head)) { -- idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, idx + 1); -- head = &rq->queue.heads[idx]; -- -- return list_first_entry(head, struct task_struct, sq_node); -- } -- -- return list_next_entry(p, sq_node); --} -- --#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -- sched_info_dequeued(rq, p); \ -- \ -- list_del(&p->sq_node); \ -- if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -- clear_bit(p->sq_idx, rq->queue.bitmap); \ -- func; \ -- } -- --#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -- sched_info_queued(rq, p); \ -- psi_enqueue(p, flags); \ -- \ -- p->sq_idx = task_sched_prio(p); \ -- list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -- set_bit(p->sq_idx, rq->queue.bitmap) -- --#define __SCHED_REQUEUE_TASK(p, rq, func) \ --{ \ -- int idx = task_sched_prio(p); \ --\ -- list_del(&p->sq_node); \ -- list_add_tail(&p->sq_node, &rq->queue.heads[idx]); \ -- if (idx != p->sq_idx) { \ -- if (list_empty(&rq->queue.heads[p->sq_idx])) \ -- clear_bit(p->sq_idx, rq->queue.bitmap); \ -- p->sq_idx = idx; \ -- set_bit(p->sq_idx, rq->queue.bitmap); \ -- func; \ -- } \ --} -- --static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) --{ -- return (task_sched_prio(p) != p->sq_idx); --} -- - static 
void sched_task_fork(struct task_struct *p, struct rq *rq) - { - p->boost_prio = (p->boost_prio < 0) ? - p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; - } - --/** -- * task_prio - return the priority value of a given task. -- * @p: the task in question. -- * -- * Return: The priority value as seen by users in /proc. -- * -- * sched policy return value kernel prio user prio/nice/boost -- * -- * normal, batch, idle [0 ... 53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] -- * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] -- */ --int task_prio(const struct task_struct *p) --{ -- if (p->prio < MAX_RT_PRIO) -- return (p->prio - MAX_RT_PRIO); -- return (p->prio - MAX_RT_PRIO + p->boost_prio); --} -- - static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) - { - p->boost_prio = MAX_PRIORITY_ADJ; -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 5ce0a16eb454..31c6bd4d29c8 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -6,6 +6,9 @@ extern int alt_debug[20]; - - #define NORMAL_PRIO_MOD(x) ((x) & (NORMAL_PRIO_NUM - 1)) - -+/* -+ * Common interfaces -+ */ - static inline int - task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { -@@ -55,9 +58,6 @@ static inline void sched_renew_deadline(struct task_struct *p, const struct rq * - (MAX_PRIO - NICE_WIDTH)]; - } - --/* -- * Common interfaces -- */ - static inline void sched_imp_init(void) - { - int i; -@@ -111,8 +111,6 @@ static inline void update_rq_time_edge(struct rq *rq) - } - } - --static inline void requeue_task(struct task_struct *p, struct rq *rq); -- - static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - { - p->time_slice = sched_timeslice_ns; -@@ -127,99 +125,11 @@ static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) - p->deadline = rq->clock + user_prio2deadline[NICE_WIDTH - 1]; - } - --/* -- * This routine assume that the idle task always in queue -- */ --static inline struct task_struct *sched_rq_first_task(struct rq *rq) --{ -- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); -- const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -- -- return list_first_entry(head, struct task_struct, sq_node); --} -- --static inline struct task_struct * --sched_rq_next_task(struct task_struct *p, struct rq *rq) --{ -- unsigned long idx = p->sq_idx; -- struct list_head *head = &rq->queue.heads[idx]; -- -- if (list_is_last(&p->sq_node, head)) { -- idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, -- sched_idx2prio(idx, rq) + 1); -- head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -- -- return list_first_entry(head, struct task_struct, sq_node); -- } -- -- return list_next_entry(p, sq_node); --} -- --#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -- sched_info_dequeued(rq, p); \ -- \ -- list_del(&p->sq_node); \ -- if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -- clear_bit(sched_idx2prio(p->sq_idx, rq), \ -- rq->queue.bitmap); \ -- func; \ -- } -- --#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -- sched_info_queued(rq, p); \ -- psi_enqueue(p, flags); \ -- \ -- p->sq_idx = task_sched_prio_idx(p, rq); \ -- list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); -- --/* -- * Requeue a task @p to @rq -- */ --#define __SCHED_REQUEUE_TASK(p, rq, func) \ --{\ -- int idx = task_sched_prio_idx(p, rq); \ --\ -- list_del(&p->sq_node); \ -- list_add_tail(&p->sq_node, &rq->queue.heads[idx]); 
\ -- if (idx != p->sq_idx) { \ -- if (list_empty(&rq->queue.heads[p->sq_idx])) \ -- clear_bit(sched_idx2prio(p->sq_idx, rq), \ -- rq->queue.bitmap); \ -- p->sq_idx = idx; \ -- set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ -- func; \ -- } \ --} -- --static inline bool sched_task_need_requeue(struct task_struct *p, struct rq *rq) --{ -- return (task_sched_prio_idx(p, rq) != p->sq_idx); --} -- - static void sched_task_fork(struct task_struct *p, struct rq *rq) - { - sched_renew_deadline(p, rq); - } - --/** -- * task_prio - return the priority value of a given task. -- * @p: the task in question. -- * -- * Return: The priority value as seen by users in /proc. -- * -- * sched policy return value kernel prio user prio/nice -- * -- * normal, batch, idle [0 ... 39] 100 0/[-20 ... 19] -- * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] -- */ --int task_prio(const struct task_struct *p) --{ -- return (p->prio < MAX_RT_PRIO) ? p->prio - MAX_RT_PRIO : -- task_sched_prio_normal(p, task_rq(p)); --} -- - static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) - { - time_slice_expired(p, rq); --- -2.37.0 - - -From 41412d3db98db2fa1d3a4ed9de6cca1da3edfbff Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 6 Jun 2021 18:04:37 +0000 -Subject: [PATCH 172/297] sched/pds: Introduce sched_timeslice_shift - ---- - kernel/sched/alt_core.c | 28 +++++++++++++++------------- - kernel/sched/bmq.h | 2 ++ - kernel/sched/pds.h | 13 ++++++++++--- - 3 files changed, 27 insertions(+), 16 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 626bd8d20c4f..799605256a19 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -79,13 +79,24 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - /* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ - u64 sched_timeslice_ns __read_mostly = (2 << 20); - -+static inline void requeue_task(struct task_struct *p, struct rq *rq); -+ -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq.h" -+#endif -+#ifdef CONFIG_SCHED_PDS -+#include "pds.h" -+#endif -+ - static int __init sched_timeslice(char *str) - { -- int timeslice_us; -+ int timeslice_ms; - -- get_option(&str, ×lice_us); -- if (timeslice_us >= 1000) -- sched_timeslice_ns = (timeslice_us / 1000) << 20; -+ get_option(&str, ×lice_ms); -+ if (2 != timeslice_ms) -+ timeslice_ms = 4; -+ sched_timeslice_ns = timeslice_ms << 20; -+ sched_timeslice_imp(timeslice_ms); - - return 0; - } -@@ -142,15 +153,6 @@ static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; - #endif - static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; - --static inline void requeue_task(struct task_struct *p, struct rq *rq); -- --#ifdef CONFIG_SCHED_BMQ --#include "bmq.h" --#endif --#ifdef CONFIG_SCHED_PDS --#include "pds.h" --#endif -- - /* sched_queue related functions */ - static inline void sched_queue_init(struct sched_queue *q) - { -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index 840173f29e42..f9f58c21c1e4 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -36,6 +36,8 @@ static inline void deboost_task(struct task_struct *p) - /* - * Common interfaces - */ -+static inline void sched_timeslice_imp(const int timeslice_ms) {} -+ - static inline int - task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 31c6bd4d29c8..b9b19c6a7622 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -1,6 +1,7 @@ - #define 
ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" - - static u64 user_prio2deadline[NICE_WIDTH]; -+static int sched_timeslice_shift = 22; - - extern int alt_debug[20]; - -@@ -9,15 +10,21 @@ extern int alt_debug[20]; - /* - * Common interfaces - */ -+static inline void sched_timeslice_imp(const int timeslice_ms) -+{ -+ if (2 == timeslice_ms) -+ sched_timeslice_shift = 21; -+} -+ - static inline int - task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { -- s64 delta = (p->deadline >> 21) - rq->time_edge + -+ s64 delta = (p->deadline >> sched_timeslice_shift) - rq->time_edge + - NORMAL_PRIO_NUM - NICE_WIDTH - 1; - - if (unlikely(delta > NORMAL_PRIO_NUM - 1)) { - pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", -- delta, p->deadline, p->deadline >> 21, rq->time_edge); -+ delta, p->deadline, p->deadline >> sched_timeslice_shift, rq->time_edge); - return NORMAL_PRIO_NUM - 1; - } - -@@ -83,7 +90,7 @@ static inline void update_rq_time_edge(struct rq *rq) - { - struct list_head head; - u64 old = rq->time_edge; -- u64 now = rq->clock >> 21; -+ u64 now = rq->clock >> sched_timeslice_shift; - u64 prio, delta; - - if (now == old) --- -2.37.0 - - -From f1bdc0b54db882449520211ce31c6a8249797ecb Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 7 Jun 2021 09:31:11 +0000 -Subject: [PATCH 173/297] sched/pds: Optimize task deadline - ---- - include/linux/sched/deadline.h | 2 +- - kernel/sched/alt_core.c | 11 +---------- - kernel/sched/bmq.h | 2 -- - kernel/sched/pds.h | 29 ++++++++--------------------- - 4 files changed, 10 insertions(+), 34 deletions(-) - -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 20c59b190b1a..fa30f98cb2be 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -12,7 +12,7 @@ static inline int dl_task(struct task_struct *p) - #endif - - #ifdef CONFIG_SCHED_PDS --#define __tsk_deadline(p) ((((u64) ((p)->prio))<<56) | (((p)->deadline)>>8)) -+#define __tsk_deadline(p) ((((u64) ((p)->prio))<<56) | (p)->deadline) - #endif - - #else -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 799605256a19..946983ca5763 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3902,15 +3902,7 @@ void alt_sched_debug(void) - sched_sg_idle_mask.bits[0]); - } - #else --int alt_debug[20]; -- --inline void alt_sched_debug(void) --{ -- int i; -- -- for (i = 0; i < 6; i++) -- printk(KERN_INFO "sched: %d\n", alt_debug[i]); --} -+inline void alt_sched_debug(void) {} - #endif - - #ifdef CONFIG_SMP -@@ -6768,7 +6760,6 @@ void __init sched_init(void) - struct rq *rq; - - printk(KERN_INFO ALT_SCHED_VERSION_MSG); -- sched_imp_init(); - - wait_bit_init(); - -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index f9f58c21c1e4..b425f8979b6f 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -65,8 +65,6 @@ static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) - return idx; - } - --static inline void sched_imp_init(void) {} -- - static inline int normal_prio(struct task_struct *p) - { - if (task_has_rt_policy(p)) -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index b9b19c6a7622..4898b3ae8e41 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -1,10 +1,7 @@ - #define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" - --static u64 user_prio2deadline[NICE_WIDTH]; - static int sched_timeslice_shift = 22; - --extern 
int alt_debug[20]; -- - #define NORMAL_PRIO_MOD(x) ((x) & (NORMAL_PRIO_NUM - 1)) - - /* -@@ -19,12 +16,11 @@ static inline void sched_timeslice_imp(const int timeslice_ms) - static inline int - task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { -- s64 delta = (p->deadline >> sched_timeslice_shift) - rq->time_edge + -- NORMAL_PRIO_NUM - NICE_WIDTH - 1; -+ s64 delta = p->deadline - rq->time_edge + NORMAL_PRIO_NUM - NICE_WIDTH; - - if (unlikely(delta > NORMAL_PRIO_NUM - 1)) { -- pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu(%llu), time_edge %llu\n", -- delta, p->deadline, p->deadline >> sched_timeslice_shift, rq->time_edge); -+ pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu, time_edge %llu\n", -+ delta, p->deadline, rq->time_edge); - return NORMAL_PRIO_NUM - 1; - } - -@@ -61,18 +57,8 @@ static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) - static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) - { - if (p->prio >= MAX_RT_PRIO) -- p->deadline = rq->clock + user_prio2deadline[p->static_prio - -- (MAX_PRIO - NICE_WIDTH)]; --} -- --static inline void sched_imp_init(void) --{ -- int i; -- -- user_prio2deadline[0] = sched_timeslice_ns; -- for (i = 1; i < NICE_WIDTH; i++) -- user_prio2deadline[i] = -- user_prio2deadline[i - 1] + sched_timeslice_ns; -+ p->deadline = (rq->clock >> sched_timeslice_shift) + -+ p->static_prio - (MAX_PRIO - NICE_WIDTH); - } - - static inline int normal_prio(struct task_struct *p) -@@ -128,8 +114,9 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - - static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) - { -- if (unlikely(p->deadline > rq->clock + user_prio2deadline[NICE_WIDTH - 1])) -- p->deadline = rq->clock + user_prio2deadline[NICE_WIDTH - 1]; -+ u64 max_dl = rq->time_edge + NICE_WIDTH - 1; -+ if (unlikely(p->deadline > max_dl)) -+ p->deadline = max_dl; - } - - static void sched_task_fork(struct task_struct *p, struct rq *rq) --- -2.37.0 - - -From a06dd8ed4d172d532f029e7030447d36cfdab3a2 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 11 Jun 2021 18:19:19 +0000 -Subject: [PATCH 174/297] sched/pds: Optimize parameter and return types - ---- - kernel/sched/bmq.h | 4 ++-- - kernel/sched/pds.h | 4 ++-- - 2 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index b425f8979b6f..76db5eb21a01 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -55,12 +55,12 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) - return task_sched_prio(p); - } - --static inline unsigned long sched_prio2idx(unsigned long prio, struct rq *rq) -+static inline int sched_prio2idx(int prio, struct rq *rq) - { - return prio; - } - --static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) -+static inline int sched_idx2prio(int idx, struct rq *rq) - { - return idx; - } -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 4898b3ae8e41..ed336dd35809 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -40,14 +40,14 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) - NORMAL_PRIO_MOD(task_sched_prio_normal(p, rq) + rq->time_edge); - } - --static inline unsigned long sched_prio2idx(unsigned long prio, struct rq *rq) -+static inline int sched_prio2idx(int prio, struct rq *rq) - { - return (IDLE_TASK_SCHED_PRIO == prio || prio < MAX_RT_PRIO) ? 
prio : - MIN_NORMAL_PRIO + NORMAL_PRIO_MOD((prio - MIN_NORMAL_PRIO) + - rq->time_edge); - } - --static inline unsigned long sched_idx2prio(unsigned long idx, struct rq *rq) -+static inline int sched_idx2prio(int idx, struct rq *rq) - { - return (idx < MAX_RT_PRIO) ? idx : MIN_NORMAL_PRIO + - NORMAL_PRIO_MOD((idx - MIN_NORMAL_PRIO) + NORMAL_PRIO_NUM - --- -2.37.0 - - -From 957d9475a9c6ee7522f5228a278aac130ad69d61 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 13 Jun 2021 11:34:41 +0000 -Subject: [PATCH 175/297] sched/pds: Use common normal_prio() - ---- - init/init_task.c | 7 +------ - kernel/sched/alt_core.c | 13 +++++++++++++ - kernel/sched/bmq.h | 8 -------- - kernel/sched/pds.h | 6 ------ - 4 files changed, 14 insertions(+), 20 deletions(-) - -diff --git a/init/init_task.c b/init/init_task.c -index 579d99864d49..2d0bad762895 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -75,15 +75,10 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, --#ifdef CONFIG_SCHED_BMQ -+#ifdef CONFIG_SCHED_ALT - .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, - .static_prio = DEFAULT_PRIO, - .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, --#endif --#ifdef CONFIG_SCHED_PDS -- .prio = MAX_RT_PRIO, -- .static_prio = DEFAULT_PRIO, -- .normal_prio = MAX_RT_PRIO, - #else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 946983ca5763..57c34cf29956 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1121,6 +1121,19 @@ static inline void hrtick_rq_init(struct rq *rq) - } - #endif /* CONFIG_SCHED_HRTICK */ - -+/* -+ * Calculate the expected normal priority: i.e. priority -+ * without taking RT-inheritance into account. Might be -+ * boosted by interactivity modifiers. Changes upon fork, -+ * setprio syscalls, and whenever the interactivity -+ * estimator recalculates. -+ */ -+static inline int normal_prio(struct task_struct *p) -+{ -+ return task_has_rt_policy(p) ? (MAX_RT_PRIO - 1 - p->rt_priority) : -+ p->static_prio + MAX_PRIORITY_ADJ; -+} -+ - /* - * Calculate the current priority, i.e. the priority - * taken into account by the scheduler. This value might -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index 76db5eb21a01..7635c00dde7f 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -65,14 +65,6 @@ static inline int sched_idx2prio(int idx, struct rq *rq) - return idx; - } - --static inline int normal_prio(struct task_struct *p) --{ -- if (task_has_rt_policy(p)) -- return MAX_RT_PRIO - 1 - p->rt_priority; -- -- return p->static_prio + MAX_PRIORITY_ADJ; --} -- - static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - { - p->time_slice = sched_timeslice_ns; -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index ed336dd35809..c23294178c2b 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -61,12 +61,6 @@ static inline void sched_renew_deadline(struct task_struct *p, const struct rq * - p->static_prio - (MAX_PRIO - NICE_WIDTH); - } - --static inline int normal_prio(struct task_struct *p) --{ -- return task_has_rt_policy(p) ? 
(MAX_RT_PRIO - 1 - p->rt_priority) : -- MAX_RT_PRIO; --} -- - int task_running_nice(struct task_struct *p) - { - return task_sched_prio(p) > DEFAULT_PRIO; --- -2.37.0 - - -From b99fc38d6a6f675b02f577f89343fa0a7ccbb798 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Jun 2021 07:24:08 +0000 -Subject: [PATCH 176/297] sched/alt: Optimization and code clean-up - ---- - kernel/sched/alt_core.c | 20 +++++++------------- - kernel/sched/pds.h | 2 +- - 2 files changed, 8 insertions(+), 14 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 57c34cf29956..a8ba783b07ff 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -77,7 +77,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define STOP_PRIO (MAX_RT_PRIO - 1) - - /* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ --u64 sched_timeslice_ns __read_mostly = (2 << 20); -+u64 sched_timeslice_ns __read_mostly = (4 << 20); - - static inline void requeue_task(struct task_struct *p, struct rq *rq); - -@@ -193,9 +193,8 @@ static inline void update_sched_rq_watermark(struct rq *rq) - cpumask_andnot(&sched_rq_watermark[i], - &sched_rq_watermark[i], cpumask_of(cpu)); - #ifdef CONFIG_SCHED_SMT -- if (!static_branch_likely(&sched_smt_present)) -- return; -- if (IDLE_WM == last_wm) -+ if (static_branch_likely(&sched_smt_present) && -+ IDLE_WM == last_wm) - cpumask_andnot(&sched_sg_idle_mask, - &sched_sg_idle_mask, cpu_smt_mask(cpu)); - #endif -@@ -205,10 +204,9 @@ static inline void update_sched_rq_watermark(struct rq *rq) - for (i = last_wm + 1; i <= watermark; i++) - cpumask_set_cpu(cpu, &sched_rq_watermark[i]); - #ifdef CONFIG_SCHED_SMT -- if (!static_branch_likely(&sched_smt_present)) -- return; -- if (IDLE_WM == watermark) { -+ if (static_branch_likely(&sched_smt_present) && IDLE_WM == watermark) { - cpumask_t tmp; -+ - cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); - if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) - cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), -@@ -1003,13 +1001,10 @@ static void hrtick_clear(struct rq *rq) - static enum hrtimer_restart hrtick(struct hrtimer *timer) - { - struct rq *rq = container_of(timer, struct rq, hrtick_timer); -- struct task_struct *p; - - WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); - - raw_spin_lock(&rq->lock); -- p = rq->curr; -- p->time_slice = 0; - resched_curr(rq); - raw_spin_unlock(&rq->lock); - -@@ -2733,9 +2728,7 @@ void wake_up_new_task(struct task_struct *p) - struct rq *rq; - - raw_spin_lock_irqsave(&p->pi_lock, flags); -- - p->state = TASK_RUNNING; -- - rq = cpu_rq(select_task_rq(p)); - #ifdef CONFIG_SMP - rseq_migrate(p); -@@ -2743,6 +2736,7 @@ void wake_up_new_task(struct task_struct *p) - * Fork balancing, do it here and not earlier because: - * - cpus_ptr can change in the fork path - * - any previously selected CPU might disappear through hotplug -+ * - * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, - * as we're not fully set-up yet. 
- */ -@@ -2750,8 +2744,8 @@ void wake_up_new_task(struct task_struct *p) - #endif - - raw_spin_lock(&rq->lock); -- - update_rq_clock(rq); -+ - activate_task(p, rq); - trace_sched_wakeup_new(p); - check_preempt_curr(rq); -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index c23294178c2b..06d88e72b543 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -63,7 +63,7 @@ static inline void sched_renew_deadline(struct task_struct *p, const struct rq * - - int task_running_nice(struct task_struct *p) - { -- return task_sched_prio(p) > DEFAULT_PRIO; -+ return (p->prio > DEFAULT_PRIO); - } - - static inline void update_rq_time_edge(struct rq *rq) --- -2.37.0 - - -From 53012af8bf91faf80f525225027156355b9616a8 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 6 Jul 2021 11:18:04 +0000 -Subject: [PATCH 177/297] Project-C v5.13-r1 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index a8ba783b07ff..b65b12c6014f 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.13-r0" -+#define ALT_SCHED_VERSION "v5.13-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 7a7d569b3d8439a666a0ebd6b090909accd1fe46 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 27 Jun 2021 14:45:03 +0000 -Subject: [PATCH 178/297] sched/alt: Reverse sched_rq_watermark order - ---- - kernel/sched/alt_core.c | 21 +++++++++++---------- - 1 file changed, 11 insertions(+), 10 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b65b12c6014f..ffe95d0b5856 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -189,9 +189,10 @@ static inline void update_sched_rq_watermark(struct rq *rq) - rq->watermark = watermark; - cpu = cpu_of(rq); - if (watermark < last_wm) { -- for (i = watermark + 1; i <= last_wm; i++) -- cpumask_andnot(&sched_rq_watermark[i], -- &sched_rq_watermark[i], cpumask_of(cpu)); -+ for (i = last_wm; i > watermark; i--) -+ cpumask_andnot(&sched_rq_watermark[SCHED_BITS - 1 - i], -+ &sched_rq_watermark[SCHED_BITS - 1 - i], -+ cpumask_of(cpu)); - #ifdef CONFIG_SCHED_SMT - if (static_branch_likely(&sched_smt_present) && - IDLE_WM == last_wm) -@@ -201,13 +202,13 @@ static inline void update_sched_rq_watermark(struct rq *rq) - return; - } - /* last_wm < watermark */ -- for (i = last_wm + 1; i <= watermark; i++) -- cpumask_set_cpu(cpu, &sched_rq_watermark[i]); -+ for (i = watermark; i > last_wm; i--) -+ cpumask_set_cpu(cpu, &sched_rq_watermark[SCHED_BITS - 1 - i]); - #ifdef CONFIG_SCHED_SMT - if (static_branch_likely(&sched_smt_present) && IDLE_WM == watermark) { - cpumask_t tmp; - -- cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); -+ cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[0]); - if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) - cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), - &sched_sg_idle_mask); -@@ -1736,9 +1737,9 @@ static inline int select_task_rq(struct task_struct *p) - #ifdef CONFIG_SCHED_SMT - cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || - #endif -- cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || -+ cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[0]) || - cpumask_and(&tmp, &chk_mask, -- &sched_rq_watermark[task_sched_prio(p) + 
1])) -+ &sched_rq_watermark[SCHED_BITS - task_sched_prio(p)])) - return best_mask_cpu(task_cpu(p), &tmp); - - return best_mask_cpu(task_cpu(p), &chk_mask); -@@ -3592,7 +3593,7 @@ static inline void sg_balance_check(struct rq *rq) - */ - if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && - cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && -- cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) { -+ cpumask_andnot(&chk, &chk, &sched_rq_watermark[0])) { - int i, tried = 0; - - for_each_cpu_wrap(i, &chk, cpu) { -@@ -3905,7 +3906,7 @@ void alt_sched_debug(void) - { - printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", - sched_rq_pending_mask.bits[0], -- sched_rq_watermark[IDLE_WM].bits[0], -+ sched_rq_watermark[0].bits[0], - sched_sg_idle_mask.bits[0]); - } - #else --- -2.37.0 - - -From 6b4a7bbec176bcd7141d6122f1bc09f94a2c47dd Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 28 Jun 2021 12:52:23 +0000 -Subject: [PATCH 179/297] sched/alt: Use atomic operation in - update_sched_rq_watermark() and code clean-up - ---- - kernel/sched/alt_core.c | 25 +++++++++++-------------- - 1 file changed, 11 insertions(+), 14 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index ffe95d0b5856..472d73646b67 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -146,8 +146,6 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); - # define finish_arch_post_lock_switch() do { } while (0) - #endif - --#define IDLE_WM (IDLE_TASK_SCHED_PRIO) -- - #ifdef CONFIG_SCHED_SMT - static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; - #endif -@@ -190,12 +188,10 @@ static inline void update_sched_rq_watermark(struct rq *rq) - cpu = cpu_of(rq); - if (watermark < last_wm) { - for (i = last_wm; i > watermark; i--) -- cpumask_andnot(&sched_rq_watermark[SCHED_BITS - 1 - i], -- &sched_rq_watermark[SCHED_BITS - 1 - i], -- cpumask_of(cpu)); -+ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); - #ifdef CONFIG_SCHED_SMT - if (static_branch_likely(&sched_smt_present) && -- IDLE_WM == last_wm) -+ IDLE_TASK_SCHED_PRIO == last_wm) - cpumask_andnot(&sched_sg_idle_mask, - &sched_sg_idle_mask, cpu_smt_mask(cpu)); - #endif -@@ -203,12 +199,13 @@ static inline void update_sched_rq_watermark(struct rq *rq) - } - /* last_wm < watermark */ - for (i = watermark; i > last_wm; i--) -- cpumask_set_cpu(cpu, &sched_rq_watermark[SCHED_BITS - 1 - i]); -+ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); - #ifdef CONFIG_SCHED_SMT -- if (static_branch_likely(&sched_smt_present) && IDLE_WM == watermark) { -+ if (static_branch_likely(&sched_smt_present) && -+ IDLE_TASK_SCHED_PRIO == watermark) { - cpumask_t tmp; - -- cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[0]); -+ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark); - if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) - cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), - &sched_sg_idle_mask); -@@ -1737,9 +1734,9 @@ static inline int select_task_rq(struct task_struct *p) - #ifdef CONFIG_SCHED_SMT - cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || - #endif -- cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[0]) || -+ cpumask_and(&tmp, &chk_mask, sched_rq_watermark) || - cpumask_and(&tmp, &chk_mask, -- &sched_rq_watermark[SCHED_BITS - task_sched_prio(p)])) -+ sched_rq_watermark + SCHED_BITS - task_sched_prio(p))) - return best_mask_cpu(task_cpu(p), &tmp); - - return best_mask_cpu(task_cpu(p), &chk_mask); -@@ -3593,7 +3590,7 @@ static inline void 
sg_balance_check(struct rq *rq) - */ - if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && - cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && -- cpumask_andnot(&chk, &chk, &sched_rq_watermark[0])) { -+ cpumask_andnot(&chk, &chk, sched_rq_watermark)) { - int i, tried = 0; - - for_each_cpu_wrap(i, &chk, cpu) { -@@ -6773,7 +6770,7 @@ void __init sched_init(void) - - #ifdef CONFIG_SMP - for (i = 0; i < SCHED_BITS; i++) -- cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); -+ cpumask_copy(sched_rq_watermark + i, cpu_present_mask); - #endif - - #ifdef CONFIG_CGROUP_SCHED -@@ -6787,7 +6784,7 @@ void __init sched_init(void) - rq = cpu_rq(i); - - sched_queue_init(&rq->queue); -- rq->watermark = IDLE_WM; -+ rq->watermark = IDLE_TASK_SCHED_PRIO; - rq->skip = NULL; - - raw_spin_lock_init(&rq->lock); --- -2.37.0 - - -From 7e147334ae87c0f527a38a2a716990715ba92688 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 7 Jul 2021 10:38:54 +0000 -Subject: [PATCH 180/297] sched/alt: inline some BMQ/PDS interfaces - ---- - kernel/sched/bmq.h | 6 +++--- - kernel/sched/pds.h | 12 +++++------- - 2 files changed, 8 insertions(+), 10 deletions(-) - -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index 7635c00dde7f..be3ee4a553ca 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -89,20 +89,20 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) - p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; - } - --static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+static inline void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) - { - p->boost_prio = MAX_PRIORITY_ADJ; - } - - #ifdef CONFIG_SMP --static void sched_task_ttwu(struct task_struct *p) -+static inline void sched_task_ttwu(struct task_struct *p) - { - if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) - boost_task(p); - } - #endif - --static void sched_task_deactivate(struct task_struct *p, struct rq *rq) -+static inline void sched_task_deactivate(struct task_struct *p, struct rq *rq) - { - if (rq_switch_time(rq) < boost_threshold(p)) - boost_task(p); -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 06d88e72b543..0f1f0d708b77 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -18,11 +18,9 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) - { - s64 delta = p->deadline - rq->time_edge + NORMAL_PRIO_NUM - NICE_WIDTH; - -- if (unlikely(delta > NORMAL_PRIO_NUM - 1)) { -- pr_info("pds: task_sched_prio_normal delta %lld, deadline %llu, time_edge %llu\n", -- delta, p->deadline, rq->time_edge); -+ if (WARN_ONCE(delta > NORMAL_PRIO_NUM - 1, -+ "pds: task_sched_prio_normal() delta %lld\n", delta)) - return NORMAL_PRIO_NUM - 1; -- } - - return (delta < 0) ? 
0 : delta; - } -@@ -118,12 +116,12 @@ static void sched_task_fork(struct task_struct *p, struct rq *rq) - sched_renew_deadline(p, rq); - } - --static void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+static inline void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) - { - time_slice_expired(p, rq); - } - - #ifdef CONFIG_SMP --static void sched_task_ttwu(struct task_struct *p) {} -+static inline void sched_task_ttwu(struct task_struct *p) {} - #endif --static void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} -+static inline void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} --- -2.37.0 - - -From a70f521116f0b9875d0d3eda5681d9ab42742a30 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 10 Jul 2021 15:28:11 +0000 -Subject: [PATCH 181/297] sched/alt: Remove over design in best_mask_cpu() - ---- - kernel/sched/alt_core.c | 30 +++++++-------------- - kernel/sched/alt_sched.h | 58 ++++------------------------------------ - 2 files changed, 15 insertions(+), 73 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 472d73646b67..38f2ef489b37 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -207,8 +207,8 @@ static inline void update_sched_rq_watermark(struct rq *rq) - - cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark); - if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -- cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), -- &sched_sg_idle_mask); -+ cpumask_or(&sched_sg_idle_mask, -+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); - } - #endif - } -@@ -3528,8 +3528,7 @@ static inline int active_load_balance_cpu_stop(void *data) - cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask) && - !is_migration_disabled(p)) { - int cpu = cpu_of(rq); -- int dcpu = __best_mask_cpu(cpu, &tmp, -- per_cpu(sched_cpu_llc_mask, cpu)); -+ int dcpu = __best_mask_cpu(&tmp, per_cpu(sched_cpu_llc_mask, cpu)); - rq = move_queued_task(rq, p, dcpu); - } - -@@ -3573,34 +3572,25 @@ static inline int sg_balance_trigger(const int cpu) - static inline void sg_balance_check(struct rq *rq) - { - cpumask_t chk; -- int cpu; -- -- /* exit when no sg in idle */ -- if (cpumask_empty(&sched_sg_idle_mask)) -- return; -+ int cpu = cpu_of(rq); - - /* exit when cpu is offline */ - if (unlikely(!rq->online)) - return; - -- cpu = cpu_of(rq); - /* - * Only cpu in slibing idle group will do the checking and then - * find potential cpus which can migrate the current running task - */ - if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && -- cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) && -- cpumask_andnot(&chk, &chk, sched_rq_watermark)) { -- int i, tried = 0; -+ cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) && -+ cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) { -+ int i; - - for_each_cpu_wrap(i, &chk, cpu) { -- if (cpumask_subset(cpu_smt_mask(i), &chk)) { -- if (sg_balance_trigger(i)) -- return; -- if (tried) -- return; -- tried++; -- } -+ if (cpumask_subset(cpu_smt_mask(i), &chk) && -+ sg_balance_trigger(i)) -+ return; - } - } - } -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index f9f79422bf0e..7a48809550bf 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -302,68 +302,20 @@ enum { - DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); - DECLARE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); - --static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask, -- const cpumask_t *mask) -+static inline int -+__best_mask_cpu(const 
cpumask_t *cpumask, const cpumask_t *mask) - { --#if NR_CPUS <= 64 -- unsigned long t; -+ int cpu; - -- while ((t = cpumask->bits[0] & mask->bits[0]) == 0UL) -- mask++; -- -- return __ffs(t); --#else - while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) - mask++; -+ - return cpu; --#endif - } - - static inline int best_mask_cpu(int cpu, const cpumask_t *mask) - { --#if NR_CPUS <= 64 -- unsigned long llc_match; -- cpumask_t *chk = per_cpu(sched_cpu_llc_mask, cpu); -- -- if ((llc_match = mask->bits[0] & chk->bits[0])) { -- unsigned long match; -- -- chk = per_cpu(sched_cpu_topo_masks, cpu); -- if (mask->bits[0] & chk->bits[0]) -- return cpu; -- --#ifdef CONFIG_SCHED_SMT -- chk++; -- if ((match = mask->bits[0] & chk->bits[0])) -- return __ffs(match); --#endif -- -- return __ffs(llc_match); -- } -- -- return __best_mask_cpu(cpu, mask, chk + 1); --#else -- cpumask_t llc_match; -- cpumask_t *chk = per_cpu(sched_cpu_llc_mask, cpu); -- -- if (cpumask_and(&llc_match, mask, chk)) { -- cpumask_t tmp; -- -- chk = per_cpu(sched_cpu_topo_masks, cpu); -- if (cpumask_test_cpu(cpu, mask)) -- return cpu; -- --#ifdef CONFIG_SCHED_SMT -- chk++; -- if (cpumask_and(&tmp, mask, chk)) -- return cpumask_any(&tmp); --#endif -- -- return cpumask_any(&llc_match); -- } -- -- return __best_mask_cpu(cpu, mask, chk + 1); --#endif -+ return __best_mask_cpu(mask, per_cpu(sched_cpu_topo_masks, cpu)); - } - - extern void flush_smp_call_function_from_idle(void); --- -2.37.0 - - -From 3ab1f5f8743219d142b6ed816d2b4472b6beee23 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 10 Jul 2021 20:52:27 +0000 -Subject: [PATCH 182/297] sched/alt: Remove sched_cpu_affinity_masks - ---- - kernel/sched/alt_core.c | 44 ++++++++++++++++------------------------- - 1 file changed, 17 insertions(+), 27 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 38f2ef489b37..bb5f78a1e256 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -116,11 +116,9 @@ int sched_yield_type __read_mostly = 1; - #ifdef CONFIG_SMP - static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; - --DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_affinity_masks); --DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask); -- - DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); - DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_topo_end_mask); - - #ifdef CONFIG_SCHED_SMT - DEFINE_STATIC_KEY_FALSE(sched_smt_present); -@@ -891,8 +889,8 @@ int get_nohz_timer_target(void) - default_cpu = cpu; - } - -- for (mask = per_cpu(sched_cpu_affinity_masks, cpu) + 1; -- mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) -+ for (mask = per_cpu(sched_cpu_topo_masks, cpu) + 1; -+ mask < per_cpu(sched_cpu_topo_end_mask, cpu); mask++) - for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) - if (!idle_cpu(i)) - return i; -@@ -3932,7 +3930,7 @@ migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) - - static inline int take_other_rq_tasks(struct rq *rq, int cpu) - { -- struct cpumask *affinity_mask, *end_mask; -+ struct cpumask *topo_mask, *end_mask; - - if (unlikely(!rq->online)) - return 0; -@@ -3940,11 +3938,11 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) - if (cpumask_empty(&sched_rq_pending_mask)) - return 0; - -- affinity_mask = per_cpu(sched_cpu_affinity_masks, cpu) + 1; -- end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); -+ topo_mask = per_cpu(sched_cpu_topo_masks, cpu) 
+ 1; -+ end_mask = per_cpu(sched_cpu_topo_end_mask, cpu); - do { - int i; -- for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { -+ for_each_cpu_and(i, &sched_rq_pending_mask, topo_mask) { - int nr_migrated; - struct rq *src_rq; - -@@ -3975,7 +3973,7 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) - spin_release(&src_rq->lock.dep_map, _RET_IP_); - do_raw_spin_unlock(&src_rq->lock); - } -- } while (++affinity_mask < end_mask); -+ } while (++topo_mask < end_mask); - - return 0; - } -@@ -6637,14 +6635,6 @@ static void sched_init_topology_cpumask_early(void) - cpumask_t *tmp; - - for_each_possible_cpu(cpu) { -- /* init affinity masks */ -- tmp = per_cpu(sched_cpu_affinity_masks, cpu); -- -- cpumask_copy(tmp, cpumask_of(cpu)); -- tmp++; -- cpumask_copy(tmp, cpu_possible_mask); -- cpumask_clear_cpu(cpu, tmp); -- per_cpu(sched_cpu_affinity_end_mask, cpu) = ++tmp; - /* init topo masks */ - tmp = per_cpu(sched_cpu_topo_masks, cpu); - -@@ -6652,32 +6642,32 @@ static void sched_init_topology_cpumask_early(void) - tmp++; - cpumask_copy(tmp, cpu_possible_mask); - per_cpu(sched_cpu_llc_mask, cpu) = tmp; -+ per_cpu(sched_cpu_topo_end_mask, cpu) = ++tmp; - /*per_cpu(sd_llc_id, cpu) = cpu;*/ - } - } - --#define TOPOLOGY_CPUMASK(name, mask, last) \ -- if (cpumask_and(chk, chk, mask)) { \ -+#define TOPOLOGY_CPUMASK(name, mask, last)\ -+ if (cpumask_and(topo, topo, mask)) { \ - cpumask_copy(topo, mask); \ -- printk(KERN_INFO "sched: cpu#%02d affinity: 0x%08lx topo: 0x%08lx - "#name,\ -- cpu, (chk++)->bits[0], (topo++)->bits[0]); \ -+ printk(KERN_INFO "sched: cpu#%02d topo: 0x%08lx - "#name, \ -+ cpu, (topo++)->bits[0]); \ - } \ - if (!last) \ -- cpumask_complement(chk, mask) -+ cpumask_complement(topo, mask) - - static void sched_init_topology_cpumask(void) - { - int cpu; -- cpumask_t *chk, *topo; -+ cpumask_t *topo; - - for_each_online_cpu(cpu) { - /* take chance to reset time slice for idle tasks */ - cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; - -- chk = per_cpu(sched_cpu_affinity_masks, cpu) + 1; - topo = per_cpu(sched_cpu_topo_masks, cpu) + 1; - -- cpumask_complement(chk, cpumask_of(cpu)); -+ cpumask_complement(topo, cpumask_of(cpu)); - #ifdef CONFIG_SCHED_SMT - TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); - #endif -@@ -6689,7 +6679,7 @@ static void sched_init_topology_cpumask(void) - - TOPOLOGY_CPUMASK(others, cpu_online_mask, true); - -- per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; -+ per_cpu(sched_cpu_topo_end_mask, cpu) = topo; - printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", - cpu, per_cpu(sd_llc_id, cpu), - (int) (per_cpu(sched_cpu_llc_mask, cpu) - --- -2.37.0 - - -From 10baf332ef6005de5c1303ce61a994cd6db734b5 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 2 Aug 2021 13:56:40 +0800 -Subject: [PATCH 183/297] sched/alt: kernel document update for sched_timeslice - ---- - Documentation/admin-guide/kernel-parameters.txt | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index d243405aa3bf..d92bf8eb93b9 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -5274,9 +5274,9 @@ - See drivers/net/irda/sa1100_ir.c. - - sched_timeslice= -- [KNL] Time slice in us for BMQ/PDS scheduler. -- Format: (must be >= 1000) -- Default: 4000 -+ [KNL] Time slice in ms for Project C BMQ/PDS scheduler. 
-+ Format: integer 2, 4 -+ Default: 4 - See Documentation/scheduler/sched-BMQ.txt - - sched_verbose [KNL] Enables verbose scheduler debug messages. --- -2.37.0 - - -From c1010d7f83034c76337bf83fcb032913e169a380 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 29 Jul 2021 19:11:07 +0000 -Subject: [PATCH 184/297] Project-C v5.13-r2 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index bb5f78a1e256..e296d56e85f0 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.13-r1" -+#define ALT_SCHED_VERSION "v5.13-r2" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From f1ba538d3943cdf2b4dbd6888798c06bcc9c9f3f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 1 Aug 2021 09:17:30 +0000 -Subject: [PATCH 185/297] sched/alt: Optimize sched_exec() - -Improve exec performance under heavy load. -Kernel compilation comparation ->>>>>16 job(s) -1m21.148s(baseline) --> 1m19.474s (- ~2.06%) ->>>>>24 job(s) -1m22.362s(baseline) --> 1m19.771s (- ~3.15%) ---- - kernel/sched/alt_core.c | 8 +------- - 1 file changed, 1 insertion(+), 7 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index e296d56e85f0..b5e91c874cc1 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3321,15 +3321,9 @@ void sched_exec(void) - struct task_struct *p = current; - unsigned long flags; - int dest_cpu; -- struct rq *rq; - - raw_spin_lock_irqsave(&p->pi_lock, flags); -- rq = this_rq(); -- -- if (rq != task_rq(p) || rq->nr_running < 2) -- goto unlock; -- -- dest_cpu = select_task_rq(p); -+ dest_cpu = cpumask_any(p->cpus_ptr); - if (dest_cpu == smp_processor_id()) - goto unlock; - --- -2.37.0 - - -From b613b42d52b674261a5c7bdf8e0e6dc962108b24 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 22 Aug 2021 17:06:15 +0000 -Subject: [PATCH 186/297] Project-C v5.13-r3 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b5e91c874cc1..b10012b67435 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.13-r2" -+#define ALT_SCHED_VERSION "v5.13-r3" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From f555d191414d5e38b86abc045318539ab992cc80 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 10 Aug 2021 15:29:01 +0000 -Subject: [PATCH 187/297] sched/alt: [Sync] 2b8ca1a907d5 sched/core: Remove the - pointless BUG_ON(!task) from wake_up_q() - ---- - kernel/sched/alt_core.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b10012b67435..9417f9b6e88d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -807,7 +807,6 @@ void wake_up_q(struct wake_q_head *head) - struct task_struct *task; - - task = container_of(node, struct task_struct, wake_q); -- BUG_ON(!task); - /* task can safely be re-inserted now: */ - node = node->next; - task->wake_q.next = NULL; --- -2.37.0 - - -From 
59a86828d9cefa08656255569bd164cdd724d1e2 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 10 Aug 2021 16:45:51 +0000 -Subject: [PATCH 188/297] sched/alt: [Sync] 4e29fb709885 sched: Rename - sched_info_{queued,dequeued} - ---- - kernel/sched/alt_core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 9417f9b6e88d..c9c5d609feb6 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -588,7 +588,7 @@ static inline void sched_update_tick_dependency(struct rq *rq) { } - */ - #define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ - psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -- sched_info_dequeued(rq, p); \ -+ sched_info_dequeue(rq, p); \ - \ - list_del(&p->sq_node); \ - if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -@@ -598,7 +598,7 @@ static inline void sched_update_tick_dependency(struct rq *rq) { } - } - - #define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -- sched_info_queued(rq, p); \ -+ sched_info_enqueue(rq, p); \ - psi_enqueue(p, flags); \ - \ - p->sq_idx = task_sched_prio_idx(p, rq); \ --- -2.37.0 - - -From 2332c00ed6782e048b3576c589171bf667e93687 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 11 Aug 2021 14:11:13 +0000 -Subject: [PATCH 189/297] sched/alt: [Sync] f1a0a376ca0c sched/core: Initialize - the idle task with preemption disabled - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c9c5d609feb6..299a80c5df45 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6135,7 +6135,7 @@ void dump_cpu_task(int cpu) - * NOTE: this function does not set the idle thread's NEED_RESCHED - * flag, to make booting more robust. - */ --void init_idle(struct task_struct *idle, int cpu) -+void __init init_idle(struct task_struct *idle, int cpu) - { - struct rq *rq = cpu_rq(cpu); - unsigned long flags; --- -2.37.0 - - -From 28206256499d46b20d2445c670d24be11125b19b Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 11 Aug 2021 14:13:54 +0000 -Subject: [PATCH 190/297] sched/alt: [Sync] 01aee8fd7fb2 sched: Make - nr_running() return 32-bit value - ---- - kernel/sched/alt_core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 299a80c5df45..9f10b21c0b4d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3216,9 +3216,9 @@ context_switch(struct rq *rq, struct task_struct *prev, - * externally visible scheduler statistics: current number of runnable - * threads, total number of context switches performed since bootup. - */ --unsigned long nr_running(void) -+unsigned int nr_running(void) - { -- unsigned long i, sum = 0; -+ unsigned int i, sum = 0; - - for_each_online_cpu(i) - sum += cpu_rq(i)->nr_running; --- -2.37.0 - - -From 31d00404a4796c9badeb779cae9d4105cef33672 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 11 Aug 2021 14:15:11 +0000 -Subject: [PATCH 191/297] sched/alt: [Sybc] 9745516841a5 sched: Make - nr_iowait() return 32-bit value - ---- - kernel/sched/alt_core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 9f10b21c0b4d..2f030370f31a 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3298,9 +3298,9 @@ unsigned long nr_iowait_cpu(int cpu) - * Task CPU affinities can make all that even more 'interesting'. 
- */ - --unsigned long nr_iowait(void) -+unsigned int nr_iowait(void) - { -- unsigned long i, sum = 0; -+ unsigned int i, sum = 0; - - for_each_possible_cpu(i) - sum += nr_iowait_cpu(i); --- -2.37.0 - - -From 097316137fdb3237622875f349ef79e19e8b78eb Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 11 Aug 2021 14:16:22 +0000 -Subject: [PATCH 192/297] sched/alt: [Sync] 8fc2858e572c sched: Make - nr_iowait_cpu() return 32-bit value - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 2f030370f31a..5536baf21677 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3263,7 +3263,7 @@ unsigned long long nr_context_switches(void) - * it does become runnable. - */ - --unsigned long nr_iowait_cpu(int cpu) -+unsigned int nr_iowait_cpu(int cpu) - { - return atomic_read(&cpu_rq(cpu)->nr_iowait); - } --- -2.37.0 - - -From 66de136c366461e6b3c8c1714f5b62baf8dfd781 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 11 Aug 2021 14:24:19 +0000 -Subject: [PATCH 193/297] sched/alt: [Sync] 00b89fe0197f sched: Make the idle - task quack like a per-CPU kthread - ---- - kernel/sched/alt_core.c | 22 ++++++++++++++++------ - 1 file changed, 16 insertions(+), 6 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 5536baf21677..7e12ba94220c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6142,13 +6142,27 @@ void __init init_idle(struct task_struct *idle, int cpu) - - __sched_fork(0, idle); - -+ /* -+ * The idle task doesn't need the kthread struct to function, but it -+ * is dressed up as a per-CPU kthread and thus needs to play the part -+ * if we want to avoid special-casing it in code that deals with per-CPU -+ * kthreads. -+ */ -+ set_kthread_struct(idle); -+ - raw_spin_lock_irqsave(&idle->pi_lock, flags); - raw_spin_lock(&rq->lock); - update_rq_clock(rq); - - idle->last_ran = rq->clock_task; - idle->state = TASK_RUNNING; -- idle->flags |= PF_IDLE; -+ /* -+ * PF_KTHREAD should already be set at this point; regardless, make it -+ * look like a proper per-CPU kthread. -+ */ -+ idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; -+ kthread_set_per_cpu(idle, cpu); -+ - sched_queue_init_idle(&rq->queue, idle); - - scs_task_reset(idle); -@@ -6287,12 +6301,8 @@ static void balance_push(struct rq *rq) - /* - * Both the cpu-hotplug and stop task are in this case and are - * required to complete the hotplug process. -- * -- * XXX: the idle task does not match kthread_is_per_cpu() due to -- * histerical raisins. 
- */ -- if (rq->idle == push_task || -- kthread_is_per_cpu(push_task) || -+ if (kthread_is_per_cpu(push_task) || - is_migration_disabled(push_task)) { - - /* --- -2.37.0 - - -From 45c01cf84560529ad4cb0015df3cf1abb14cab42 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 11 Aug 2021 14:59:06 +0000 -Subject: [PATCH 194/297] sched/alt: [Sync] 15faafc6b449 sched,init: Fix - DEBUG_PREEMPT vs early boot - ---- - kernel/sched/alt_core.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 7e12ba94220c..aae0c674519a 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6696,6 +6696,7 @@ void __init sched_init_smp(void) - /* Move init over to a non-isolated CPU */ - if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) - BUG(); -+ current->flags &= ~PF_NO_SETAFFINITY; - - sched_init_topology_cpumask(); - --- -2.37.0 - - -From d83e17ccfae26716b9d867edd40bd9f5d165c356 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 11 Aug 2021 15:11:11 +0000 -Subject: [PATCH 195/297] sched/alt: [Sync] 1faa491a49d5 sched/debug: Remove - obsolete init_schedstats() - ---- - kernel/sched/alt_core.c | 19 ++----------------- - 1 file changed, 2 insertions(+), 17 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index aae0c674519a..09639e0e15b8 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2636,7 +2636,6 @@ void sched_post_fork(struct task_struct *p) {} - #ifdef CONFIG_SCHEDSTATS - - DEFINE_STATIC_KEY_FALSE(sched_schedstats); --static bool __initdata __sched_schedstats = false; - - static void set_schedstats(bool enabled) - { -@@ -2660,16 +2659,11 @@ static int __init setup_schedstats(char *str) - if (!str) - goto out; - -- /* -- * This code is called before jump labels have been set up, so we can't -- * change the static branch directly just yet. Instead set a temporary -- * variable so init_schedstats() can do it later. 
-- */ - if (!strcmp(str, "enable")) { -- __sched_schedstats = true; -+ set_schedstats(true); - ret = 1; - } else if (!strcmp(str, "disable")) { -- __sched_schedstats = false; -+ set_schedstats(false); - ret = 1; - } - out: -@@ -2680,11 +2674,6 @@ static int __init setup_schedstats(char *str) - } - __setup("schedstats=", setup_schedstats); - --static void __init init_schedstats(void) --{ -- set_schedstats(__sched_schedstats); --} -- - #ifdef CONFIG_PROC_SYSCTL - int sysctl_schedstats(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -@@ -2706,8 +2695,6 @@ int sysctl_schedstats(struct ctl_table *table, int write, - return err; - } - #endif /* CONFIG_PROC_SYSCTL */ --#else /* !CONFIG_SCHEDSTATS */ --static inline void init_schedstats(void) {} - #endif /* CONFIG_SCHEDSTATS */ - - /* -@@ -6823,8 +6810,6 @@ void __init sched_init(void) - sched_init_topology_cpumask_early(); - #endif /* SMP */ - -- init_schedstats(); -- - psi_init(); - } - --- -2.37.0 - - -From d93abecc1add5ac16b9a2765c8673352f9facf66 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 11 Aug 2021 15:29:45 +0000 -Subject: [PATCH 196/297] sched/alt: [Sync] b03fbd4ff24c sched: Introduce - task_is_running() - ---- - kernel/sched/alt_core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 09639e0e15b8..7c9f81a86420 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4214,7 +4214,7 @@ static inline void sched_submit_work(struct task_struct *tsk) - { - unsigned int task_flags; - -- if (!tsk->state) -+ if (task_is_running(tsk)) - return; - - task_flags = tsk->flags; -@@ -6032,7 +6032,7 @@ void sched_show_task(struct task_struct *p) - - pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); - -- if (p->state == TASK_RUNNING) -+ if (task_is_running(p)) - pr_cont(" running task "); - #ifdef CONFIG_DEBUG_STACK_USAGE - free = stack_not_used(p); --- -2.37.0 - - -From ac5a483cb08e12aed6b0c476026f6bb3bc68f24f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 11 Aug 2021 15:33:03 +0000 -Subject: [PATCH 197/297] sched/alt: [Sync] d6c23bb3a2ad sched: Add - get_current_state() - ---- - kernel/sched/alt_core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 7c9f81a86420..6c2e8c7d781e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6823,15 +6823,15 @@ static inline int preempt_count_equals(int preempt_offset) - - void __might_sleep(const char *file, int line, int preempt_offset) - { -+ unsigned int state = get_current_state(); - /* - * Blocking primitives will set (and therefore destroy) current->state, - * since we will exit with TASK_RUNNING make sure we enter with it, - * otherwise we will destroy state. 
- */ -- WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, -+ WARN_ONCE(state != TASK_RUNNING && current->task_state_change, - "do not call blocking ops when !TASK_RUNNING; " -- "state=%lx set at [<%p>] %pS\n", -- current->state, -+ "state=%x set at [<%p>] %pS\n", state, - (void *)current->task_state_change, - (void *)current->task_state_change); - --- -2.37.0 - - -From 60c9304c7fdbb00d8e83783bd4268a732b09f993 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 11 Aug 2021 16:02:47 +0000 -Subject: [PATCH 198/297] sched/alt: [Sync] 2f064a59a11f sched: Change - task_struct::state - ---- - kernel/sched/alt_core.c | 52 ++++++++++++++++++++++------------------- - 1 file changed, 28 insertions(+), 24 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 6c2e8c7d781e..4909bf54bd4c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1207,12 +1207,14 @@ static inline bool is_migration_disabled(struct task_struct *p) - void set_task_cpu(struct task_struct *p, unsigned int new_cpu) - { - #ifdef CONFIG_SCHED_DEBUG -+ unsigned int state = READ_ONCE(p->__state); -+ - /* - * We should never call set_task_cpu() on a blocked task, - * ttwu() will sort out the placement. - */ -- WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && -- !p->on_rq); -+ WARN_ON_ONCE(state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq); -+ - #ifdef CONFIG_LOCKDEP - /* - * The caller should hold either p->pi_lock or rq->lock, when changing -@@ -1512,7 +1514,7 @@ inline int task_curr(const struct task_struct *p) - * smp_call_function() if an IPI is sent by the same process we are - * waiting to become inactive. - */ --unsigned long wait_task_inactive(struct task_struct *p, long match_state) -+unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) - { - unsigned long flags; - bool running, on_rq; -@@ -1535,7 +1537,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) - * running somewhere else! - */ - while (task_running(p) && p == rq->curr) { -- if (match_state && unlikely(p->state != match_state)) -+ if (match_state && unlikely(READ_ONCE(p->__state) != match_state)) - return 0; - cpu_relax(); - } -@@ -1550,7 +1552,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) - running = task_running(p); - on_rq = p->on_rq; - ncsw = 0; -- if (!match_state || p->state == match_state) -+ if (!match_state || READ_ONCE(p->__state) == match_state) - ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ - task_access_unlock_irqrestore(p, lock, &flags); - -@@ -1853,7 +1855,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - rq->nr_pinned--; - } - -- if (task_running(p) || p->state == TASK_WAKING) { -+ if (task_running(p) || READ_ONCE(p->__state) == TASK_WAKING) { - struct migration_arg arg = { p, dest_cpu }; - - /* Need help from migration thread: drop lock and wait. */ -@@ -1937,7 +1939,7 @@ static inline void - ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) - { - check_preempt_curr(rq); -- p->state = TASK_RUNNING; -+ WRITE_ONCE(p->__state, TASK_RUNNING); - trace_sched_wakeup(p); - } - -@@ -2302,12 +2304,12 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * - we're serialized against set_special_state() by virtue of - * it disabling IRQs (this allows not taking ->pi_lock). 
- */ -- if (!(p->state & state)) -+ if (!(READ_ONCE(p->__state) & state)) - goto out; - - success = 1; - trace_sched_waking(p); -- p->state = TASK_RUNNING; -+ WRITE_ONCE(p->__state, TASK_RUNNING); - trace_sched_wakeup(p); - goto out; - } -@@ -2320,7 +2322,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - */ - raw_spin_lock_irqsave(&p->pi_lock, flags); - smp_mb__after_spinlock(); -- if (!(p->state & state)) -+ if (!(READ_ONCE(p->__state) & state)) - goto unlock; - - trace_sched_waking(p); -@@ -2386,7 +2388,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * TASK_WAKING such that we can unlock p->pi_lock before doing the - * enqueue, such as ttwu_queue_wakelist(). - */ -- p->state = TASK_WAKING; -+ WRITE_ONCE(p->__state, TASK_WAKING); - - /* - * If the owning (remote) CPU is still in the middle of schedule() with -@@ -2482,7 +2484,7 @@ bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct t - ret = func(p, arg); - __task_rq_unlock(rq, &rf); - } else { -- switch (p->state) { -+ switch (READ_ONCE(p->__state)) { - case TASK_RUNNING: - case TASK_WAKING: - break; -@@ -2558,7 +2560,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - * nobody will actually run it, and a signal or other external - * event cannot wake it up and insert it on the runqueue either. - */ -- p->state = TASK_NEW; -+ p->__state = TASK_NEW; - - /* - * Make sure we do not leak PI boosting priority to the child. -@@ -2710,7 +2712,7 @@ void wake_up_new_task(struct task_struct *p) - struct rq *rq; - - raw_spin_lock_irqsave(&p->pi_lock, flags); -- p->state = TASK_RUNNING; -+ WRITE_ONCE(p->__state, TASK_RUNNING); - rq = cpu_rq(select_task_rq(p)); - #ifdef CONFIG_SMP - rseq_migrate(p); -@@ -3063,7 +3065,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) - * running on another CPU and we could rave with its RUNNING -> DEAD - * transition, resulting in a double drop. - */ -- prev_state = prev->state; -+ prev_state = READ_ONCE(prev->__state); - vtime_task_switch(prev); - perf_event_task_sched_in(prev, current); - finish_task(prev); -@@ -3841,7 +3843,7 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) - #endif - - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -- if (!preempt && prev->state && prev->non_block_count) { -+ if (!preempt && READ_ONCE(prev->__state) && prev->non_block_count) { - printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", - prev->comm, prev->pid, prev->non_block_count); - dump_stack(); -@@ -4108,10 +4110,10 @@ static void __sched notrace __schedule(bool preempt) - * - we form a control dependency vs deactivate_task() below. - * - ptrace_{,un}freeze_traced() can change ->state underneath us. - */ -- prev_state = prev->state; -- if (!preempt && prev_state && prev_state == prev->state) { -+ prev_state = READ_ONCE(prev->__state); -+ if (!preempt && prev_state) { - if (signal_pending_state(prev_state, prev)) { -- prev->state = TASK_RUNNING; -+ WRITE_ONCE(prev->__state, TASK_RUNNING); - } else { - prev->sched_contributes_to_load = - (prev_state & TASK_UNINTERRUPTIBLE) && -@@ -4289,7 +4291,7 @@ void __sched schedule_idle(void) - * current task can be in any other state. Note, idle is always in the - * TASK_RUNNING state. 
- */ -- WARN_ON_ONCE(current->state); -+ WARN_ON_ONCE(current->__state); - do { - __schedule(false); - } while (need_resched()); -@@ -6056,26 +6058,28 @@ EXPORT_SYMBOL_GPL(sched_show_task); - static inline bool - state_filter_match(unsigned long state_filter, struct task_struct *p) - { -+ unsigned int state = READ_ONCE(p->__state); -+ - /* no filter, everything matches */ - if (!state_filter) - return true; - - /* filter, but doesn't match */ -- if (!(p->state & state_filter)) -+ if (!(state & state_filter)) - return false; - - /* - * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows - * TASK_KILLABLE). - */ -- if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) -+ if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE) - return false; - - return true; - } - - --void show_state_filter(unsigned long state_filter) -+void show_state_filter(unsigned int state_filter) - { - struct task_struct *g, *p; - -@@ -6142,7 +6146,7 @@ void __init init_idle(struct task_struct *idle, int cpu) - update_rq_clock(rq); - - idle->last_ran = rq->clock_task; -- idle->state = TASK_RUNNING; -+ idle->__state = TASK_RUNNING; - /* - * PF_KTHREAD should already be set at this point; regardless, make it - * look like a proper per-CPU kthread. --- -2.37.0 - - -From 6e8b5a2ee7417d5faccaac3dc998472025232c6a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 12 Aug 2021 10:46:39 +0000 -Subject: [PATCH 199/297] sched/alt: [Sync] a1dfb6311c77 tick/nohz: Kick only - _queued_ task whose tick dependency is updated - ---- - kernel/sched/alt_core.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4909bf54bd4c..706927956630 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -582,6 +582,11 @@ static inline void sched_update_tick_dependency(struct rq *rq) - static inline void sched_update_tick_dependency(struct rq *rq) { } - #endif - -+bool sched_task_on_rq(struct task_struct *p) -+{ -+ return task_on_rq_queued(p); -+} -+ - /* - * Add/Remove/Requeue task to/from the runqueue routines - * Context: rq->lock --- -2.37.0 - - -From 5c840ab4be958f4a7a166bfb81e94a59dbd2fd70 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 12 Aug 2021 10:56:29 +0000 -Subject: [PATCH 200/297] sched/alt: [Sync] 0fdcccfafcff tick/nohz: Call - tick_nohz_task_switch() with interrupts disabled - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 706927956630..07cc293d12ae 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3074,6 +3074,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) - vtime_task_switch(prev); - perf_event_task_sched_in(prev, current); - finish_task(prev); -+ tick_nohz_task_switch(); - finish_lock_switch(rq); - finish_arch_post_lock_switch(); - kcov_finish_switch(current); -@@ -3116,7 +3117,6 @@ static struct rq *finish_task_switch(struct task_struct *prev) - put_task_struct_rcu_user(prev); - } - -- tick_nohz_task_switch(); - return rq; - } - --- -2.37.0 - - -From 7eb71005ba22b669ad18eca3dfc7b3db009ed871 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 12 Aug 2021 11:05:51 +0000 -Subject: [PATCH 201/297] sched/alt: [Sync] 1eb5dde674f5 cpufreq: CPPC: Add - support for frequency invariance - ---- - kernel/sched/alt_core.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 07cc293d12ae..8afacd3736f4 
100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5151,6 +5151,7 @@ int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) - { - return __sched_setscheduler(p, attr, false, true); - } -+EXPORT_SYMBOL_GPL(sched_setattr_nocheck); - - /** - * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. --- -2.37.0 - - -From 2adc42b2009354362a36069f2e1a225f0635bbfc Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 27 Aug 2021 12:25:47 +0000 -Subject: [PATCH 202/297] sched/alt: [Sync] f558c2b834ec sched/rt: Fix double - enqueue caused by rt_effective_prio - ---- - kernel/sched/alt_core.c | 46 ++++++++++++++++++++--------------------- - 1 file changed, 22 insertions(+), 24 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 8afacd3736f4..68e555999c46 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1116,6 +1116,12 @@ static inline void hrtick_rq_init(struct rq *rq) - } - #endif /* CONFIG_SCHED_HRTICK */ - -+static inline int __normal_prio(int policy, int rt_prio, int nice) -+{ -+ return rt_policy(policy) ? (MAX_RT_PRIO - 1 - rt_prio) : -+ NICE_TO_PRIO(nice) + MAX_PRIORITY_ADJ; -+} -+ - /* - * Calculate the expected normal priority: i.e. priority - * without taking RT-inheritance into account. Might be -@@ -1125,8 +1131,7 @@ static inline void hrtick_rq_init(struct rq *rq) - */ - static inline int normal_prio(struct task_struct *p) - { -- return task_has_rt_policy(p) ? (MAX_RT_PRIO - 1 - p->rt_priority) : -- p->static_prio + MAX_PRIORITY_ADJ; -+ return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); - } - - /* -@@ -2583,7 +2588,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - } else if (PRIO_TO_NICE(p->static_prio) < 0) - p->static_prio = NICE_TO_PRIO(0); - -- p->prio = p->normal_prio = normal_prio(p); -+ p->prio = p->normal_prio = p->static_prio; - - /* - * We don't need the reset flag anymore after the fork. It has -@@ -4604,6 +4609,11 @@ static inline void check_task_changed(struct task_struct *p, struct rq *rq) - } - } - -+static void __setscheduler_prio(struct task_struct *p, int prio) -+{ -+ p->prio = prio; -+} -+ - #ifdef CONFIG_RT_MUTEXES - - static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -@@ -4685,7 +4695,8 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) - } - - trace_sched_pi_setprio(p, pi_task); -- p->prio = prio; -+ -+ __setscheduler_prio(p, prio); - - check_task_changed(p, rq); - out_unlock: -@@ -4884,21 +4895,6 @@ static void __setscheduler_params(struct task_struct *p, - p->normal_prio = normal_prio(p); - } - --/* Actually do priority change: must hold rq lock. */ --static void __setscheduler(struct rq *rq, struct task_struct *p, -- const struct sched_attr *attr, bool keep_boost) --{ -- __setscheduler_params(p, attr); -- -- /* -- * Keep a potential priority boosting if called from -- * sched_setscheduler(). 
-- */ -- p->prio = normal_prio(p); -- if (keep_boost) -- p->prio = rt_effective_prio(p, p->prio); --} -- - /* - * check the target process has a UID that matches the current process's - */ -@@ -4925,9 +4921,8 @@ static int __sched_setscheduler(struct task_struct *p, - .sched_nice = 0, - .sched_priority = 99, - }; -- int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; -- int retval, oldpolicy = -1; -- int policy = attr->sched_policy; -+ int oldpolicy = -1, policy = attr->sched_policy; -+ int retval, newprio; - struct callback_head *head; - unsigned long flags; - struct rq *rq; -@@ -4943,7 +4938,6 @@ static int __sched_setscheduler(struct task_struct *p, - if (unlikely(SCHED_DEADLINE == policy)) { - attr = &dl_squash_attr; - policy = attr->sched_policy; -- newprio = MAX_RT_PRIO - 1 - attr->sched_priority; - } - recheck: - /* Double check policy once rq lock held */ -@@ -5061,6 +5055,7 @@ static int __sched_setscheduler(struct task_struct *p, - - p->sched_reset_on_fork = reset_on_fork; - -+ newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); - if (pi) { - /* - * Take priority boosted tasks into account. If the new -@@ -5076,7 +5071,10 @@ static int __sched_setscheduler(struct task_struct *p, - } - } - -- __setscheduler(rq, p, attr, pi); -+ if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { -+ __setscheduler_params(p, attr); -+ __setscheduler_prio(p, newprio); -+ } - - check_task_changed(p, rq); - --- -2.37.0 - - -From 70a924d4be3dbae051725599096d2dbd2ef88730 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 12 Aug 2021 15:02:03 +0000 -Subject: [PATCH 203/297] Project-C v5.14-r0 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 68e555999c46..5df1157a597c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.13-r3" -+#define ALT_SCHED_VERSION "v5.14-r0" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From c61bbc99782e5af2e8e4ade9ef2f784393ea46f3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 2 Sep 2021 16:19:32 +0000 -Subject: [PATCH 204/297] sched/alt: Fix for v5.14-prjc-r0 fails on x86 UP - build - -Mainline add some sched-core related api which cause below fails on x86 -UP build, #30. 
---- - kernel/sched/alt_core.c | 24 ++++++++++++++++++++++++ - kernel/sched/alt_sched.h | 30 ++++++++++++++++++++++++++++++ - 2 files changed, 54 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 5df1157a597c..bdff1c65e2e9 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -492,6 +492,30 @@ rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) - raw_spin_unlock_irqrestore(&rq->lock, rf->flags); - } - -+void raw_spin_rq_lock_nested(struct rq *rq, int subclass) -+{ -+ raw_spinlock_t *lock; -+ -+ /* Matches synchronize_rcu() in __sched_core_enable() */ -+ preempt_disable(); -+ -+ for (;;) { -+ lock = __rq_lockp(rq); -+ raw_spin_lock_nested(lock, subclass); -+ if (likely(lock == __rq_lockp(rq))) { -+ /* preempt_count *MUST* be > 1 */ -+ preempt_enable_no_resched(); -+ return; -+ } -+ raw_spin_unlock(lock); -+ } -+} -+ -+void raw_spin_rq_unlock(struct rq *rq) -+{ -+ raw_spin_unlock(rq_lockp(rq)); -+} -+ - /* - * RQ-clock updating methods: - */ -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 7a48809550bf..f03af9ab9123 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -443,6 +443,36 @@ this_rq_lock_irq(struct rq_flags *rf) - return rq; - } - -+extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); -+extern void raw_spin_rq_unlock(struct rq *rq); -+ -+static inline raw_spinlock_t *__rq_lockp(struct rq *rq) -+{ -+ return &rq->lock; -+} -+ -+static inline raw_spinlock_t *rq_lockp(struct rq *rq) -+{ -+ return __rq_lockp(rq); -+} -+ -+static inline void raw_spin_rq_lock(struct rq *rq) -+{ -+ raw_spin_rq_lock_nested(rq, 0); -+} -+ -+static inline void raw_spin_rq_lock_irq(struct rq *rq) -+{ -+ local_irq_disable(); -+ raw_spin_rq_lock(rq); -+} -+ -+static inline void raw_spin_rq_unlock_irq(struct rq *rq) -+{ -+ raw_spin_rq_unlock(rq); -+ local_irq_enable(); -+} -+ - static inline int task_current(struct rq *rq, struct task_struct *p) - { - return rq->curr == p; --- -2.37.0 - - -From 78e1ff83298be9c593370f4646ca12d30a49da36 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 3 Sep 2021 08:07:43 +0000 -Subject: [PATCH 205/297] sched/alt: Disable SCHED_CORE on Project C - -SCHED_CORE is introduced in 5.14, which is not supported in Project C. ---- - kernel/Kconfig.preempt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index c2f1fd95a821..41654679b1b2 100644 ---- a/kernel/Kconfig.preempt -+++ b/kernel/Kconfig.preempt -@@ -117,7 +117,7 @@ config PREEMPT_DYNAMIC - - config SCHED_CORE - bool "Core Scheduling for SMT" -- depends on SCHED_SMT -+ depends on SCHED_SMT && !SCHED_ALT - help - This option permits Core Scheduling, a means of coordinated task - selection across SMT siblings. When enabled -- see --- -2.37.0 - - -From 23df85da949fe03fcbea1ee7d31f6d626d6c8ed0 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 4 Sep 2021 08:59:59 +0000 -Subject: [PATCH 206/297] sched/alt: Optimize parameter to __normal_prio(). 
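Why this is a pure optimization: the previous code turned static_prio into a nice value with PRIO_TO_NICE() only for __normal_prio() to turn it straight back with NICE_TO_PRIO(). A minimal standalone check of that round trip, assuming mainline's values from include/linux/sched/prio.h, is sketched below; Project C's MAX_PRIORITY_ADJ offset is left out since it is added identically on both paths.

#include <assert.h>
#include <stdio.h>

/* Assumed mainline nice <-> priority mapping. */
#define MAX_RT_PRIO      100
#define DEFAULT_PRIO     (MAX_RT_PRIO + 20)		/* 120 */
#define NICE_TO_PRIO(n)  ((n) + DEFAULT_PRIO)
#define PRIO_TO_NICE(p)  ((p) - DEFAULT_PRIO)

int main(void)
{
	/* static_prio already is NICE_TO_PRIO(nice), so converting it to a
	 * nice value and back is the identity; __normal_prio() can simply
	 * take static_prio directly. */
	for (int nice = -20; nice <= 19; nice++) {
		int static_prio = NICE_TO_PRIO(nice);

		assert(NICE_TO_PRIO(PRIO_TO_NICE(static_prio)) == static_prio);
	}
	puts("round trip is the identity");
	return 0;
}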
- ---- - kernel/sched/alt_core.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index bdff1c65e2e9..688a6696749c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1140,10 +1140,10 @@ static inline void hrtick_rq_init(struct rq *rq) - } - #endif /* CONFIG_SCHED_HRTICK */ - --static inline int __normal_prio(int policy, int rt_prio, int nice) -+static inline int __normal_prio(int policy, int rt_prio, int static_prio) - { - return rt_policy(policy) ? (MAX_RT_PRIO - 1 - rt_prio) : -- NICE_TO_PRIO(nice) + MAX_PRIORITY_ADJ; -+ static_prio + MAX_PRIORITY_ADJ; - } - - /* -@@ -1155,7 +1155,7 @@ static inline int __normal_prio(int policy, int rt_prio, int nice) - */ - static inline int normal_prio(struct task_struct *p) - { -- return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); -+ return __normal_prio(p->policy, p->rt_priority, p->static_prio); - } - - /* -@@ -5079,7 +5079,7 @@ static int __sched_setscheduler(struct task_struct *p, - - p->sched_reset_on_fork = reset_on_fork; - -- newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); -+ newprio = __normal_prio(policy, attr->sched_priority, NICE_TO_PRIO(attr->sched_nice)); - if (pi) { - /* - * Take priority boosted tasks into account. If the new --- -2.37.0 - - -From 827dd84ce146e07acd45f7f8539de6454aa50332 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 4 Sep 2021 09:12:38 +0000 -Subject: [PATCH 207/297] Project-C v5.14-r1 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 688a6696749c..900889c838ea 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.14-r0" -+#define ALT_SCHED_VERSION "v5.14-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 9324fd52c57652d38d1e63188a718cdc1473cde8 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 6 Sep 2021 16:13:18 +0000 -Subject: [PATCH 208/297] sched/alt: Sync-up mainline change for pi in - __sched_setscheduler() - ---- - kernel/sched/alt_core.c | 6 +----- - 1 file changed, 1 insertion(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 900889c838ea..ea551c1fca45 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5088,11 +5088,7 @@ static int __sched_setscheduler(struct task_struct *p, - * the runqueue. This will be done when the task deboost - * itself. 
- */ -- if (rt_effective_prio(p, newprio) == p->prio) { -- __setscheduler_params(p, attr); -- retval = 0; -- goto unlock; -- } -+ newprio = rt_effective_prio(p, newprio); - } - - if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { --- -2.37.0 - - -From c259d1cbb016ca585d976c254c70a4125d58b86c Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 19 Sep 2021 07:46:44 +0000 -Subject: [PATCH 209/297] sched/alt: [Sync] 5b7992c06c54 sched: Prevent - balance_push() on remote runqueues - ---- - kernel/sched/alt_core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index ea551c1fca45..2cfc83c87d71 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6300,7 +6300,6 @@ static void balance_push(struct rq *rq) - struct task_struct *push_task = rq->curr; - - lockdep_assert_held(&rq->lock); -- SCHED_WARN_ON(rq->cpu != smp_processor_id()); - - /* - * Ensure the thing is persistent until balance_push_set(.on = false); -@@ -6308,9 +6307,10 @@ static void balance_push(struct rq *rq) - rq->balance_callback = &balance_push_callback; - - /* -- * Only active while going offline. -+ * Only active while going offline and when invoked on the outgoing -+ * CPU. - */ -- if (!cpu_dying(rq->cpu)) -+ if (!cpu_dying(rq->cpu) || rq != this_rq()) - return; - - /* --- -2.37.0 - - -From a307aae151702d3c39c73eb3ebcb85b432a0bb1f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 19 Sep 2021 16:25:34 +0000 -Subject: [PATCH 210/297] sched/alt: Move general load accounting to RQ. - -This commit move the general load accounting from cpufreq_schedutil to -RQ in core file. - -Also implement sched_cpu_util() using the general load accounting, which -fix compilation error of missing sched_cpu_util(). 
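The per-runqueue history introduced below is a 32-bit bitmap of recent ~131 us clock blocks (roughly: a bit is set when the runqueue was busy for most of that block), and RQ_LOAD_HISTORY_TO_UTIL() reads the eight most recent completed blocks as a plain 8-bit number, so newer blocks carry exponentially more weight. A standalone sketch of that conversion (not kernel code; the constants mirror the RQ_LOAD_HISTORY_* macros in the hunk that follows):

#include <stdint.h>
#include <stdio.h>

#define HISTORY_BITS	32
#define UTIL_SHIFT	8

/* Mirror of rq_load_util(): take the 8 history bits just below the
 * current (still open) block and scale them against capacity 'max'.
 * Bit 30 is the newest completed block and contributes half of the
 * result, bit 29 a quarter, and so on. */
static unsigned long history_to_util(uint32_t history, unsigned long max)
{
	unsigned long recent = (history >> (HISTORY_BITS - 1 - UTIL_SHIFT)) & 0xff;

	return recent * (max >> UTIL_SHIFT);
}

int main(void)
{
	/* Busy for the whole recent window: close to full capacity. */
	printf("%lu\n", history_to_util(0xff800000u, 1024));	/* 1020 */
	/* Only the newest completed block was busy: about half. */
	printf("%lu\n", history_to_util(0x40000000u, 1024));	/* 512 */
	/* Busy long ago, idle recently: zero. */
	printf("%lu\n", history_to_util(0x000000ffu, 1024));	/* 0 */
	return 0;
}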
---- - kernel/sched/alt_core.c | 95 ++++++++++++++++++++++++++++++++ - kernel/sched/alt_sched.h | 42 +++----------- - kernel/sched/cpufreq_schedutil.c | 68 ++--------------------- - 3 files changed, 107 insertions(+), 98 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 2cfc83c87d71..ee6fc0307135 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -584,6 +584,101 @@ static inline void update_rq_clock(struct rq *rq) - update_rq_clock_task(rq, delta); - } - -+/* -+ * RQ Load update routine -+ */ -+#define RQ_LOAD_HISTORY_BITS (sizeof(s32) * 8ULL) -+#define RQ_UTIL_SHIFT (8) -+#define RQ_LOAD_HISTORY_TO_UTIL(l) (((l) >> (RQ_LOAD_HISTORY_BITS - 1 - RQ_UTIL_SHIFT)) & 0xff) -+ -+#define LOAD_BLOCK(t) ((t) >> 17) -+#define LOAD_HALF_BLOCK(t) ((t) >> 16) -+#define BLOCK_MASK(t) ((t) & ((0x01 << 18) - 1)) -+#define LOAD_BLOCK_BIT(b) (1UL << (RQ_LOAD_HISTORY_BITS - 1 - (b))) -+#define CURRENT_LOAD_BIT LOAD_BLOCK_BIT(0) -+ -+static inline void rq_load_update(struct rq *rq) -+{ -+ u64 time = rq->clock; -+ u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(rq->load_stamp), -+ RQ_LOAD_HISTORY_BITS - 1); -+ u64 prev = !!(rq->load_history & CURRENT_LOAD_BIT); -+ u64 curr = !!cpu_rq(rq->cpu)->nr_running; -+ -+ if (delta) { -+ rq->load_history = rq->load_history >> delta; -+ -+ if (delta < RQ_UTIL_SHIFT) { -+ rq->load_block += (~BLOCK_MASK(rq->load_stamp)) * prev; -+ if (!!LOAD_HALF_BLOCK(rq->load_block) ^ curr) -+ rq->load_history ^= LOAD_BLOCK_BIT(delta); -+ } -+ -+ rq->load_block = BLOCK_MASK(time) * prev; -+ } else { -+ rq->load_block += (time - rq->load_stamp) * prev; -+ } -+ if (prev ^ curr) -+ rq->load_history ^= CURRENT_LOAD_BIT; -+ rq->load_stamp = time; -+} -+ -+unsigned long rq_load_util(struct rq *rq, unsigned long max) -+{ -+ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (max >> RQ_UTIL_SHIFT); -+} -+ -+#ifdef CONFIG_SMP -+unsigned long sched_cpu_util(int cpu, unsigned long max) -+{ -+ return rq_load_util(cpu_rq(cpu), max); -+} -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_CPU_FREQ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. -+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. -+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. 
-+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+#ifdef CONFIG_SMP -+ rq_load_update(rq); -+#endif -+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, -+ cpu_of(rq))); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+#ifdef CONFIG_SMP -+ rq_load_update(rq); -+#endif -+} -+#endif /* CONFIG_CPU_FREQ */ -+ - #ifdef CONFIG_NO_HZ_FULL - /* - * Tick may be needed by tasks in the runqueue depending on their policy and -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index f03af9ab9123..289058a09bd5 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -197,6 +197,7 @@ struct rq { - struct rcuwait hotplug_wait; - #endif - unsigned int nr_pinned; -+ - #endif /* CONFIG_SMP */ - #ifdef CONFIG_IRQ_TIME_ACCOUNTING - u64 prev_irq_time; -@@ -208,6 +209,11 @@ struct rq { - u64 prev_steal_time_rq; - #endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ - -+ /* For genenal cpu load util */ -+ s32 load_history; -+ u64 load_block; -+ u64 load_stamp; -+ - /* calc_load related fields */ - unsigned long calc_load_update; - long calc_load_active; -@@ -260,6 +266,8 @@ struct rq { - #endif /* CONFIG_NO_HZ_COMMON */ - }; - -+extern unsigned long rq_load_util(struct rq *rq, unsigned long max); -+ - extern unsigned long calc_load_update; - extern atomic_long_t calc_load_tasks; - -@@ -572,40 +580,6 @@ static inline u64 irq_time_read(int cpu) - - #ifdef CONFIG_CPU_FREQ - DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -- --/** -- * cpufreq_update_util - Take a note about CPU utilization changes. -- * @rq: Runqueue to carry out the update for. -- * @flags: Update reason flags. -- * -- * This function is called by the scheduler on the CPU whose utilization is -- * being updated. -- * -- * It can only be called from RCU-sched read-side critical sections. -- * -- * The way cpufreq is currently arranged requires it to evaluate the CPU -- * performance state (frequency/voltage) on a regular basis to prevent it from -- * being stuck in a completely inadequate performance level for too long. -- * That is not guaranteed to happen if the updates are only triggered from CFS -- * and DL, though, because they may not be coming in if only RT tasks are -- * active all the time (or there are RT tasks only). -- * -- * As a workaround for that issue, this function is called periodically by the -- * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -- * but that really is a band-aid. Going forward it should be replaced with -- * solutions targeted more specifically at RT tasks. 
-- */ --static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) --{ -- struct update_util_data *data; -- -- data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, -- cpu_of(rq))); -- if (data) -- data->func(data, rq_clock(rq), flags); --} --#else --static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} - #endif /* CONFIG_CPU_FREQ */ - - #ifdef CONFIG_NO_HZ_FULL -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index ab803029b7fd..b2590f961139 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -50,13 +50,6 @@ struct sugov_cpu { - unsigned long bw_dl; - unsigned long max; - --#ifdef CONFIG_SCHED_ALT -- /* For genenal cpu load util */ -- s32 load_history; -- u64 load_block; -- u64 load_stamp; --#endif -- - /* The field below is for single-CPU policies only: */ - #ifdef CONFIG_NO_HZ_COMMON - unsigned long saved_idle_calls; -@@ -161,66 +154,21 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, - return cpufreq_driver_resolve_freq(policy, freq); - } - --#ifndef CONFIG_SCHED_ALT - static void sugov_get_util(struct sugov_cpu *sg_cpu) - { - struct rq *rq = cpu_rq(sg_cpu->cpu); - unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); - - sg_cpu->max = max; -+#ifndef CONFIG_SCHED_ALT - sg_cpu->bw_dl = cpu_bw_dl(rq); - sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max, - FREQUENCY_UTIL, NULL); --} -- --#else /* CONFIG_SCHED_ALT */ -- --#define SG_CPU_LOAD_HISTORY_BITS (sizeof(s32) * 8ULL) --#define SG_CPU_UTIL_SHIFT (8) --#define SG_CPU_LOAD_HISTORY_SHIFT (SG_CPU_LOAD_HISTORY_BITS - 1 - SG_CPU_UTIL_SHIFT) --#define SG_CPU_LOAD_HISTORY_TO_UTIL(l) (((l) >> SG_CPU_LOAD_HISTORY_SHIFT) & 0xff) -- --#define LOAD_BLOCK(t) ((t) >> 17) --#define LOAD_HALF_BLOCK(t) ((t) >> 16) --#define BLOCK_MASK(t) ((t) & ((0x01 << 18) - 1)) --#define LOAD_BLOCK_BIT(b) (1UL << (SG_CPU_LOAD_HISTORY_BITS - 1 - (b))) --#define CURRENT_LOAD_BIT LOAD_BLOCK_BIT(0) -- --static void sugov_get_util(struct sugov_cpu *sg_cpu) --{ -- unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); -- -- sg_cpu->max = max; -+#else - sg_cpu->bw_dl = 0; -- sg_cpu->util = SG_CPU_LOAD_HISTORY_TO_UTIL(sg_cpu->load_history) * -- (max >> SG_CPU_UTIL_SHIFT); --} -- --static inline void sugov_cpu_load_update(struct sugov_cpu *sg_cpu, u64 time) --{ -- u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(sg_cpu->load_stamp), -- SG_CPU_LOAD_HISTORY_BITS - 1); -- u64 prev = !!(sg_cpu->load_history & CURRENT_LOAD_BIT); -- u64 curr = !!cpu_rq(sg_cpu->cpu)->nr_running; -- -- if (delta) { -- sg_cpu->load_history = sg_cpu->load_history >> delta; -- -- if (delta <= SG_CPU_UTIL_SHIFT) { -- sg_cpu->load_block += (~BLOCK_MASK(sg_cpu->load_stamp)) * prev; -- if (!!LOAD_HALF_BLOCK(sg_cpu->load_block) ^ curr) -- sg_cpu->load_history ^= LOAD_BLOCK_BIT(delta); -- } -- -- sg_cpu->load_block = BLOCK_MASK(time) * prev; -- } else { -- sg_cpu->load_block += (time - sg_cpu->load_stamp) * prev; -- } -- if (prev ^ curr) -- sg_cpu->load_history ^= CURRENT_LOAD_BIT; -- sg_cpu->load_stamp = time; --} -+ sg_cpu->util = rq_load_util(rq, max); - #endif /* CONFIG_SCHED_ALT */ -+} - - /** - * sugov_iowait_reset() - Reset the IO boost status of a CPU. 
-@@ -372,10 +320,6 @@ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) - static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, - u64 time, unsigned int flags) - { --#ifdef CONFIG_SCHED_ALT -- sugov_cpu_load_update(sg_cpu, time); --#endif /* CONFIG_SCHED_ALT */ -- - sugov_iowait_boost(sg_cpu, time, flags); - sg_cpu->last_update = time; - -@@ -502,10 +446,6 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) - - raw_spin_lock(&sg_policy->update_lock); - --#ifdef CONFIG_SCHED_ALT -- sugov_cpu_load_update(sg_cpu, time); --#endif /* CONFIG_SCHED_ALT */ -- - sugov_iowait_boost(sg_cpu, time, flags); - sg_cpu->last_update = time; - --- -2.37.0 - - -From 1ef06827dfe7e2db1bdda65d1c8817f9d05035bd Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 19 Sep 2021 16:38:23 +0000 -Subject: [PATCH 211/297] Project-C v5.14-r2 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index ee6fc0307135..7c998e606114 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.14-r1" -+#define ALT_SCHED_VERSION "v5.14-r2" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From f2bb457d75d11588616a4932537b7d955e88f0a9 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 21 Sep 2021 09:04:32 +0000 -Subject: [PATCH 212/297] sched/alt: Add sanity check at - migrate_pending_tasks() - -sched_task_sanity_check() is missing when task moving from other rq(s), -this should fix #36 ---- - kernel/sched/alt_core.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 7c998e606114..3d334abeadce 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4030,6 +4030,7 @@ migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) - if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { - __SCHED_DEQUEUE_TASK(p, rq, 0, ); - set_task_cpu(p, dest_cpu); -+ sched_task_sanity_check(p, dest_rq); - __SCHED_ENQUEUE_TASK(p, dest_rq, 0); - nr_migrated++; - } --- -2.37.0 - - -From cd61f958e2ec980c724cb98111c2be1dbc9b4c26 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 21 Sep 2021 11:14:18 +0000 -Subject: [PATCH 213/297] sched/alt: Fix missing cpuacct.usage. - -Call to cgroup_account_cputime() is missing in Project C scheduler, -which cause cpuacct.usage reports zero. - -This should fixed #40 ---- - kernel/sched/alt_core.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3d334abeadce..c1643fa8243c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3468,6 +3468,7 @@ static inline void update_curr(struct rq *rq, struct task_struct *p) - s64 ns = rq->clock_task - p->last_ran; - - p->sched_time += ns; -+ cgroup_account_cputime(p, ns); - account_group_exec_runtime(p, ns); - - p->time_slice -= ns; --- -2.37.0 - - -From bad1f2d661f25c57d57303026b29fd2459475bcc Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 23 Sep 2021 14:25:30 +0000 -Subject: [PATCH 214/297] sched/alt: Fix compilation issue with - rebuild_sched_domains_energy() - -Energy model is not supported in Project C, this fix the undefined -reference to `rebuild_sched_domains_energy'. 
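The shape of the fix in the hunk below is the usual header pattern of pairing an extern declaration with an empty inline stub so callers link either way; the change simply makes CONFIG_SCHED_ALT select the stub branch too. A generic sketch of that pattern, with hypothetical names:

/* foo.h: foo_rebuild() can be called unconditionally. When the feature
 * is compiled out (or replaced, as SCHED_ALT replaces the energy model
 * here) callers get the inline no-op instead of an unresolved symbol
 * at link time. */
#if defined(CONFIG_FEATURE_FOO) && !defined(CONFIG_FEATURE_FOO_REPLACED)
extern void foo_rebuild(void);
#else
static inline void foo_rebuild(void) { }
#endif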
---- - include/linux/sched/topology.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h -index 56cffe42abbc..e020fc572b22 100644 ---- a/include/linux/sched/topology.h -+++ b/include/linux/sched/topology.h -@@ -233,7 +233,8 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) - - #endif /* !CONFIG_SMP */ - --#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) && \ -+ !defined(CONFIG_SCHED_ALT) - extern void rebuild_sched_domains_energy(void); - #else - static inline void rebuild_sched_domains_energy(void) --- -2.37.0 - - -From 20ab96e4d076e8e51e7d34e53e09eabb132fa2a3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 23 Sep 2021 14:53:14 +0000 -Subject: [PATCH 215/297] Project-C v5.14-r3 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c1643fa8243c..56aed2b1e42c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.14-r2" -+#define ALT_SCHED_VERSION "v5.14-r3" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 043b0f475a9c347ab2df59d85ad56209e1319c3d Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 7 Oct 2021 20:15:28 +0000 -Subject: [PATCH 216/297] sched/alt: Fix UP compilation issue. - -Refine the code in rq_load_update() and fix UP compilation issue. ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 56aed2b1e42c..80faf3000851 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -603,7 +603,7 @@ static inline void rq_load_update(struct rq *rq) - u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(rq->load_stamp), - RQ_LOAD_HISTORY_BITS - 1); - u64 prev = !!(rq->load_history & CURRENT_LOAD_BIT); -- u64 curr = !!cpu_rq(rq->cpu)->nr_running; -+ u64 curr = !!rq->nr_running; - - if (delta) { - rq->load_history = rq->load_history >> delta; --- -2.37.0 - - -From 4d5cf13eb754af0352df0c680553aec60dd31dc2 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 8 Oct 2021 13:48:36 +0000 -Subject: [PATCH 217/297] Project-C v5.14-r4 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 80faf3000851..e1a7c163c2d5 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.14-r3" -+#define ALT_SCHED_VERSION "v5.14-r4" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 4b9654988d24888e1db9f206b8340b66a4cc32a7 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 15 Oct 2021 13:50:38 +0000 -Subject: [PATCH 218/297] sched/alt: [Sync] 508958259bb3 rcu: Explain why - rcu_all_qs() is a stub in preemptible TREE RCU - ---- - kernel/sched/alt_core.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c 
-index e1a7c163c2d5..3f74f912d534 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5848,6 +5848,17 @@ int __sched __cond_resched(void) - preempt_schedule_common(); - return 1; - } -+ /* -+ * In preemptible kernels, ->rcu_read_lock_nesting tells the tick -+ * whether the current CPU is in an RCU read-side critical section, -+ * so the tick can report quiescent states even for CPUs looping -+ * in kernel context. In contrast, in non-preemptible kernels, -+ * RCU readers leave no in-memory hints, which means that CPU-bound -+ * processes executing in kernel context might never report an -+ * RCU quiescent state. Therefore, the following code causes -+ * cond_resched() to report a quiescent state, but only when RCU -+ * is in urgent need of one. -+ */ - #ifndef CONFIG_PREEMPT_RCU - rcu_all_qs(); - #endif --- -2.37.0 - - -From e76f8a809e9c9f3f2816471949123c7301baf2fc Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 15 Oct 2021 14:00:12 +0000 -Subject: [PATCH 219/297] sched/alt: [Sync] 031e3bd8986f sched: Optimize - housekeeping_cpumask() in for_each_cpu_and() - ---- - kernel/sched/alt_core.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3f74f912d534..50b89c828837 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1005,6 +1005,7 @@ int get_nohz_timer_target(void) - { - int i, cpu = smp_processor_id(), default_cpu = -1; - struct cpumask *mask; -+ const struct cpumask *hk_mask; - - if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { - if (!idle_cpu(cpu)) -@@ -1012,9 +1013,11 @@ int get_nohz_timer_target(void) - default_cpu = cpu; - } - -+ hk_mask = housekeeping_cpumask(HK_FLAG_TIMER); -+ - for (mask = per_cpu(sched_cpu_topo_masks, cpu) + 1; - mask < per_cpu(sched_cpu_topo_end_mask, cpu); mask++) -- for_each_cpu_and(i, mask, housekeeping_cpumask(HK_FLAG_TIMER)) -+ for_each_cpu_and(i, mask, hk_mask) - if (!idle_cpu(i)) - return i; - --- -2.37.0 - - -From 11bb1d45a50840ca29158114418156a13a3daa05 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 15 Oct 2021 14:10:17 +0000 -Subject: [PATCH 220/297] sched/alt: [Sync] 7ad721bf1071 sched: Don't report - SCHED_FLAG_SUGOV in sched_getattr() - ---- - kernel/sched/alt_core.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 50b89c828837..25184a6ddef3 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5617,6 +5617,7 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, - kattr.sched_priority = p->rt_priority; - else - kattr.sched_nice = task_nice(p); -+ kattr.sched_flags &= SCHED_FLAG_ALL; - - #ifdef CONFIG_UCLAMP_TASK - kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; --- -2.37.0 - - -From 4aeb4a2d94d1203145c3da2279ef1ed52d9e04c3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 15 Oct 2021 14:39:30 +0000 -Subject: [PATCH 221/297] sched/alt: [Sync] 9ae606bc74dd sched: Introduce - task_cpu_possible_mask() to limit fallback rq selection - ---- - kernel/sched/alt_core.c | 8 +++----- - 1 file changed, 3 insertions(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 25184a6ddef3..2ae47a36458c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1478,7 +1478,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) - - /* Non kernel threads are not allowed during either online or offline. 
*/ - if (!(p->flags & PF_KTHREAD)) -- return cpu_active(cpu); -+ return cpu_active(cpu) && task_cpu_possible(cpu, p); - - /* KTHREAD_IS_PER_CPU is always allowed. */ - if (kthread_is_per_cpu(p)) -@@ -1797,9 +1797,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) - - /* Look for allowed, online CPU in same node. */ - for_each_cpu(dest_cpu, nodemask) { -- if (!cpu_active(dest_cpu)) -- continue; -- if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) -+ if (is_cpu_allowed(p, dest_cpu)) - return dest_cpu; - } - } -@@ -1828,7 +1826,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) - * - * More yuck to audit. - */ -- do_set_cpus_allowed(p, cpu_possible_mask); -+ do_set_cpus_allowed(p, task_cpu_possible_mask(p)); - state = fail; - break; - --- -2.37.0 - - -From cae9fb325825c4a607bcf3a225f135f037bcdb13 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 15 Oct 2021 14:42:29 +0000 -Subject: [PATCH 222/297] sched/alt: [Sync] 97c0054dbe2c cpuset: Cleanup - cpuset_cpus_allowed_fallback() use in select_fallback_rq() - ---- - kernel/sched/alt_core.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 2ae47a36458c..2708688b24f9 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1813,8 +1813,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) - /* No more Mr. Nice Guy. */ - switch (state) { - case cpuset: -- if (IS_ENABLED(CONFIG_CPUSETS)) { -- cpuset_cpus_allowed_fallback(p); -+ if (cpuset_cpus_allowed_fallback(p)) { - state = possible; - break; - } --- -2.37.0 - - -From 931fcd03848348878418583c4e3fbce41a1eb274 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 15 Oct 2021 15:15:41 +0000 -Subject: [PATCH 223/297] sched/alt: [Sync] 234a503e670b sched: Reject CPU - affinity changes based on task_cpu_possible_mask() - ---- - kernel/sched/alt_core.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 2708688b24f9..11c847aa8691 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1927,7 +1927,9 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask, - u32 flags) - { -+ const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); - const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ bool kthread = p->flags & PF_KTHREAD; - int dest_cpu; - unsigned long irq_flags; - struct rq *rq; -@@ -1937,7 +1939,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - raw_spin_lock_irqsave(&p->pi_lock, irq_flags); - rq = __task_access_lock(p, &lock); - -- if (p->flags & PF_KTHREAD || is_migration_disabled(p)) { -+ if (kthread || is_migration_disabled(p)) { - /* - * Kernel threads are allowed on online && !active CPUs, - * however, during cpu-hot-unplug, even these might get pushed -@@ -1951,6 +1953,11 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - cpu_valid_mask = cpu_online_mask; - } - -+ if (!kthread && !cpumask_subset(new_mask, cpu_allowed_mask)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ - /* - * Must re-check here, to close a race against __kthread_bind(), - * sched_setaffinity() is not guaranteed to observe the flag. 
--- -2.37.0 - - -From 53428a9670df072cde92f8872c10b7a14bc2229b Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 15 Oct 2021 16:31:35 +0000 -Subject: [PATCH 224/297] sched/alt: [Sync] b90ca8badbd1 sched: Introduce - task_struct::user_cpus_ptr to track requested affinity - ---- - kernel/sched/alt_core.c | 20 ++++++++++++++++++++ - 1 file changed, 20 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 11c847aa8691..6eeeb10f2eea 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1616,6 +1616,26 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - __do_set_cpus_allowed(p, new_mask); - } - -+int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, -+ int node) -+{ -+ if (!src->user_cpus_ptr) -+ return 0; -+ -+ dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); -+ if (!dst->user_cpus_ptr) -+ return -ENOMEM; -+ -+ cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); -+ return 0; -+} -+ -+void release_user_cpus_ptr(struct task_struct *p) -+{ -+ kfree(p->user_cpus_ptr); -+ p->user_cpus_ptr = NULL; -+} -+ - #endif - - /** --- -2.37.0 - - -From d855c8f926b5eaefa57a511cb41955e53dca391b Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 18 Oct 2021 21:10:56 +0000 -Subject: [PATCH 225/297] sched/alt: [Sync] db3b02ae896e sched: Split the guts - of sched_setaffinity() into a helper function - ---- - kernel/sched/alt_core.c | 78 +++++++++++++++++++++++------------------ - 1 file changed, 44 insertions(+), 34 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 6eeeb10f2eea..f2546d629b9d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -5657,9 +5657,47 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, - return retval; - } - --long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+static int -+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask) - { -+ int retval; - cpumask_var_t cpus_allowed, new_mask; -+ -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); -+ if (retval) -+ goto out_free_new_mask; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. 
Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ -+out_free_new_mask: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ - struct task_struct *p; - int retval; - -@@ -5679,50 +5717,22 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) - retval = -EINVAL; - goto out_put_task; - } -- if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { -- retval = -ENOMEM; -- goto out_put_task; -- } -- if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -- retval = -ENOMEM; -- goto out_free_cpus_allowed; -- } -- retval = -EPERM; -+ - if (!check_same_owner(p)) { - rcu_read_lock(); - if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { - rcu_read_unlock(); -- goto out_free_new_mask; -+ retval = -EPERM; -+ goto out_put_task; - } - rcu_read_unlock(); - } - - retval = security_task_setscheduler(p); - if (retval) -- goto out_free_new_mask; -- -- cpuset_cpus_allowed(p, cpus_allowed); -- cpumask_and(new_mask, in_mask, cpus_allowed); -- --again: -- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); -+ goto out_put_task; - -- if (!retval) { -- cpuset_cpus_allowed(p, cpus_allowed); -- if (!cpumask_subset(new_mask, cpus_allowed)) { -- /* -- * We must have raced with a concurrent cpuset -- * update. Just reset the cpus_allowed to the -- * cpuset's cpus_allowed -- */ -- cpumask_copy(new_mask, cpus_allowed); -- goto again; -- } -- } --out_free_new_mask: -- free_cpumask_var(new_mask); --out_free_cpus_allowed: -- free_cpumask_var(cpus_allowed); -+ retval = __sched_setaffinity(p, in_mask); - out_put_task: - put_task_struct(p); - return retval; --- -2.37.0 - - -From 13397b10a79f6055b970cbb41466cd61644b7f2a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 2 Nov 2021 09:55:27 +0000 -Subject: [PATCH 226/297] sched/alt: [Sync] 07ec77a1d4e8 sched: Allow task CPU - affinity to be restricted on asymmetric systems - ---- - kernel/sched/alt_core.c | 262 ++++++++++++++++++++++++++++++++-------- - 1 file changed, 211 insertions(+), 51 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f2546d629b9d..fe42edce5d55 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1333,6 +1333,7 @@ static inline bool is_migration_disabled(struct task_struct *p) - } - - #define SCA_CHECK 0x01 -+#define SCA_USER 0x08 - - #ifdef CONFIG_SMP - -@@ -1630,10 +1631,18 @@ int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, - return 0; - } - -+static inline struct cpumask *clear_user_cpus_ptr(struct task_struct *p) -+{ -+ struct cpumask *user_mask = NULL; -+ -+ swap(p->user_cpus_ptr, user_mask); -+ -+ return user_mask; -+} -+ - void release_user_cpus_ptr(struct task_struct *p) - { -- kfree(p->user_cpus_ptr); -- p->user_cpus_ptr = NULL; -+ kfree(clear_user_cpus_ptr(p)); - } - - #endif -@@ -1934,31 +1943,58 @@ void sched_set_stop_task(int cpu, struct task_struct *stop) - } - } - --/* -- * Change a given task's CPU affinity. Migrate the thread to a -- * proper CPU and schedule it away if the CPU it's executing on -- * is removed from the allowed bitmask. -- * -- * NOTE: the caller must have a valid reference to the task, the -- * task must not exit() & deallocate itself prematurely. The -- * call is not atomic; no spinlocks may be held. 
-- */ --static int __set_cpus_allowed_ptr(struct task_struct *p, -- const struct cpumask *new_mask, -- u32 flags) -+static int affine_move_task(struct rq *rq, struct task_struct *p, int dest_cpu, -+ raw_spinlock_t *lock, unsigned long irq_flags) -+{ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { -+ if (p->migration_disabled) { -+ if (likely(p->cpus_ptr != &p->cpus_mask)) -+ __do_set_cpus_ptr(p, &p->cpus_mask); -+ p->migration_disabled = 0; -+ p->migration_flags |= MDF_FORCE_ENABLED; -+ /* When p is migrate_disabled, rq->lock should be held */ -+ rq->nr_pinned--; -+ } -+ -+ if (task_running(p) || READ_ONCE(p->__state) == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. -+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ } -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); -+ return 0; -+} -+ -+static int __set_cpus_allowed_ptr_locked(struct task_struct *p, -+ const struct cpumask *new_mask, -+ u32 flags, -+ struct rq *rq, -+ raw_spinlock_t *lock, -+ unsigned long irq_flags) - { - const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); - const struct cpumask *cpu_valid_mask = cpu_active_mask; - bool kthread = p->flags & PF_KTHREAD; -+ struct cpumask *user_mask = NULL; - int dest_cpu; -- unsigned long irq_flags; -- struct rq *rq; -- raw_spinlock_t *lock; - int ret = 0; - -- raw_spin_lock_irqsave(&p->pi_lock, irq_flags); -- rq = __task_access_lock(p, &lock); -- - if (kthread || is_migration_disabled(p)) { - /* - * Kernel threads are allowed on online && !active CPUs, -@@ -1998,37 +2034,14 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - - __do_set_cpus_allowed(p, new_mask); - -- /* Can the task run on the task's current CPU? If so, we're done */ -- if (cpumask_test_cpu(task_cpu(p), new_mask)) -- goto out; -+ if (flags & SCA_USER) -+ user_mask = clear_user_cpus_ptr(p); - -- if (p->migration_disabled) { -- if (likely(p->cpus_ptr != &p->cpus_mask)) -- __do_set_cpus_ptr(p, &p->cpus_mask); -- p->migration_disabled = 0; -- p->migration_flags |= MDF_FORCE_ENABLED; -- /* When p is migrate_disabled, rq->lock should be held */ -- rq->nr_pinned--; -- } -+ ret = affine_move_task(rq, p, dest_cpu, lock, irq_flags); - -- if (task_running(p) || READ_ONCE(p->__state) == TASK_WAKING) { -- struct migration_arg arg = { p, dest_cpu }; -+ kfree(user_mask); - -- /* Need help from migration thread: drop lock and wait. */ -- __task_access_unlock(p, lock); -- raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); -- stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -- return 0; -- } -- if (task_on_rq_queued(p)) { -- /* -- * OK, since we're going to drop the lock immediately -- * afterwards anyway. -- */ -- update_rq_clock(rq); -- rq = move_queued_task(rq, p, dest_cpu); -- lock = &rq->lock; -- } -+ return ret; - - out: - __task_access_unlock(p, lock); -@@ -2037,12 +2050,160 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - return ret; - } - -+/* -+ * Change a given task's CPU affinity. 
Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, u32 flags) -+{ -+ unsigned long irq_flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, irq_flags); -+ rq = __task_access_lock(p, &lock); -+ -+ return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, lock, irq_flags); -+} -+ - int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) - { - return __set_cpus_allowed_ptr(p, new_mask, 0); - } - EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); - -+/* -+ * Change a given task's CPU affinity to the intersection of its current -+ * affinity mask and @subset_mask, writing the resulting mask to @new_mask -+ * and pointing @p->user_cpus_ptr to a copy of the old mask. -+ * If the resulting mask is empty, leave the affinity unchanged and return -+ * -EINVAL. -+ */ -+static int restrict_cpus_allowed_ptr(struct task_struct *p, -+ struct cpumask *new_mask, -+ const struct cpumask *subset_mask) -+{ -+ struct cpumask *user_mask = NULL; -+ unsigned long irq_flags; -+ raw_spinlock_t *lock; -+ struct rq *rq; -+ int err; -+ -+ if (!p->user_cpus_ptr) { -+ user_mask = kmalloc(cpumask_size(), GFP_KERNEL); -+ if (!user_mask) -+ return -ENOMEM; -+ } -+ -+ raw_spin_lock_irqsave(&p->pi_lock, irq_flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) { -+ err = -EINVAL; -+ goto err_unlock; -+ } -+ -+ /* -+ * We're about to butcher the task affinity, so keep track of what -+ * the user asked for in case we're able to restore it later on. -+ */ -+ if (user_mask) { -+ cpumask_copy(user_mask, p->cpus_ptr); -+ p->user_cpus_ptr = user_mask; -+ } -+ -+ /*return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);*/ -+ return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, lock, irq_flags); -+ -+err_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); -+ kfree(user_mask); -+ return err; -+} -+ -+/* -+ * Restrict the CPU affinity of task @p so that it is a subset of -+ * task_cpu_possible_mask() and point @p->user_cpu_ptr to a copy of the -+ * old affinity mask. If the resulting mask is empty, we warn and walk -+ * up the cpuset hierarchy until we find a suitable mask. -+ */ -+void force_compatible_cpus_allowed_ptr(struct task_struct *p) -+{ -+ cpumask_var_t new_mask; -+ const struct cpumask *override_mask = task_cpu_possible_mask(p); -+ -+ alloc_cpumask_var(&new_mask, GFP_KERNEL); -+ -+ /* -+ * __migrate_task() can fail silently in the face of concurrent -+ * offlining of the chosen destination CPU, so take the hotplug -+ * lock to ensure that the migration succeeds. -+ */ -+ cpus_read_lock(); -+ if (!cpumask_available(new_mask)) -+ goto out_set_mask; -+ -+ if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask)) -+ goto out_free_mask; -+ -+ /* -+ * We failed to find a valid subset of the affinity mask for the -+ * task, so override it based on its cpuset hierarchy. 
-+ */ -+ cpuset_cpus_allowed(p, new_mask); -+ override_mask = new_mask; -+ -+out_set_mask: -+ if (printk_ratelimit()) { -+ printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", -+ task_pid_nr(p), p->comm, -+ cpumask_pr_args(override_mask)); -+ } -+ -+ WARN_ON(set_cpus_allowed_ptr(p, override_mask)); -+out_free_mask: -+ cpus_read_unlock(); -+ free_cpumask_var(new_mask); -+} -+ -+static int -+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask); -+ -+/* -+ * Restore the affinity of a task @p which was previously restricted by a -+ * call to force_compatible_cpus_allowed_ptr(). This will clear (and free) -+ * @p->user_cpus_ptr. -+ * -+ * It is the caller's responsibility to serialise this with any calls to -+ * force_compatible_cpus_allowed_ptr(@p). -+ */ -+void relax_compatible_cpus_allowed_ptr(struct task_struct *p) -+{ -+ struct cpumask *user_mask = p->user_cpus_ptr; -+ unsigned long flags; -+ -+ /* -+ * Try to restore the old affinity mask. If this fails, then -+ * we free the mask explicitly to avoid it being inherited across -+ * a subsequent fork(). -+ */ -+ if (!user_mask || !__sched_setaffinity(p, user_mask)) -+ return; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ user_mask = clear_user_cpus_ptr(p); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ kfree(user_mask); -+} -+ - #else /* CONFIG_SMP */ - - static inline int select_task_rq(struct task_struct *p) -@@ -2052,8 +2213,7 @@ static inline int select_task_rq(struct task_struct *p) - - static inline int - __set_cpus_allowed_ptr(struct task_struct *p, -- const struct cpumask *new_mask, -- u32 flags) -+ const struct cpumask *new_mask, u32 flags) - { - return set_cpus_allowed_ptr(p, new_mask); - } -@@ -5674,7 +5834,7 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask) - cpuset_cpus_allowed(p, cpus_allowed); - cpumask_and(new_mask, mask, cpus_allowed); - again: -- retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); -+ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER); - if (retval) - goto out_free_new_mask; - --- -2.37.0 - - -From 418955fbb100e0a418836b0410f816fa3b568ac1 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 2 Nov 2021 10:30:05 +0000 -Subject: [PATCH 227/297] sched/alt: [Sync] sched/wakeup - -This commit sync-up the below commits -43295d73adc8 sched/wakeup: Split out the wakeup ->__state check -5f220be21418 sched/wakeup: Prepare for RT sleeping spin/rwlocks ---- - kernel/sched/alt_core.c | 57 ++++++++++++++++++++++++++++++++++++----- - 1 file changed, 51 insertions(+), 6 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index fe42edce5d55..7931fed720cc 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2473,6 +2473,55 @@ static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) - raw_spin_unlock(&rq->lock); - } - -+/* -+ * Invoked from try_to_wake_up() to check whether the task can be woken up. -+ * -+ * The caller holds p::pi_lock if p != current or has preemption -+ * disabled when p == current. -+ * -+ * The rules of PREEMPT_RT saved_state: -+ * -+ * The related locking code always holds p::pi_lock when updating -+ * p::saved_state, which means the code is fully serialized in both cases. -+ * -+ * The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other -+ * bits set. This allows to distinguish all wakeup scenarios. 
-+ */ -+static __always_inline -+bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) -+{ -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) { -+ WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) && -+ state != TASK_RTLOCK_WAIT); -+ } -+ -+ if (READ_ONCE(p->__state) & state) { -+ *success = 1; -+ return true; -+ } -+ -+#ifdef CONFIG_PREEMPT_RT -+ /* -+ * Saved state preserves the task state across blocking on -+ * an RT lock. If the state matches, set p::saved_state to -+ * TASK_RUNNING, but do not wake the task because it waits -+ * for a lock wakeup. Also indicate success because from -+ * the regular waker's point of view this has succeeded. -+ * -+ * After acquiring the lock the task will restore p::__state -+ * from p::saved_state which ensures that the regular -+ * wakeup is not lost. The restore will also set -+ * p::saved_state to TASK_RUNNING so any further tests will -+ * not result in false positives vs. @success -+ */ -+ if (p->saved_state & state) { -+ p->saved_state = TASK_RUNNING; -+ *success = 1; -+ } -+#endif -+ return false; -+} -+ - /* - * Notes on Program-Order guarantees on SMP systems. - * -@@ -2620,10 +2669,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * - we're serialized against set_special_state() by virtue of - * it disabling IRQs (this allows not taking ->pi_lock). - */ -- if (!(READ_ONCE(p->__state) & state)) -+ if (!ttwu_state_match(p, state, &success)) - goto out; - -- success = 1; - trace_sched_waking(p); - WRITE_ONCE(p->__state, TASK_RUNNING); - trace_sched_wakeup(p); -@@ -2638,14 +2686,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - */ - raw_spin_lock_irqsave(&p->pi_lock, flags); - smp_mb__after_spinlock(); -- if (!(READ_ONCE(p->__state) & state)) -+ if (!ttwu_state_match(p, state, &success)) - goto unlock; - - trace_sched_waking(p); - -- /* We're going to change ->state: */ -- success = 1; -- - /* - * Ensure we load p->on_rq _after_ p->state, otherwise it would - * be possible to, falsely, observe p->on_rq == 0 and get stuck --- -2.37.0 - - -From 2d8bd043e300268f660a5f839ee925406eb52c56 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 2 Nov 2021 10:54:21 +0000 -Subject: [PATCH 228/297] sched/alt: [Sync] b4bfa3fcfe3b sched/core: Rework the - __schedule() preempt argument - ---- - kernel/sched/alt_core.c | 34 +++++++++++++++++++++++----------- - 1 file changed, 23 insertions(+), 11 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 7931fed720cc..b30f46567470 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4386,6 +4386,18 @@ choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) - return next; - } - -+/* -+ * Constants for the sched_mode argument of __schedule(). -+ * -+ * The mode argument allows RT enabled kernels to differentiate a -+ * preemption from blocking on an 'sleeping' spin/rwlock. Note that -+ * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to -+ * optimize the AND operation out and just check for zero. -+ */ -+#define SM_NONE 0x0 -+#define SM_PREEMPT 0x1 -+#define SM_MASK_PREEMPT (~0U) -+ - /* - * schedule() is the main scheduler function. - * -@@ -4425,7 +4437,7 @@ choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) - * - * WARNING: must be called with preemption disabled! 
- */ --static void __sched notrace __schedule(bool preempt) -+static void __sched notrace __schedule(unsigned int sched_mode) - { - struct task_struct *prev, *next; - unsigned long *switch_count; -@@ -4437,13 +4449,13 @@ static void __sched notrace __schedule(bool preempt) - rq = cpu_rq(cpu); - prev = rq->curr; - -- schedule_debug(prev, preempt); -+ schedule_debug(prev, !!sched_mode); - - /* by passing sched_feat(HRTICK) checking which Alt schedule FW doesn't support */ - hrtick_clear(rq); - - local_irq_disable(); -- rcu_note_context_switch(preempt); -+ rcu_note_context_switch(!!sched_mode); - - /* - * Make sure that signal_pending_state()->signal_pending() below -@@ -4474,7 +4486,7 @@ static void __sched notrace __schedule(bool preempt) - * - ptrace_{,un}freeze_traced() can change ->state underneath us. - */ - prev_state = READ_ONCE(prev->__state); -- if (!preempt && prev_state) { -+ if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { - if (signal_pending_state(prev_state, prev)) { - WRITE_ONCE(prev->__state, TASK_RUNNING); - } else { -@@ -4545,7 +4557,7 @@ static void __sched notrace __schedule(bool preempt) - - psi_sched_switch(prev, next, !task_on_rq_queued(prev)); - -- trace_sched_switch(preempt, prev, next); -+ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next); - - /* Also unlocks the rq: */ - rq = context_switch(rq, prev, next); -@@ -4567,7 +4579,7 @@ void __noreturn do_task_dead(void) - /* Tell freezer to ignore us: */ - current->flags |= PF_NOFREEZE; - -- __schedule(false); -+ __schedule(SM_NONE); - BUG(); - - /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -@@ -4628,7 +4640,7 @@ asmlinkage __visible void __sched schedule(void) - sched_submit_work(tsk); - do { - preempt_disable(); -- __schedule(false); -+ __schedule(SM_NONE); - sched_preempt_enable_no_resched(); - } while (need_resched()); - sched_update_worker(tsk); -@@ -4656,7 +4668,7 @@ void __sched schedule_idle(void) - */ - WARN_ON_ONCE(current->__state); - do { -- __schedule(false); -+ __schedule(SM_NONE); - } while (need_resched()); - } - -@@ -4709,7 +4721,7 @@ static void __sched notrace preempt_schedule_common(void) - */ - preempt_disable_notrace(); - preempt_latency_start(1); -- __schedule(true); -+ __schedule(SM_PREEMPT); - preempt_latency_stop(1); - preempt_enable_no_resched_notrace(); - -@@ -4788,7 +4800,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) - * an infinite recursion. 
- */ - prev_ctx = exception_enter(); -- __schedule(true); -+ __schedule(SM_PREEMPT); - exception_exit(prev_ctx); - - preempt_latency_stop(1); -@@ -4937,7 +4949,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) - do { - preempt_disable(); - local_irq_enable(); -- __schedule(true); -+ __schedule(SM_PREEMPT); - local_irq_disable(); - sched_preempt_enable_no_resched(); - } while (need_resched()); --- -2.37.0 - - -From a52bd2dcfe2ed4d4fa8376a0847db1a1c5bb6217 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 2 Nov 2021 10:59:04 +0000 -Subject: [PATCH 229/297] sched/alt: [Sync] 6991436c2b5d sched/core: Provide a - scheduling point for RT locks - ---- - kernel/sched/alt_core.c | 20 +++++++++++++++++++- - 1 file changed, 19 insertions(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b30f46567470..f10749755cc8 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4396,7 +4396,13 @@ choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) - */ - #define SM_NONE 0x0 - #define SM_PREEMPT 0x1 --#define SM_MASK_PREEMPT (~0U) -+#define SM_RTLOCK_WAIT 0x2 -+ -+#ifndef CONFIG_PREEMPT_RT -+# define SM_MASK_PREEMPT (~0U) -+#else -+# define SM_MASK_PREEMPT SM_PREEMPT -+#endif - - /* - * schedule() is the main scheduler function. -@@ -4703,6 +4709,18 @@ void __sched schedule_preempt_disabled(void) - preempt_disable(); - } - -+#ifdef CONFIG_PREEMPT_RT -+void __sched notrace schedule_rtlock(void) -+{ -+ do { -+ preempt_disable(); -+ __schedule(SM_RTLOCK_WAIT); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+} -+NOKPROBE_SYMBOL(schedule_rtlock); -+#endif -+ - static void __sched notrace preempt_schedule_common(void) - { - do { --- -2.37.0 - - -From 565ecd60ab8d9eb1165e3d05f1898d72579748af Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 2 Nov 2021 11:05:16 +0000 -Subject: [PATCH 230/297] sched/alt: [Sync] 63acd42c0d49 sched/scs: Reset the - shadow stack when idle_task_exit - ---- - kernel/sched/alt_core.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f10749755cc8..44361857be30 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6642,6 +6642,7 @@ void idle_task_exit(void) - finish_arch_post_lock_switch(); - } - -+ scs_task_reset(current); - /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ - } - --- -2.37.0 - - -From 9382a3b0c9ccb4917982e1f7b0fed7f1f120a9c4 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 2 Nov 2021 14:11:00 +0000 -Subject: [PATCH 231/297] Project-C v5.15-r0 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 44361857be30..9576c57f82da 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.14-r4" -+#define ALT_SCHED_VERSION "v5.15-r0" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 219ac601646e194c11e589e17fa29e278c467dea Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 19 Nov 2021 14:44:34 +0000 -Subject: [PATCH 232/297] sched/alt: [Sync] 3869eecf0504 kernel/sched: Fix - sched_fork() access an invalid sched_task_group - ---- - kernel/sched/alt_core.c | 29 +++++++++++++++-------------- - 1 file 
changed, 15 insertions(+), 14 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 9576c57f82da..b8d5b1db9fac 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2912,9 +2912,6 @@ static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p - */ - int sched_fork(unsigned long clone_flags, struct task_struct *p) - { -- unsigned long flags; -- struct rq *rq; -- - __sched_fork(clone_flags, p); - /* - * We mark the process as NEW here. This guarantees that -@@ -2948,6 +2945,20 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - p->sched_reset_on_fork = 0; - } - -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ - /* - * The child is not yet in the pid-hash so no cgroup attach races, - * and the cgroup is pinned to this child due to cgroup_fork() -@@ -2982,20 +2993,10 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - * We're setting the CPU for the first time, we don't migrate, - * so use __set_task_cpu(). - */ -- __set_task_cpu(p, cpu_of(rq)); -+ __set_task_cpu(p, smp_processor_id()); - raw_spin_unlock_irqrestore(&p->pi_lock, flags); -- --#ifdef CONFIG_SCHED_INFO -- if (unlikely(sched_info_on())) -- memset(&p->sched_info, 0, sizeof(p->sched_info)); --#endif -- init_task_preempt_count(p); -- -- return 0; - } - --void sched_post_fork(struct task_struct *p) {} -- - #ifdef CONFIG_SCHEDSTATS - - DEFINE_STATIC_KEY_FALSE(sched_schedstats); --- -2.37.0 - - -From 0f40f9b6ce1e2e8d06f863f750b95a179f8f39d5 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 19 Nov 2021 15:15:12 +0000 -Subject: [PATCH 233/297] Project-C v5.15-r1 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b8d5b1db9fac..8b0ddbdd24e4 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.15-r0" -+#define ALT_SCHED_VERSION "v5.15-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 51eb72aad2884ab9241bf686327c65d04abe90a5 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 31 Jan 2022 23:54:53 +0000 -Subject: [PATCH 234/297] sched/alt: [Sync] 008f75a20e70 block: cleanup the - flush plug helpers - ---- - kernel/sched/alt_core.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 8b0ddbdd24e4..39b6c5b8a147 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4627,7 +4627,7 @@ static inline void sched_submit_work(struct task_struct *tsk) - * make sure to submit it to avoid deadlocks. 
- */ - if (blk_needs_flush_plug(tsk)) -- blk_schedule_flush_plug(tsk); -+ blk_flush_plug(tsk->plug, true); - } - - static void sched_update_worker(struct task_struct *tsk) -@@ -6274,7 +6274,8 @@ int io_schedule_prepare(void) - int old_iowait = current->in_iowait; - - current->in_iowait = 1; -- blk_schedule_flush_plug(current); -+ if (current->plug) -+ blk_flush_plug(current->plug, true); - - return old_iowait; - } --- -2.37.0 - - -From 874ce3af55cd45ad1a86e22375c553f46b9c79a9 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 31 Jan 2022 23:57:26 +0000 -Subject: [PATCH 235/297] sched/alt: [Sync] 874f670e6088 sched: Clean up the - might_sleep() underscore zoo - ---- - kernel/sched/alt_core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 39b6c5b8a147..6512c37d4fed 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -7247,11 +7247,11 @@ void __might_sleep(const char *file, int line, int preempt_offset) - (void *)current->task_state_change, - (void *)current->task_state_change); - -- ___might_sleep(file, line, preempt_offset); -+ __might_resched(file, line, preempt_offset); - } - EXPORT_SYMBOL(__might_sleep); - --void ___might_sleep(const char *file, int line, int preempt_offset) -+void __might_resched(const char *file, int line, int preempt_offset) - { - /* Ratelimiting timestamp: */ - static unsigned long prev_jiffy; -@@ -7296,7 +7296,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset) - dump_stack(); - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); - } --EXPORT_SYMBOL(___might_sleep); -+EXPORT_SYMBOL(__might_resched); - - void __cant_sleep(const char *file, int line, int preempt_offset) - { --- -2.37.0 - - -From 8fe7d47613b3ec4082df7d18bd2e6c2d56b886a2 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 31 Jan 2022 23:59:31 +0000 -Subject: [PATCH 236/297] sched/alt: [Sync] 42a387566c56 sched: Remove - preempt_offset argument from __might_sleep() - ---- - kernel/sched/alt_core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 6512c37d4fed..7f099c407879 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -7233,7 +7233,7 @@ static inline int preempt_count_equals(int preempt_offset) - return (nested == preempt_offset); - } - --void __might_sleep(const char *file, int line, int preempt_offset) -+void __might_sleep(const char *file, int line) - { - unsigned int state = get_current_state(); - /* -@@ -7247,7 +7247,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) - (void *)current->task_state_change, - (void *)current->task_state_change); - -- __might_resched(file, line, preempt_offset); -+ __might_resched(file, line, 0); - } - EXPORT_SYMBOL(__might_sleep); - --- -2.37.0 - - -From 91c92a5bd40c7cc9d87ed3802284e410e8555d2e Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 1 Feb 2022 00:03:46 +0000 -Subject: [PATCH 237/297] sched/alt: [Sync] a45ed302b6e6 sched: Cleanup - might_sleep() printks - ---- - kernel/sched/alt_core.c | 14 ++++++-------- - 1 file changed, 6 insertions(+), 8 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 7f099c407879..cb8f93bca0b7 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -7273,16 +7273,14 @@ void __might_resched(const char *file, int line, int preempt_offset) - /* Save this before calling printk(), since that will clobber it: */ - 
preempt_disable_ip = get_preempt_disable_ip(current); - -- printk(KERN_ERR -- "BUG: sleeping function called from invalid context at %s:%d\n", -- file, line); -- printk(KERN_ERR -- "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -- in_atomic(), irqs_disabled(), current->non_block_count, -- current->pid, current->comm); -+ pr_err("BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); - - if (task_stack_end_corrupted(current)) -- printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ pr_emerg("Thread overran stack, or stack corrupted\n"); - - debug_show_held_locks(current); - if (irqs_disabled()) --- -2.37.0 - - -From 4ac3f931dc243f8fd01ed754d7bbc19ec9f2933c Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 24 Mar 2022 11:05:50 +0000 -Subject: [PATCH 238/297] sched/alt: [Sync] 8d713b699e84 sched: Make - might_sleep() output less confusing - ---- - kernel/sched/alt_core.c | 28 ++++++++++++++++++++++------ - 1 file changed, 22 insertions(+), 6 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index cb8f93bca0b7..35d41ddd866a 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -7251,6 +7251,18 @@ void __might_sleep(const char *file, int line) - } - EXPORT_SYMBOL(__might_sleep); - -+static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) -+{ -+ if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT)) -+ return; -+ -+ if (preempt_count() == preempt_offset) -+ return; -+ -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, ip); -+} -+ - void __might_resched(const char *file, int line, int preempt_offset) - { - /* Ratelimiting timestamp: */ -@@ -7278,6 +7290,13 @@ void __might_resched(const char *file, int line, int preempt_offset) - pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", - in_atomic(), irqs_disabled(), current->non_block_count, - current->pid, current->comm); -+ pr_err("preempt_count: %x, expected: %x\n", preempt_count(), -+ preempt_offset); -+ -+ if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { -+ pr_err("RCU nest depth: %d, expected: 0\n", -+ rcu_preempt_depth()); -+ } - - if (task_stack_end_corrupted(current)) - pr_emerg("Thread overran stack, or stack corrupted\n"); -@@ -7285,12 +7304,9 @@ void __might_resched(const char *file, int line, int preempt_offset) - debug_show_held_locks(current); - if (irqs_disabled()) - print_irqtrace_events(current); --#ifdef CONFIG_DEBUG_PREEMPT -- if (!preempt_count_equals(preempt_offset)) { -- pr_err("Preemption disabled at:"); -- print_ip_sym(KERN_ERR, preempt_disable_ip); -- } --#endif -+ -+ print_preempt_disable_ip(preempt_offset, preempt_disable_ip); -+ - dump_stack(); - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); - } --- -2.37.0 - - -From 33c812e321c733db96c0c0867b884b4149383c3b Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 24 Mar 2022 11:14:16 +0000 -Subject: [PATCH 239/297] sched/alt: [Sync] 50e081b96e35 sched: Make RCU nest - depth distinct in __might_resched() - ---- - kernel/sched/alt_core.c | 28 ++++++++++++++++------------ - 1 file changed, 16 insertions(+), 12 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 35d41ddd866a..5015be1987f0 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -7226,12 +7226,6 @@ void __init sched_init(void) - } - - 
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP --static inline int preempt_count_equals(int preempt_offset) --{ -- int nested = preempt_count() + rcu_preempt_depth(); -- -- return (nested == preempt_offset); --} - - void __might_sleep(const char *file, int line) - { -@@ -7263,7 +7257,16 @@ static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) - print_ip_sym(KERN_ERR, ip); - } - --void __might_resched(const char *file, int line, int preempt_offset) -+static inline bool resched_offsets_ok(unsigned int offsets) -+{ -+ unsigned int nested = preempt_count(); -+ -+ nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT; -+ -+ return nested == offsets; -+} -+ -+void __might_resched(const char *file, int line, unsigned int offsets) - { - /* Ratelimiting timestamp: */ - static unsigned long prev_jiffy; -@@ -7273,7 +7276,7 @@ void __might_resched(const char *file, int line, int preempt_offset) - /* WARN_ON_ONCE() by default, no rate limit required: */ - rcu_sleep_check(); - -- if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ if ((resched_offsets_ok(offsets) && !irqs_disabled() && - !is_idle_task(current) && !current->non_block_count) || - system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || - oops_in_progress) -@@ -7291,11 +7294,11 @@ void __might_resched(const char *file, int line, int preempt_offset) - in_atomic(), irqs_disabled(), current->non_block_count, - current->pid, current->comm); - pr_err("preempt_count: %x, expected: %x\n", preempt_count(), -- preempt_offset); -+ offsets & MIGHT_RESCHED_PREEMPT_MASK); - - if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { -- pr_err("RCU nest depth: %d, expected: 0\n", -- rcu_preempt_depth()); -+ pr_err("RCU nest depth: %d, expected: %u\n", -+ rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT); - } - - if (task_stack_end_corrupted(current)) -@@ -7305,7 +7308,8 @@ void __might_resched(const char *file, int line, int preempt_offset) - if (irqs_disabled()) - print_irqtrace_events(current); - -- print_preempt_disable_ip(preempt_offset, preempt_disable_ip); -+ print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK, -+ preempt_disable_ip); - - dump_stack(); - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); --- -2.37.0 - - -From 514691cc12c9e113eda837fd1367710e209aadf7 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 24 Mar 2022 11:22:03 +0000 -Subject: [PATCH 240/297] sched/alt: [Sync] c33627e9a114 sched: Switch - wait_task_inactive to HRTIMER_MODE_REL_HARD - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 5015be1987f0..ff4a45708a68 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1747,7 +1747,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state - ktime_t to = NSEC_PER_SEC / HZ; - - set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); - continue; - } - --- -2.37.0 - - -From 6e0c61073cc9c76d6ac48058e6019cb92cbda964 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Mar 2022 10:43:13 +0000 -Subject: [PATCH 241/297] sched/alt: [Sync] ceeadb83aea2 sched: Make struct - sched_statistics independent of fair sched class - ---- - kernel/sched/alt_core.c | 16 +++++++++++++--- - kernel/sched/alt_sched.h | 1 + - kernel/sched/stats.h | 2 ++ - 3 files changed, 16 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 
ff4a45708a68..f783788e3caa 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -28,7 +28,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -2236,9 +2235,10 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) - rq = this_rq(); - - #ifdef CONFIG_SMP -- if (cpu == rq->cpu) -+ if (cpu == rq->cpu) { - __schedstat_inc(rq->ttwu_local); -- else { -+ __schedstat_inc(p->stats.nr_wakeups_local); -+ } else { - /** Alt schedule FW ToDo: - * How to do ttwu_wake_remote - */ -@@ -2246,6 +2246,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) - #endif /* CONFIG_SMP */ - - __schedstat_inc(rq->ttwu_count); -+ __schedstat_inc(p->stats.nr_wakeups); - } - - /* -@@ -2895,6 +2896,11 @@ static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p - p->stime = 0; - p->sched_time = 0; - -+#ifdef CONFIG_SCHEDSTATS -+ /* Even if schedstat is disabled, there should not be garbage */ -+ memset(&p->stats, 0, sizeof(p->stats)); -+#endif -+ - #ifdef CONFIG_PREEMPT_NOTIFIERS - INIT_HLIST_HEAD(&p->preempt_notifiers); - #endif -@@ -7397,6 +7403,10 @@ void normalize_rt_tasks(void) - if (p->flags & PF_KTHREAD) - continue; - -+ schedstat_set(p->stats.wait_start, 0); -+ schedstat_set(p->stats.sleep_start, 0); -+ schedstat_set(p->stats.block_start, 0); -+ - if (!rt_task(p)) { - /* - * Renice negative nice level userspace -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 289058a09bd5..f2b9e686d6a6 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include - #include - #include - #include -diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h -index baa839c1ba96..15238be0581b 100644 ---- a/kernel/sched/stats.h -+++ b/kernel/sched/stats.h -@@ -89,6 +89,7 @@ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delt - - #endif /* CONFIG_SCHEDSTATS */ - -+#ifndef CONFIG_SCHED_ALT - #ifdef CONFIG_FAIR_GROUP_SCHED - struct sched_entity_stats { - struct sched_entity se; -@@ -105,6 +106,7 @@ __schedstats_from_se(struct sched_entity *se) - #endif - return &task_of(se)->stats; - } -+#endif /* CONFIG_SCHED_ALT */ - - #ifdef CONFIG_PSI - /* --- -2.37.0 - - -From ed37afe16495d81e98faa671095c14c0c3dce01c Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Mar 2022 13:47:50 +0000 -Subject: [PATCH 242/297] sched/alt: [Sync] c597bfddc9e9 sched: Provide Kconfig - support for default dynamic preempt mode - ---- - kernel/sched/alt_core.c | 29 ++++++++++++++++++++++++++--- - 1 file changed, 26 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f783788e3caa..5fad6f795625 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4876,12 +4876,13 @@ EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); - */ - - enum { -- preempt_dynamic_none = 0, -+ preempt_dynamic_undefined = -1, -+ preempt_dynamic_none, - preempt_dynamic_voluntary, - preempt_dynamic_full, - }; - --int preempt_dynamic_mode = preempt_dynamic_full; -+int preempt_dynamic_mode = preempt_dynamic_undefined; - - int sched_dynamic_mode(const char *str) - { -@@ -4954,7 +4955,27 @@ static int __init setup_preempt_mode(char *str) - } - __setup("preempt=", setup_preempt_mode); - --#endif /* CONFIG_PREEMPT_DYNAMIC */ -+static void __init preempt_dynamic_init(void) -+{ -+ if (preempt_dynamic_mode == preempt_dynamic_undefined) { -+ if (IS_ENABLED(CONFIG_PREEMPT_NONE)) { -+ 
sched_dynamic_update(preempt_dynamic_none); -+ } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) { -+ sched_dynamic_update(preempt_dynamic_voluntary); -+ } else { -+ /* Default static call setting, nothing to do */ -+ WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); -+ preempt_dynamic_mode = preempt_dynamic_full; -+ pr_info("Dynamic Preempt: full\n"); -+ } -+ } -+} -+ -+#else /* !CONFIG_PREEMPT_DYNAMIC */ -+ -+static inline void preempt_dynamic_init(void) { } -+ -+#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */ - - /* - * This is the entry point to schedule() from kernel preemption -@@ -7229,6 +7250,8 @@ void __init sched_init(void) - #endif /* SMP */ - - psi_init(); -+ -+ preempt_dynamic_init(); - } - - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP --- -2.37.0 - - -From e6f6d45ddd16dc961aa6f70cb9d90a6b873a390a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Mar 2022 13:52:06 +0000 -Subject: [PATCH 243/297] sched/alt: [Sync] 8d491de6edc2 sched: Move mmdrop to - RCU on RT - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 5fad6f795625..89cd86225ed3 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3465,7 +3465,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) - */ - if (mm) { - membarrier_mm_sync_core_before_usermode(mm); -- mmdrop(mm); -+ mmdrop_sched(mm); - } - if (unlikely(prev_state == TASK_DEAD)) { - /* --- -2.37.0 - - -From d3567a80216b2828cd001b3c97ad9d90754c8dec Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Mar 2022 14:06:58 +0000 -Subject: [PATCH 244/297] sched/alt: [Sync] 670721c7bd2a sched: Move kprobes - cleanup out of finish_task_switch() - ---- - kernel/sched/alt_core.c | 6 ------ - 1 file changed, 6 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 89cd86225ed3..107c5f4b74a7 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3468,12 +3468,6 @@ static struct rq *finish_task_switch(struct task_struct *prev) - mmdrop_sched(mm); - } - if (unlikely(prev_state == TASK_DEAD)) { -- /* -- * Remove function-return probe instances associated with this -- * task and put them back on the free list. -- */ -- kprobe_flush_task(prev); -- - /* Task is done with its stack. */ - put_task_stack(prev); - --- -2.37.0 - - -From 6eaae287302bde2d2107e4800710df3ebe02206c Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Mar 2022 14:10:30 +0000 -Subject: [PATCH 245/297] sched/alt: [Sync] b945efcdd07d sched: Remove - pointless preemption disable in sched_submit_work() - ---- - kernel/sched/alt_core.c | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 107c5f4b74a7..3ba686739e4d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4603,20 +4603,14 @@ static inline void sched_submit_work(struct task_struct *tsk) - - task_flags = tsk->flags; - /* -- * If a worker went to sleep, notify and ask workqueue whether -- * it wants to wake up a task to maintain concurrency. -- * As this function is called inside the schedule() context, -- * we disable preemption to avoid it calling schedule() again -- * in the possible wakeup of a kworker and because wq_worker_sleeping() -- * requires it. -+ * If a worker goes to sleep, notify and ask workqueue whether it -+ * wants to wake up a task to maintain concurrency. 
- */ - if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -- preempt_disable(); - if (task_flags & PF_WQ_WORKER) - wq_worker_sleeping(tsk); - else - io_wq_worker_sleeping(tsk); -- preempt_enable_no_resched(); - } - - if (tsk_is_pi_blocked(tsk)) --- -2.37.0 - - -From 6842ba831f915ba07bd0bc2b84515b2f446fb74f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Mar 2022 21:35:14 +0000 -Subject: [PATCH 246/297] sched/alt: [Sync] f6ac18fafcf6 sched: Improve - try_invoke_on_locked_down_task() - ---- - kernel/sched/alt_core.c | 63 +++++++++++++++++++++++++---------------- - 1 file changed, 39 insertions(+), 24 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3ba686739e4d..fbb46f7c98ce 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2821,41 +2821,56 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * @func: Function to invoke. - * @arg: Argument to function. - * -- * If the specified task can be quickly locked into a definite state -- * (either sleeping or on a given runqueue), arrange to keep it in that -- * state while invoking @func(@arg). This function can use ->on_rq and -- * task_curr() to work out what the state is, if required. Given that -- * @func can be invoked with a runqueue lock held, it had better be quite -- * lightweight. -+ * Fix the task in it's current state by avoiding wakeups and or rq operations -+ * and call @func(@arg) on it. This function can use ->on_rq and task_curr() -+ * to work out what the state is, if required. Given that @func can be invoked -+ * with a runqueue lock held, it had better be quite lightweight. - * - * Returns: -- * @false if the task slipped out from under the locks. -- * @true if the task was locked onto a runqueue or is sleeping. -- * However, @func can override this by returning @false. -+ * Whatever @func returns - */ - bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) - { -+ struct rq *rq = NULL; -+ unsigned int state; - struct rq_flags rf; - bool ret = false; -- struct rq *rq; - - raw_spin_lock_irqsave(&p->pi_lock, rf.flags); -- if (p->on_rq) { -+ -+ state = READ_ONCE(p->__state); -+ -+ /* -+ * Ensure we load p->on_rq after p->__state, otherwise it would be -+ * possible to, falsely, observe p->on_rq == 0. -+ * -+ * See try_to_wake_up() for a longer comment. -+ */ -+ smp_rmb(); -+ -+ /* -+ * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when -+ * the task is blocked. Make sure to check @state since ttwu() can drop -+ * locks at the end, see ttwu_queue_wakelist(). -+ */ -+ if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq) - rq = __task_rq_lock(p, &rf); -- if (task_rq(p) == rq) -- ret = func(p, arg); -+ -+ /* -+ * At this point the task is pinned; either: -+ * - blocked and we're holding off wakeups (pi->lock) -+ * - woken, and we're holding off enqueue (rq->lock) -+ * - queued, and we're holding off schedule (rq->lock) -+ * - running, and we're holding off de-schedule (rq->lock) -+ * -+ * The called function (@func) can use: task_curr(), p->on_rq and -+ * p->__state to differentiate between these states. -+ */ -+ ret = func(p, arg); -+ -+ if (rq) - __task_rq_unlock(rq, &rf); -- } else { -- switch (READ_ONCE(p->__state)) { -- case TASK_RUNNING: -- case TASK_WAKING: -- break; -- default: -- smp_rmb(); // See smp_rmb() comment in try_to_wake_up(). 
-- if (!p->on_rq) -- ret = func(p, arg); -- } -- } -+ - raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); - return ret; - } --- -2.37.0 - - -From f884a8b53f81a0e837d002bcc30d6417be0c9a1f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 25 Mar 2022 22:07:37 +0000 -Subject: [PATCH 247/297] sched/alt: [Sync] 9b3c4ab3045e sched,rcu: Rework - try_invoke_on_locked_down_task() - ---- - kernel/sched/alt_core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index fbb46f7c98ce..4c02602ea9ab 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2816,7 +2816,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - } - - /** -- * try_invoke_on_locked_down_task - Invoke a function on task in fixed state -+ * task_call_func - Invoke a function on task in fixed state - * @p: Process for which the function is to be invoked, can be @current. - * @func: Function to invoke. - * @arg: Argument to function. -@@ -2829,12 +2829,12 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * Returns: - * Whatever @func returns - */ --bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) -+int task_call_func(struct task_struct *p, task_call_f func, void *arg) - { - struct rq *rq = NULL; - unsigned int state; - struct rq_flags rf; -- bool ret = false; -+ int ret; - - raw_spin_lock_irqsave(&p->pi_lock, rf.flags); - --- -2.37.0 - - -From 44d5bff770e0a048a330580b286e49515656d9e5 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 26 Mar 2022 14:57:59 +0000 -Subject: [PATCH 248/297] sched/alt: [Sync] 8850cb663b5c sched: Simplify - wake_up_*idle*() - ---- - kernel/sched/alt_core.c | 14 +++++--------- - 1 file changed, 5 insertions(+), 9 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4c02602ea9ab..12bad31a663a 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2434,15 +2434,11 @@ void wake_up_if_idle(int cpu) - if (!is_idle_task(rcu_dereference(rq->curr))) - goto out; - -- if (set_nr_if_polling(rq->idle)) { -- trace_sched_wake_idle_without_ipi(cpu); -- } else { -- raw_spin_lock_irqsave(&rq->lock, flags); -- if (is_idle_task(rq->curr)) -- smp_send_reschedule(cpu); -- /* Else CPU is not idle, do nothing here */ -- raw_spin_unlock_irqrestore(&rq->lock, flags); -- } -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ resched_curr(rq); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); - - out: - rcu_read_unlock(); --- -2.37.0 - - -From ca436f41b3429fe65d576e08288971b993be7b2f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 26 Mar 2022 15:37:29 +0000 -Subject: [PATCH 249/297] sched/alt: [Sync] 42a20f86dc19 sched: Add wrapper for - get_wchan() to keep task blocked - ---- - kernel/sched/alt_core.c | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 12bad31a663a..1b7eae610c8f 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -705,6 +705,25 @@ bool sched_task_on_rq(struct task_struct *p) - return task_on_rq_queued(p); - } - -+unsigned long get_wchan(struct task_struct *p) -+{ -+ unsigned long ip = 0; -+ unsigned int state; -+ -+ if (!p || p == current) -+ return 0; -+ -+ /* Only get wchan if task is blocked and we can keep it that way. 
*/ -+ raw_spin_lock_irq(&p->pi_lock); -+ state = READ_ONCE(p->__state); -+ smp_rmb(); /* see try_to_wake_up() */ -+ if (state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq) -+ ip = __get_wchan(p); -+ raw_spin_unlock_irq(&p->pi_lock); -+ -+ return ip; -+} -+ - /* - * Add/Remove/Requeue task to/from the runqueue routines - * Context: rq->lock --- -2.37.0 - - -From 1374b3bed52331b57467ce3b08a20fd51e064213 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 26 Mar 2022 15:53:21 +0000 -Subject: [PATCH 250/297] sched/alt: [Sync] 42dc938a590c sched/core: Mitigate - race cpus_share_cache()/update_top_cache_domain() - ---- - kernel/sched/alt_core.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 1b7eae610c8f..2f05197477a7 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2465,6 +2465,9 @@ void wake_up_if_idle(int cpu) - - bool cpus_share_cache(int this_cpu, int that_cpu) - { -+ if (this_cpu == that_cpu) -+ return true; -+ - return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); - } - #else /* !CONFIG_SMP */ --- -2.37.0 - - -From 546b3c60ac0f5ff3734f9c06099ddc2d422320a4 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 26 Mar 2022 16:31:36 +0000 -Subject: [PATCH 251/297] sched/alt: [Sync] b027789e5e50 sched/fair: Prevent - dead task groups from regaining cfs_rq's - ---- - kernel/sched/alt_core.c | 30 ++++++++++++++++++++++-------- - 1 file changed, 22 insertions(+), 8 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 2f05197477a7..7d696c143d7d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -7522,6 +7522,20 @@ static void sched_free_group(struct task_group *tg) - kmem_cache_free(task_group_cache, tg); - } - -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+static void sched_unregister_group(struct task_group *tg) -+{ -+ /* -+ * We have to wait for yet another RCU grace period to expire, as -+ * print_cfs_stats() might run concurrently. 
-+ */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ - /* allocate runqueue etc for a new task group */ - struct task_group *sched_create_group(struct task_group *parent) - { -@@ -7539,19 +7553,19 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) - } - - /* rcu callback to free various structures associated with a task group */ --static void sched_free_group_rcu(struct rcu_head *rhp) -+static void sched_unregister_group_rcu(struct rcu_head *rhp) - { -- /* Now it should be safe to free those cfs_rqs */ -- sched_free_group(container_of(rhp, struct task_group, rcu)); -+ /* Now it should be safe to free those cfs_rqs: */ -+ sched_unregister_group(container_of(rhp, struct task_group, rcu)); - } - - void sched_destroy_group(struct task_group *tg) - { -- /* Wait for possible concurrent references to cfs_rqs complete */ -- call_rcu(&tg->rcu, sched_free_group_rcu); -+ /* Wait for possible concurrent references to cfs_rqs complete: */ -+ call_rcu(&tg->rcu, sched_unregister_group_rcu); - } - --void sched_offline_group(struct task_group *tg) -+void sched_release_group(struct task_group *tg) - { - } - -@@ -7592,7 +7606,7 @@ static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) - { - struct task_group *tg = css_tg(css); - -- sched_offline_group(tg); -+ sched_release_group(tg); - } - - static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -@@ -7602,7 +7616,7 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) - /* - * Relies on the RCU grace period between css_released() and this. - */ -- sched_free_group(tg); -+ sched_unregister_group(tg); - } - - static void cpu_cgroup_fork(struct task_struct *task) --- -2.37.0 - - -From eafb174cc75a117b7a41bf419b2fcd823c24c67d Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 28 Mar 2022 14:41:07 +0000 -Subject: [PATCH 252/297] sched/alt: [Sync] dce1ca0525bf sched/scs: Reset task - stack state in bringup_cpu() - ---- - kernel/sched/alt_core.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 7d696c143d7d..564ddea29a5c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6611,9 +6611,6 @@ void __init init_idle(struct task_struct *idle, int cpu) - - sched_queue_init_idle(&rq->queue, idle); - -- scs_task_reset(idle); -- kasan_unpoison_task_stack(idle); -- - #ifdef CONFIG_SMP - /* - * It's possible that init_idle() gets called multiple times on a task, -@@ -6692,7 +6689,6 @@ void idle_task_exit(void) - finish_arch_post_lock_switch(); - } - -- scs_task_reset(current); - /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ - } - --- -2.37.0 - - -From 971a3cad3439a60192fc109d292211ee949bd30f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 28 Mar 2022 15:58:21 +0000 -Subject: [PATCH 253/297] sched/alt: [Sync] 9ed20bafc858 preempt/dynamic: Fix - setup_preempt_mode() return value - ---- - kernel/sched/alt_core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 564ddea29a5c..e6fb7f44d1f8 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4968,11 +4968,11 @@ static int __init setup_preempt_mode(char *str) - int mode = sched_dynamic_mode(str); - if (mode < 0) { - pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); -- return 1; -+ return 0; - } - - sched_dynamic_update(mode); -- return 0; -+ return 1; - } - __setup("preempt=", setup_preempt_mode); - --- -2.37.0 - - -From 
8a378e4e86bfb0ab4c2f96539ec085ed85b6f597 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 28 Mar 2022 16:39:07 +0000 -Subject: [PATCH 254/297] sched/alt: [Sync] bcf9033e5449 sched: move CPU field - back into thread_info if THREAD_INFO_IN_TASK=y - ---- - kernel/sched/alt_core.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index e6fb7f44d1f8..01af42ae88c9 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1333,12 +1333,8 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) - */ - smp_wmb(); - --#ifdef CONFIG_THREAD_INFO_IN_TASK -- WRITE_ONCE(p->cpu, cpu); --#else - WRITE_ONCE(task_thread_info(p)->cpu, cpu); - #endif --#endif - } - - static inline bool is_migration_disabled(struct task_struct *p) --- -2.37.0 - - -From a2f5af380f37a9dfed9b0db77e4bf5e241484048 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 30 Mar 2022 14:02:11 +0000 -Subject: [PATCH 255/297] Project-C v5.16-r0 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 01af42ae88c9..c90cd4a48660 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -66,7 +66,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.15-r1" -+#define ALT_SCHED_VERSION "v5.16-r0" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 6d57850ffda9de0a69e1432a0635b1159f61cbbd Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 30 Mar 2022 14:09:08 +0000 -Subject: [PATCH 256/297] sched/alt: [Sync] c65cfd89cef6 sched: Fix yet more - sched_fork() races - ---- - kernel/sched/alt_core.c | 13 +++++++------ - 1 file changed, 7 insertions(+), 6 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c90cd4a48660..3a841b733f1e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2989,17 +2989,14 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - return 0; - } - --void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) -+void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) - { - unsigned long flags; - struct rq *rq; - - /* -- * The child is not yet in the pid-hash so no cgroup attach races, -- * and the cgroup is pinned to this child due to cgroup_fork() -- * is ran before sched_fork(). -- * -- * Silence PROVE_RCU. -+ * Because we're not yet on the pid-hash, p->pi_lock isn't strictly -+ * required yet, but lockdep gets upset if rules are violated. 
- */ - raw_spin_lock_irqsave(&p->pi_lock, flags); - /* -@@ -3032,6 +3029,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) - raw_spin_unlock_irqrestore(&p->pi_lock, flags); - } - -+void sched_post_fork(struct task_struct *p) -+{ -+} -+ - #ifdef CONFIG_SCHEDSTATS - - DEFINE_STATIC_KEY_FALSE(sched_schedstats); --- -2.37.0 - - -From be71d9d7fd6c13111e2102c3e17f68fc0061dc52 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 30 Mar 2022 14:17:03 +0000 -Subject: [PATCH 257/297] sched/alt: [Sync] f07660619137 sched: Avoid double - preemption in __cond_resched_*lock*() - ---- - kernel/sched/alt_core.c | 12 +++--------- - 1 file changed, 3 insertions(+), 9 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3a841b733f1e..02b29f2ca885 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6211,9 +6211,7 @@ int __cond_resched_lock(spinlock_t *lock) - - if (spin_needbreak(lock) || resched) { - spin_unlock(lock); -- if (resched) -- preempt_schedule_common(); -- else -+ if (!_cond_resched()) - cpu_relax(); - ret = 1; - spin_lock(lock); -@@ -6231,9 +6229,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock) - - if (rwlock_needbreak(lock) || resched) { - read_unlock(lock); -- if (resched) -- preempt_schedule_common(); -- else -+ if (!_cond_resched()) - cpu_relax(); - ret = 1; - read_lock(lock); -@@ -6251,9 +6247,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock) - - if (rwlock_needbreak(lock) || resched) { - write_unlock(lock); -- if (resched) -- preempt_schedule_common(); -- else -+ if (!_cond_resched()) - cpu_relax(); - ret = 1; - write_lock(lock); --- -2.37.0 - - -From 262bb5b1599c775cae11329cc6aaf8ced2b6a869 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 30 Mar 2022 15:35:44 +0000 -Subject: [PATCH 258/297] Project-C v5.16-r1 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 02b29f2ca885..83407c4ee806 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -66,7 +66,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.16-r0" -+#define ALT_SCHED_VERSION "v5.16-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 52165e52705901f7f6a7df5324fe6e8e09140ac9 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 31 Mar 2022 13:38:40 +0000 -Subject: [PATCH 259/297] sched/alt: [Sync] 0569b245132c sched: Snapshot thread - flags - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 83407c4ee806..4c1ab66489a5 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6492,7 +6492,7 @@ void sched_show_task(struct task_struct *p) - rcu_read_unlock(); - pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", - free, task_pid_nr(p), ppid, -- (unsigned long)task_thread_info(p)->flags); -+ read_task_thread_flags(p)); - - print_worker_info(KERN_INFO, p); - print_stop_info(KERN_INFO, p); --- -2.37.0 - - -From a54525ecc20872a88395111beb0ac9383136b8f5 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 31 Mar 2022 13:44:13 +0000 -Subject: [PATCH 260/297] sched/alt: [Sync] 9d0df3779745 sched: Trigger warning - if ->migration_disabled counter underflows. 
- ---- - kernel/sched/alt_core.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4c1ab66489a5..c1f3afc120cf 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1449,6 +1449,9 @@ void migrate_enable(void) - return; - } - -+ if (WARN_ON_ONCE(!p->migration_disabled)) -+ return; -+ - /* - * Ensure stop_task runs either before or after this, and that - * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). --- -2.37.0 - - -From bb74763fb1e197e2cda77d77112680cd39e570d3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 31 Mar 2022 13:47:42 +0000 -Subject: [PATCH 261/297] sched/alt: [Sync] 40966e316f86 kthread: Ensure struct - kthread is present for all kthreads - ---- - kernel/sched/alt_core.c | 16 ++++++++-------- - 1 file changed, 8 insertions(+), 8 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c1f3afc120cf..475437df15e1 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6582,14 +6582,6 @@ void __init init_idle(struct task_struct *idle, int cpu) - - __sched_fork(0, idle); - -- /* -- * The idle task doesn't need the kthread struct to function, but it -- * is dressed up as a per-CPU kthread and thus needs to play the part -- * if we want to avoid special-casing it in code that deals with per-CPU -- * kthreads. -- */ -- set_kthread_struct(idle); -- - raw_spin_lock_irqsave(&idle->pi_lock, flags); - raw_spin_lock(&rq->lock); - update_rq_clock(rq); -@@ -7243,6 +7235,14 @@ void __init sched_init(void) - mmgrab(&init_mm); - enter_lazy_tlb(&init_mm, current); - -+ /* -+ * The idle task doesn't need the kthread struct to function, but it -+ * is dressed up as a per-CPU kthread and thus needs to play the part -+ * if we want to avoid special-casing it in code that deals with per-CPU -+ * kthreads. -+ */ -+ WARN_ON(set_kthread_struct(current)); -+ - /* - * Make us the idle thread. Technically, schedule() should not be - * called from this thread, however somewhere below it might be, --- -2.37.0 - - -From 514861d279a8517b0a9188f103c7b46e96eb8254 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 31 Mar 2022 13:49:18 +0000 -Subject: [PATCH 262/297] sched/alt: [Sync] dd621ee0cf8e kthread: Warn about - failed allocations for the init kthread - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 475437df15e1..4c0e90628368 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -7241,7 +7241,7 @@ void __init sched_init(void) - * if we want to avoid special-casing it in code that deals with per-CPU - * kthreads. - */ -- WARN_ON(set_kthread_struct(current)); -+ WARN_ON(!set_kthread_struct(current)); - - /* - * Make us the idle thread. 
Technically, schedule() should not be --- -2.37.0 - - -From 00c11cdce606918bf2d6fe15197e85e9e5e90c40 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 31 Mar 2022 13:51:02 +0000 -Subject: [PATCH 263/297] sched/alt: [Sync] 00580f03af5e kthread: Never - put_user the set_child_tid address - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4c0e90628368..69058fc928f9 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3529,7 +3529,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) - finish_task_switch(prev); - preempt_enable(); - -- if (current->set_child_tid) -+ if (!(current->flags & PF_KTHREAD) && current->set_child_tid) - put_user(task_pid_vnr(current), current->set_child_tid); - - calculate_sigpending(); --- -2.37.0 - - -From 0b94c47182f02121330dc86919eb96ab691efc2e Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 31 Mar 2022 16:37:18 +0000 -Subject: [PATCH 264/297] sched/alt: [Sync] e32cf5dfbe22 kthread: Generalize - pf_io_worker so it can point to struct kthread - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 69058fc928f9..4c0e90628368 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3529,7 +3529,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) - finish_task_switch(prev); - preempt_enable(); - -- if (!(current->flags & PF_KTHREAD) && current->set_child_tid) -+ if (current->set_child_tid) - put_user(task_pid_vnr(current), current->set_child_tid); - - calculate_sigpending(); --- -2.37.0 - - -From 9a29056912c42933ea78411c141d7f509263563a Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 1 Apr 2022 07:40:22 +0000 -Subject: [PATCH 265/297] Project-C v5.17-r0 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4c0e90628368..6338a97b429e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -66,7 +66,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.16-r1" -+#define ALT_SCHED_VERSION "v5.17-r0" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From e3604a1d033f652b9cef229b91c45608cddeaa50 Mon Sep 17 00:00:00 2001 -From: Piotr Gorski -Date: Mon, 4 Apr 2022 12:03:13 +0200 -Subject: [PATCH 266/297] prjc: remove duplicated struct __call_single_nod - -Signed-off-by: Piotr Gorski ---- - include/linux/sched.h | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 2423bc6713eb..7c65e6317d97 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -756,8 +756,9 @@ struct task_struct { - #if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) - int on_cpu; - #endif --#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) -- struct __call_single_node wake_entry; -+ -+#ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; -@@ -771,6 +772,7 @@ struct task_struct { - */ - int recent_used_cpu; - int wake_cpu; -+#endif /* !CONFIG_SCHED_ALT */ - #endif - int on_rq; - --- -2.37.0 - - -From 
27c2772dc1e4f949ee8cad94985a81def05016ea Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 1 Apr 2022 21:12:21 +0000 -Subject: [PATCH 267/297] sched/alt: [Sync] 61bb6cd2 mm: move - node_reclaim_distance to fix NUMA without SMP - ---- - kernel/sched/topology.c | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 2c9daf90398f..480ef393b3c9 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -2649,8 +2649,6 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - {} - - #ifdef CONFIG_NUMA --int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; -- - int sched_numa_find_closest(const struct cpumask *cpus, int cpu) - { - return best_mask_cpu(cpu, cpus); --- -2.37.0 - - -From 1cb07337b129d2b6c2930b686226cf191e67e0be Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Fri, 1 Apr 2022 21:34:05 +0000 -Subject: [PATCH 268/297] sched/alt: Fix Kconfig menu item for ProjectC - alternative schedulers - ---- - init/Kconfig | 58 ++++++++++++++++++++++++++-------------------------- - 1 file changed, 29 insertions(+), 29 deletions(-) - -diff --git a/init/Kconfig b/init/Kconfig -index fcdfda2b98f1..d2b593e3807d 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -809,35 +809,6 @@ config GENERIC_SCHED_CLOCK - - menu "Scheduler features" - --menuconfig SCHED_ALT -- bool "Alternative CPU Schedulers" -- default y -- help -- This feature enable alternative CPU scheduler" -- --if SCHED_ALT -- --choice -- prompt "Alternative CPU Scheduler" -- default SCHED_BMQ -- --config SCHED_BMQ -- bool "BMQ CPU scheduler" -- help -- The BitMap Queue CPU scheduler for excellent interactivity and -- responsiveness on the desktop and solid scalability on normal -- hardware and commodity servers. -- --config SCHED_PDS -- bool "PDS CPU scheduler" -- help -- The Priority and Deadline based Skip list multiple queue CPU -- Scheduler. -- --endchoice -- --endif -- - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -@@ -888,6 +859,35 @@ config UCLAMP_BUCKETS_COUNT - - If in doubt, use the default value. - -+menuconfig SCHED_ALT -+ bool "Alternative CPU Schedulers" -+ default y -+ help -+ This feature enable alternative CPU scheduler" -+ -+if SCHED_ALT -+ -+choice -+ prompt "Alternative CPU Scheduler" -+ default SCHED_BMQ -+ -+config SCHED_BMQ -+ bool "BMQ CPU scheduler" -+ help -+ The BitMap Queue CPU scheduler for excellent interactivity and -+ responsiveness on the desktop and solid scalability on normal -+ hardware and commodity servers. -+ -+config SCHED_PDS -+ bool "PDS CPU scheduler" -+ help -+ The Priority and Deadline based Skip list multiple queue CPU -+ Scheduler. 
-+ -+endchoice -+ -+endif -+ - endmenu - - # --- -2.37.0 - - -From c344f4e608af37bb14d7eaf449fe9ff4f461f8f0 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 2 Apr 2022 13:41:18 +0000 -Subject: [PATCH 269/297] Project-C v5.17-r1 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 6338a97b429e..abd0f2bc531e 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -66,7 +66,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.17-r0" -+#define ALT_SCHED_VERSION "v5.17-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 5455a5775c24053234bf3c0570b91c608cd44292 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 2 Apr 2022 16:07:47 +0000 -Subject: [PATCH 270/297] sched/alt: Avoid call task_sched_prio_idx() in - check_task_changed() code path. - ---- - kernel/sched/alt_core.c | 14 ++++++-------- - kernel/sched/bmq.h | 2 +- - kernel/sched/pds.h | 2 +- - 3 files changed, 8 insertions(+), 10 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index abd0f2bc531e..a3b1d8bbe53d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -78,7 +78,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - /* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ - u64 sched_timeslice_ns __read_mostly = (4 << 20); - --static inline void requeue_task(struct task_struct *p, struct rq *rq); -+static inline void requeue_task(struct task_struct *p, struct rq *rq, int idx); - - #ifdef CONFIG_SCHED_BMQ - #include "bmq.h" -@@ -784,17 +784,13 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) - sched_update_tick_dependency(rq); - } - --static inline void requeue_task(struct task_struct *p, struct rq *rq) -+static inline void requeue_task(struct task_struct *p, struct rq *rq, int idx) - { -- int idx; -- - lockdep_assert_held(&rq->lock); - /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ - WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", - cpu_of(rq), task_cpu(p)); - -- idx = task_sched_prio_idx(p, rq); -- - list_del(&p->sq_node); - list_add_tail(&p->sq_node, &rq->queue.heads[idx]); - if (idx != p->sq_idx) { -@@ -5034,9 +5030,11 @@ EXPORT_SYMBOL(default_wake_function); - - static inline void check_task_changed(struct task_struct *p, struct rq *rq) - { -+ int idx; -+ - /* Trigger resched if task sched_prio has been modified. 
*/ -- if (task_on_rq_queued(p) && task_sched_prio_idx(p, rq) != p->sq_idx) { -- requeue_task(p, rq); -+ if (task_on_rq_queued(p) && (idx = task_sched_prio_idx(p, rq)) != p->sq_idx) { -+ requeue_task(p, rq, idx); - check_preempt_curr(rq); - } - } -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index be3ee4a553ca..bf7ac80ec242 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -72,7 +72,7 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { - if (SCHED_RR != p->policy) - deboost_task(p); -- requeue_task(p, rq); -+ requeue_task(p, rq, task_sched_prio_idx(p, rq)); - } - } - -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -index 0f1f0d708b77..56a649d02e49 100644 ---- a/kernel/sched/pds.h -+++ b/kernel/sched/pds.h -@@ -101,7 +101,7 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) - p->time_slice = sched_timeslice_ns; - sched_renew_deadline(p, rq); - if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -- requeue_task(p, rq); -+ requeue_task(p, rq, task_sched_prio_idx(p, rq)); - } - - static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) --- -2.37.0 - - -From ff747536b8cf890249d993cdde251e93273ad46c Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 4 Apr 2022 14:25:30 +0000 -Subject: [PATCH 271/297] sched/alt: Delay update_sched_rq_watermark in - deactivation. - ---- - kernel/sched/alt_core.c | 27 ++++++++++++++------------- - 1 file changed, 14 insertions(+), 13 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index a3b1d8bbe53d..41e4b63801e6 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -728,16 +728,13 @@ unsigned long get_wchan(struct task_struct *p) - * Add/Remove/Requeue task to/from the runqueue routines - * Context: rq->lock - */ --#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ -- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -- sched_info_dequeue(rq, p); \ -- \ -- list_del(&p->sq_node); \ -- if (list_empty(&rq->queue.heads[p->sq_idx])) { \ -- clear_bit(sched_idx2prio(p->sq_idx, rq), \ -- rq->queue.bitmap); \ -- func; \ -- } -+#define __SCHED_DEQUEUE_TASK(p, rq, flags) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeue(rq, p); \ -+ \ -+ list_del(&p->sq_node); \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) \ -+ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); - - #define __SCHED_ENQUEUE_TASK(p, rq, flags) \ - sched_info_enqueue(rq, p); \ -@@ -755,7 +752,7 @@ static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) - WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", - task_cpu(p), cpu_of(rq)); - -- __SCHED_DEQUEUE_TASK(p, rq, flags, update_sched_rq_watermark(rq)); -+ __SCHED_DEQUEUE_TASK(p, rq, flags); - --rq->nr_running; - #ifdef CONFIG_SMP - if (1 == rq->nr_running) -@@ -1532,6 +1529,7 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int - - WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); - dequeue_task(p, rq, 0); -+ update_sched_rq_watermark(rq); - set_task_cpu(p, new_cpu); - raw_spin_unlock(&rq->lock); - -@@ -4291,7 +4289,7 @@ migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) - (p = sched_rq_next_task(skip, rq)) != rq->idle) { - skip = sched_rq_next_task(p, rq); - if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { -- __SCHED_DEQUEUE_TASK(p, rq, 0, ); -+ __SCHED_DEQUEUE_TASK(p, rq, 0); - set_task_cpu(p, dest_cpu); - 
sched_task_sanity_check(p, dest_rq); - __SCHED_ENQUEUE_TASK(p, dest_rq, 0); -@@ -4336,7 +4334,6 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) - if (rq->nr_running > 1) - cpumask_set_cpu(cpu, &sched_rq_pending_mask); - -- update_sched_rq_watermark(rq); - cpufreq_update_util(rq, 0); - - spin_release(&src_rq->lock.dep_map, _RET_IP_); -@@ -4480,6 +4477,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) - unsigned long prev_state; - struct rq *rq; - int cpu; -+ int deactivated = 0; - - cpu = smp_processor_id(); - rq = cpu_rq(cpu); -@@ -4547,6 +4545,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) - */ - sched_task_deactivate(prev, rq); - deactivate_task(prev, rq); -+ deactivated = 1; - - if (prev->in_iowait) { - atomic_inc(&rq->nr_iowait); -@@ -4566,6 +4565,8 @@ static void __sched notrace __schedule(unsigned int sched_mode) - #endif - - if (likely(prev != next)) { -+ if (deactivated) -+ update_sched_rq_watermark(rq); - next->last_ran = rq->clock_task; - rq->last_ts_switch = rq->clock; - --- -2.37.0 - - -From 6e486af3b5bd7cc5f6c8c9ea1f06d897125e633e Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 7 May 2022 16:16:33 +0000 -Subject: [PATCH 272/297] sched/alt: Dummy uclamp sync-up - -88e4d8c3cf08 sched/uclamp: Fix iowait boost escaping uclamp restriction -d9f51b85bfd8 sched/sugov: Ignore 'busy' filter when rq is capped by uclamp_max ---- - kernel/sched/alt_sched.h | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index f2b9e686d6a6..7bbe006ce568 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -664,4 +664,14 @@ extern void sched_dynamic_update(int mode); - #endif - - static inline void nohz_run_idle_balance(int cpu) { } -+ -+static inline -+unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, -+ struct task_struct *p) -+{ -+ return util; -+} -+ -+static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; } -+ - #endif /* ALT_SCHED_H */ --- -2.37.0 - - -From 4d10039793835a860bd1cec75443ee9f0ef9f83f Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 9 May 2022 11:25:02 +0000 -Subject: [PATCH 273/297] Project-C v5.17-r2 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 41e4b63801e6..c60f255bb828 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -66,7 +66,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.17-r1" -+#define ALT_SCHED_VERSION "v5.17-r2" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From ee44ff2ea9884f5a5de2e352f557e011c1c27d77 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 18 May 2022 14:05:22 +0000 -Subject: [PATCH 274/297] sched/alt: [Sync] b1f866b013e6 block: remove - blk_needs_flush_plug - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index c60f255bb828..3c0dde3280c1 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4650,7 +4650,7 @@ static inline void sched_submit_work(struct task_struct *tsk) - * If we are going to sleep and we have plugged IO queued, - * make sure to submit it to avoid deadlocks. 
- */ -- if (blk_needs_flush_plug(tsk)) -+ if (tsk->plug) - blk_flush_plug(tsk->plug, true); - } - --- -2.37.0 - - -From 9fd276f94028804dd9b645ca87c57bfd2e6e7233 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Wed, 18 May 2022 14:07:37 +0000 -Subject: [PATCH 275/297] sched/alt: [Sync] aa8dcccaf32b block: check that - there is a plug in blk_flush_plug - ---- - kernel/sched/alt_core.c | 7 ++----- - 1 file changed, 2 insertions(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3c0dde3280c1..5ed2477f09d0 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4650,8 +4650,7 @@ static inline void sched_submit_work(struct task_struct *tsk) - * If we are going to sleep and we have plugged IO queued, - * make sure to submit it to avoid deadlocks. - */ -- if (tsk->plug) -- blk_flush_plug(tsk->plug, true); -+ blk_flush_plug(tsk->plug, true); - } - - static void sched_update_worker(struct task_struct *tsk) -@@ -6315,9 +6314,7 @@ int io_schedule_prepare(void) - int old_iowait = current->in_iowait; - - current->in_iowait = 1; -- if (current->plug) -- blk_flush_plug(current->plug, true); -- -+ blk_flush_plug(current->plug, true); - return old_iowait; - } - --- -2.37.0 - - -From 110f181cd3c7f5b336fb2d2245b7ad1b3c770d49 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 19 May 2022 12:41:20 +0000 -Subject: [PATCH 276/297] sched/alt: [Sync] 04d4e665a609 sched/isolation: Use - single feature type while referring to housekeeping cpumask - ---- - kernel/sched/alt_core.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 5ed2477f09d0..852dbf392fb3 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1018,13 +1018,13 @@ int get_nohz_timer_target(void) - struct cpumask *mask; - const struct cpumask *hk_mask; - -- if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) { -+ if (housekeeping_cpu(cpu, HK_TYPE_TIMER)) { - if (!idle_cpu(cpu)) - return cpu; - default_cpu = cpu; - } - -- hk_mask = housekeeping_cpumask(HK_FLAG_TIMER); -+ hk_mask = housekeeping_cpumask(HK_TYPE_TIMER); - - for (mask = per_cpu(sched_cpu_topo_masks, cpu) + 1; - mask < per_cpu(sched_cpu_topo_end_mask, cpu); mask++) -@@ -1033,7 +1033,7 @@ int get_nohz_timer_target(void) - return i; - - if (default_cpu == -1) -- default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER); -+ default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER); - cpu = default_cpu; - - return cpu; -@@ -4062,7 +4062,7 @@ static void sched_tick_start(int cpu) - int os; - struct tick_work *twork; - -- if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ if (housekeeping_cpu(cpu, HK_TYPE_TICK)) - return; - - WARN_ON_ONCE(!tick_work_cpu); -@@ -4082,7 +4082,7 @@ static void sched_tick_stop(int cpu) - { - struct tick_work *twork; - -- if (housekeeping_cpu(cpu, HK_FLAG_TICK)) -+ if (housekeeping_cpu(cpu, HK_TYPE_TICK)) - return; - - WARN_ON_ONCE(!tick_work_cpu); -@@ -7119,7 +7119,7 @@ static void sched_init_topology_cpumask(void) - void __init sched_init_smp(void) - { - /* Move init over to a non-isolated CPU */ -- if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_DOMAIN)) < 0) - BUG(); - current->flags &= ~PF_NO_SETAFFINITY; - --- -2.37.0 - - -From d15b4ba78545069c8aae836ba4f755287d4e4acb Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 19 May 2022 15:21:22 +0000 -Subject: [PATCH 277/297] sched/alt: [Sync] 4c7485584d48 sched/preempt: Move - 
PREEMPT_DYNAMIC logic later - ---- - kernel/sched/alt_core.c | 272 ++++++++++++++++++++-------------------- - 1 file changed, 136 insertions(+), 136 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 852dbf392fb3..71edc3dd7e9a 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4858,142 +4858,6 @@ EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); - - #endif /* CONFIG_PREEMPTION */ - --#ifdef CONFIG_PREEMPT_DYNAMIC -- --#include -- --/* -- * SC:cond_resched -- * SC:might_resched -- * SC:preempt_schedule -- * SC:preempt_schedule_notrace -- * SC:irqentry_exit_cond_resched -- * -- * -- * NONE: -- * cond_resched <- __cond_resched -- * might_resched <- RET0 -- * preempt_schedule <- NOP -- * preempt_schedule_notrace <- NOP -- * irqentry_exit_cond_resched <- NOP -- * -- * VOLUNTARY: -- * cond_resched <- __cond_resched -- * might_resched <- __cond_resched -- * preempt_schedule <- NOP -- * preempt_schedule_notrace <- NOP -- * irqentry_exit_cond_resched <- NOP -- * -- * FULL: -- * cond_resched <- RET0 -- * might_resched <- RET0 -- * preempt_schedule <- preempt_schedule -- * preempt_schedule_notrace <- preempt_schedule_notrace -- * irqentry_exit_cond_resched <- irqentry_exit_cond_resched -- */ -- --enum { -- preempt_dynamic_undefined = -1, -- preempt_dynamic_none, -- preempt_dynamic_voluntary, -- preempt_dynamic_full, --}; -- --int preempt_dynamic_mode = preempt_dynamic_undefined; -- --int sched_dynamic_mode(const char *str) --{ -- if (!strcmp(str, "none")) -- return preempt_dynamic_none; -- -- if (!strcmp(str, "voluntary")) -- return preempt_dynamic_voluntary; -- -- if (!strcmp(str, "full")) -- return preempt_dynamic_full; -- -- return -EINVAL; --} -- --void sched_dynamic_update(int mode) --{ -- /* -- * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in -- * the ZERO state, which is invalid. 
-- */ -- static_call_update(cond_resched, __cond_resched); -- static_call_update(might_resched, __cond_resched); -- static_call_update(preempt_schedule, __preempt_schedule_func); -- static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); -- static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); -- -- switch (mode) { -- case preempt_dynamic_none: -- static_call_update(cond_resched, __cond_resched); -- static_call_update(might_resched, (void *)&__static_call_return0); -- static_call_update(preempt_schedule, NULL); -- static_call_update(preempt_schedule_notrace, NULL); -- static_call_update(irqentry_exit_cond_resched, NULL); -- pr_info("Dynamic Preempt: none\n"); -- break; -- -- case preempt_dynamic_voluntary: -- static_call_update(cond_resched, __cond_resched); -- static_call_update(might_resched, __cond_resched); -- static_call_update(preempt_schedule, NULL); -- static_call_update(preempt_schedule_notrace, NULL); -- static_call_update(irqentry_exit_cond_resched, NULL); -- pr_info("Dynamic Preempt: voluntary\n"); -- break; -- -- case preempt_dynamic_full: -- static_call_update(cond_resched, (void *)&__static_call_return0); -- static_call_update(might_resched, (void *)&__static_call_return0); -- static_call_update(preempt_schedule, __preempt_schedule_func); -- static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); -- static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); -- pr_info("Dynamic Preempt: full\n"); -- break; -- } -- -- preempt_dynamic_mode = mode; --} -- --static int __init setup_preempt_mode(char *str) --{ -- int mode = sched_dynamic_mode(str); -- if (mode < 0) { -- pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); -- return 0; -- } -- -- sched_dynamic_update(mode); -- return 1; --} --__setup("preempt=", setup_preempt_mode); -- --static void __init preempt_dynamic_init(void) --{ -- if (preempt_dynamic_mode == preempt_dynamic_undefined) { -- if (IS_ENABLED(CONFIG_PREEMPT_NONE)) { -- sched_dynamic_update(preempt_dynamic_none); -- } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) { -- sched_dynamic_update(preempt_dynamic_voluntary); -- } else { -- /* Default static call setting, nothing to do */ -- WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); -- preempt_dynamic_mode = preempt_dynamic_full; -- pr_info("Dynamic Preempt: full\n"); -- } -- } --} -- --#else /* !CONFIG_PREEMPT_DYNAMIC */ -- --static inline void preempt_dynamic_init(void) { } -- --#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */ -- - /* - * This is the entry point to schedule() from kernel preemption - * off of irq context. 
-@@ -6257,6 +6121,142 @@ int __cond_resched_rwlock_write(rwlock_t *lock) - } - EXPORT_SYMBOL(__cond_resched_rwlock_write); - -+#ifdef CONFIG_PREEMPT_DYNAMIC -+ -+#include -+ -+/* -+ * SC:cond_resched -+ * SC:might_resched -+ * SC:preempt_schedule -+ * SC:preempt_schedule_notrace -+ * SC:irqentry_exit_cond_resched -+ * -+ * -+ * NONE: -+ * cond_resched <- __cond_resched -+ * might_resched <- RET0 -+ * preempt_schedule <- NOP -+ * preempt_schedule_notrace <- NOP -+ * irqentry_exit_cond_resched <- NOP -+ * -+ * VOLUNTARY: -+ * cond_resched <- __cond_resched -+ * might_resched <- __cond_resched -+ * preempt_schedule <- NOP -+ * preempt_schedule_notrace <- NOP -+ * irqentry_exit_cond_resched <- NOP -+ * -+ * FULL: -+ * cond_resched <- RET0 -+ * might_resched <- RET0 -+ * preempt_schedule <- preempt_schedule -+ * preempt_schedule_notrace <- preempt_schedule_notrace -+ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched -+ */ -+ -+enum { -+ preempt_dynamic_undefined = -1, -+ preempt_dynamic_none, -+ preempt_dynamic_voluntary, -+ preempt_dynamic_full, -+}; -+ -+int preempt_dynamic_mode = preempt_dynamic_undefined; -+ -+int sched_dynamic_mode(const char *str) -+{ -+ if (!strcmp(str, "none")) -+ return preempt_dynamic_none; -+ -+ if (!strcmp(str, "voluntary")) -+ return preempt_dynamic_voluntary; -+ -+ if (!strcmp(str, "full")) -+ return preempt_dynamic_full; -+ -+ return -EINVAL; -+} -+ -+void sched_dynamic_update(int mode) -+{ -+ /* -+ * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in -+ * the ZERO state, which is invalid. -+ */ -+ static_call_update(cond_resched, __cond_resched); -+ static_call_update(might_resched, __cond_resched); -+ static_call_update(preempt_schedule, __preempt_schedule_func); -+ static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); -+ static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); -+ -+ switch (mode) { -+ case preempt_dynamic_none: -+ static_call_update(cond_resched, __cond_resched); -+ static_call_update(might_resched, (void *)&__static_call_return0); -+ static_call_update(preempt_schedule, NULL); -+ static_call_update(preempt_schedule_notrace, NULL); -+ static_call_update(irqentry_exit_cond_resched, NULL); -+ pr_info("Dynamic Preempt: none\n"); -+ break; -+ -+ case preempt_dynamic_voluntary: -+ static_call_update(cond_resched, __cond_resched); -+ static_call_update(might_resched, __cond_resched); -+ static_call_update(preempt_schedule, NULL); -+ static_call_update(preempt_schedule_notrace, NULL); -+ static_call_update(irqentry_exit_cond_resched, NULL); -+ pr_info("Dynamic Preempt: voluntary\n"); -+ break; -+ -+ case preempt_dynamic_full: -+ static_call_update(cond_resched, (void *)&__static_call_return0); -+ static_call_update(might_resched, (void *)&__static_call_return0); -+ static_call_update(preempt_schedule, __preempt_schedule_func); -+ static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); -+ static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); -+ pr_info("Dynamic Preempt: full\n"); -+ break; -+ } -+ -+ preempt_dynamic_mode = mode; -+} -+ -+static int __init setup_preempt_mode(char *str) -+{ -+ int mode = sched_dynamic_mode(str); -+ if (mode < 0) { -+ pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); -+ return 0; -+ } -+ -+ sched_dynamic_update(mode); -+ return 1; -+} -+__setup("preempt=", setup_preempt_mode); -+ -+static void __init preempt_dynamic_init(void) -+{ -+ if (preempt_dynamic_mode == preempt_dynamic_undefined) { -+ if 
(IS_ENABLED(CONFIG_PREEMPT_NONE)) { -+ sched_dynamic_update(preempt_dynamic_none); -+ } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) { -+ sched_dynamic_update(preempt_dynamic_voluntary); -+ } else { -+ /* Default static call setting, nothing to do */ -+ WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); -+ preempt_dynamic_mode = preempt_dynamic_full; -+ pr_info("Dynamic Preempt: full\n"); -+ } -+ } -+} -+ -+#else /* !CONFIG_PREEMPT_DYNAMIC */ -+ -+static inline void preempt_dynamic_init(void) { } -+ -+#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */ -+ - /** - * yield - yield the current processor to other threads. - * --- -2.37.0 - - -From 83722edbe96c3adb41638f2db5091965f1a083d3 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 19 May 2022 15:54:41 +0000 -Subject: [PATCH 278/297] sched/alt: [Sync] 8a69fe0be143 sched/preempt: - Refactor sched_dynamic_update() - ---- - kernel/sched/alt_core.c | 61 +++++++++++++++++++++++++---------------- - 1 file changed, 37 insertions(+), 24 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 71edc3dd7e9a..94450a8a4325 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4794,8 +4794,11 @@ NOKPROBE_SYMBOL(preempt_schedule); - EXPORT_SYMBOL(preempt_schedule); - - #ifdef CONFIG_PREEMPT_DYNAMIC --DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func); --EXPORT_STATIC_CALL_TRAMP(preempt_schedule); -+#ifndef preempt_schedule_dynamic_enabled -+#define preempt_schedule_dynamic_enabled preempt_schedule -+#define preempt_schedule_dynamic_disabled NULL -+#endif -+DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); - #endif - - -@@ -4852,8 +4855,11 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) - EXPORT_SYMBOL_GPL(preempt_schedule_notrace); - - #ifdef CONFIG_PREEMPT_DYNAMIC --DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); --EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); -+#ifndef preempt_schedule_notrace_dynamic_enabled -+#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace -+#define preempt_schedule_notrace_dynamic_disabled NULL -+#endif -+DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); - #endif - - #endif /* CONFIG_PREEMPTION */ -@@ -6052,9 +6058,13 @@ EXPORT_SYMBOL(__cond_resched); - #endif - - #ifdef CONFIG_PREEMPT_DYNAMIC -+#define cond_resched_dynamic_enabled __cond_resched -+#define cond_resched_dynamic_disabled ((void *)&__static_call_return0) - DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched); - EXPORT_STATIC_CALL_TRAMP(cond_resched); - -+#define might_resched_dynamic_enabled __cond_resched -+#define might_resched_dynamic_disabled ((void *)&__static_call_return0) - DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched); - EXPORT_STATIC_CALL_TRAMP(might_resched); - #endif -@@ -6178,43 +6188,46 @@ int sched_dynamic_mode(const char *str) - return -EINVAL; - } - -+#define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled) -+#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled) -+ - void sched_dynamic_update(int mode) - { - /* - * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in - * the ZERO state, which is invalid. 
- */ -- static_call_update(cond_resched, __cond_resched); -- static_call_update(might_resched, __cond_resched); -- static_call_update(preempt_schedule, __preempt_schedule_func); -- static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); -- static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); -+ preempt_dynamic_enable(cond_resched); -+ preempt_dynamic_enable(might_resched); -+ preempt_dynamic_enable(preempt_schedule); -+ preempt_dynamic_enable(preempt_schedule_notrace); -+ preempt_dynamic_enable(irqentry_exit_cond_resched); - - switch (mode) { - case preempt_dynamic_none: -- static_call_update(cond_resched, __cond_resched); -- static_call_update(might_resched, (void *)&__static_call_return0); -- static_call_update(preempt_schedule, NULL); -- static_call_update(preempt_schedule_notrace, NULL); -- static_call_update(irqentry_exit_cond_resched, NULL); -+ preempt_dynamic_enable(cond_resched); -+ preempt_dynamic_disable(might_resched); -+ preempt_dynamic_disable(preempt_schedule); -+ preempt_dynamic_disable(preempt_schedule_notrace); -+ preempt_dynamic_disable(irqentry_exit_cond_resched); - pr_info("Dynamic Preempt: none\n"); - break; - - case preempt_dynamic_voluntary: -- static_call_update(cond_resched, __cond_resched); -- static_call_update(might_resched, __cond_resched); -- static_call_update(preempt_schedule, NULL); -- static_call_update(preempt_schedule_notrace, NULL); -- static_call_update(irqentry_exit_cond_resched, NULL); -+ preempt_dynamic_enable(cond_resched); -+ preempt_dynamic_enable(might_resched); -+ preempt_dynamic_disable(preempt_schedule); -+ preempt_dynamic_disable(preempt_schedule_notrace); -+ preempt_dynamic_disable(irqentry_exit_cond_resched); - pr_info("Dynamic Preempt: voluntary\n"); - break; - - case preempt_dynamic_full: -- static_call_update(cond_resched, (void *)&__static_call_return0); -- static_call_update(might_resched, (void *)&__static_call_return0); -- static_call_update(preempt_schedule, __preempt_schedule_func); -- static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); -- static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); -+ preempt_dynamic_disable(cond_resched); -+ preempt_dynamic_disable(might_resched); -+ preempt_dynamic_enable(preempt_schedule); -+ preempt_dynamic_enable(preempt_schedule_notrace); -+ preempt_dynamic_enable(irqentry_exit_cond_resched); - pr_info("Dynamic Preempt: full\n"); - break; - } --- -2.37.0 - - -From 6f3fefb494e2b409cd56f7b2d889fedb2a3283e2 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 19 May 2022 16:00:36 +0000 -Subject: [PATCH 279/297] sched/alt: [Sync] 33c64734be34 sched/preempt: - Decouple HAVE_PREEMPT_DYNAMIC from GENERIC_ENTRY - ---- - kernel/sched/alt_core.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 94450a8a4325..4012c7f597ff 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6133,7 +6133,9 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write); - - #ifdef CONFIG_PREEMPT_DYNAMIC - -+#ifdef CONFIG_GENERIC_ENTRY - #include -+#endif - - /* - * SC:cond_resched --- -2.37.0 - - -From ece7246e356476506a92f18b83e32af6acb0c0b8 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 19 May 2022 16:53:23 +0000 -Subject: [PATCH 280/297] sched/alt: [Sync] 99cf983cc8bc sched/preempt: Add - PREEMPT_DYNAMIC using static keys - ---- - kernel/sched/alt_core.c | 55 ++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 54 insertions(+), 1 deletion(-) - 
-diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 4012c7f597ff..1f09a5210445 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -25,6 +25,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -4794,13 +4795,25 @@ NOKPROBE_SYMBOL(preempt_schedule); - EXPORT_SYMBOL(preempt_schedule); - - #ifdef CONFIG_PREEMPT_DYNAMIC -+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - #ifndef preempt_schedule_dynamic_enabled - #define preempt_schedule_dynamic_enabled preempt_schedule - #define preempt_schedule_dynamic_disabled NULL - #endif - DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); -+EXPORT_STATIC_CALL_TRAMP(preempt_schedule); -+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) -+static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule); -+void __sched notrace dynamic_preempt_schedule(void) -+{ -+ if (!static_branch_unlikely(&sk_dynamic_preempt_schedule)) -+ return; -+ preempt_schedule(); -+} -+NOKPROBE_SYMBOL(dynamic_preempt_schedule); -+EXPORT_SYMBOL(dynamic_preempt_schedule); -+#endif - #endif -- - - /** - * preempt_schedule_notrace - preempt_schedule called by tracing -@@ -4855,11 +4868,24 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) - EXPORT_SYMBOL_GPL(preempt_schedule_notrace); - - #ifdef CONFIG_PREEMPT_DYNAMIC -+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - #ifndef preempt_schedule_notrace_dynamic_enabled - #define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace - #define preempt_schedule_notrace_dynamic_disabled NULL - #endif - DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); -+EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); -+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) -+static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule_notrace); -+void __sched notrace dynamic_preempt_schedule_notrace(void) -+{ -+ if (!static_branch_unlikely(&sk_dynamic_preempt_schedule_notrace)) -+ return; -+ preempt_schedule_notrace(); -+} -+NOKPROBE_SYMBOL(dynamic_preempt_schedule_notrace); -+EXPORT_SYMBOL(dynamic_preempt_schedule_notrace); -+#endif - #endif - - #endif /* CONFIG_PREEMPTION */ -@@ -6058,6 +6084,7 @@ EXPORT_SYMBOL(__cond_resched); - #endif - - #ifdef CONFIG_PREEMPT_DYNAMIC -+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - #define cond_resched_dynamic_enabled __cond_resched - #define cond_resched_dynamic_disabled ((void *)&__static_call_return0) - DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched); -@@ -6067,6 +6094,25 @@ EXPORT_STATIC_CALL_TRAMP(cond_resched); - #define might_resched_dynamic_disabled ((void *)&__static_call_return0) - DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched); - EXPORT_STATIC_CALL_TRAMP(might_resched); -+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) -+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched); -+int __sched dynamic_cond_resched(void) -+{ -+ if (!static_branch_unlikely(&sk_dynamic_cond_resched)) -+ return 0; -+ return __cond_resched(); -+} -+EXPORT_SYMBOL(dynamic_cond_resched); -+ -+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_might_resched); -+int __sched dynamic_might_resched(void) -+{ -+ if (!static_branch_unlikely(&sk_dynamic_might_resched)) -+ return 0; -+ return __cond_resched(); -+} -+EXPORT_SYMBOL(dynamic_might_resched); -+#endif - #endif - - /* -@@ -6190,8 +6236,15 @@ int sched_dynamic_mode(const char *str) - return -EINVAL; - } - -+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - #define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled) - 
#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled) -+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) -+#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key) -+#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key) -+#else -+#error "Unsupported PREEMPT_DYNAMIC mechanism" -+#endif - - void sched_dynamic_update(int mode) - { --- -2.37.0 - - -From 8bc62bbacd2c5c300177d7bb95147106170d3eba Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 21 May 2022 07:43:39 +0000 -Subject: [PATCH 281/297] sched/alt: [Sync] fa2c3254d7cf sched/tracing: Don't - re-read p->state when emitting sched_switch event - ---- - kernel/sched/alt_core.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 1f09a5210445..3c1a0f3ddcbc 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3430,7 +3430,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) - { - struct rq *rq = this_rq(); - struct mm_struct *mm = rq->prev_mm; -- long prev_state; -+ unsigned int prev_state; - - /* - * The previous task will have left us with a preempt_count of 2 -@@ -4595,7 +4595,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) - - psi_sched_switch(prev, next, !task_on_rq_queued(prev)); - -- trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next); -+ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); - - /* Also unlocks the rq: */ - rq = context_switch(rq, prev, next); --- -2.37.0 - - -From c41cdff6539523725c3fa3f1b842711a7c1c6a87 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sat, 21 May 2022 11:44:15 +0000 -Subject: [PATCH 282/297] sched/alt: [Sync] 801c14195510 sched/headers: - Introduce kernel/sched/build_utility.c and build multiple .c files there - ---- - kernel/sched/alt_sched.h | 12 +++++++++--- - kernel/sched/build_utility.c | 4 +--- - 2 files changed, 10 insertions(+), 6 deletions(-) - -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 7bbe006ce568..39c6057b0ad3 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -48,6 +48,7 @@ - - #include "cpupri.h" - -+#include - #include - - #ifdef CONFIG_SCHED_BMQ -@@ -452,9 +453,6 @@ this_rq_lock_irq(struct rq_flags *rf) - return rq; - } - --extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); --extern void raw_spin_rq_unlock(struct rq *rq); -- - static inline raw_spinlock_t *__rq_lockp(struct rq *rq) - { - return &rq->lock; -@@ -465,6 +463,14 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq) - return __rq_lockp(rq); - } - -+static inline void lockdep_assert_rq_held(struct rq *rq) -+{ -+ lockdep_assert_held(__rq_lockp(rq)); -+} -+ -+extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); -+extern void raw_spin_rq_unlock(struct rq *rq); -+ - static inline void raw_spin_rq_lock(struct rq *rq) - { - raw_spin_rq_lock_nested(rq, 0); -diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c -index 128a283332f4..23f80a86d2d7 100644 ---- a/kernel/sched/build_utility.c -+++ b/kernel/sched/build_utility.c -@@ -69,11 +69,9 @@ - # include "cpufreq_schedutil.c" - #endif - --#ifdef CONFIG_SCHED_ALT - #ifdef CONFIG_SCHED_DEBUG - # include "debug.c" - #endif --#endif - - #ifdef CONFIG_SCHEDSTATS - # include "stats.c" -@@ -87,7 +85,7 @@ - - #ifdef CONFIG_SMP - # include "cpupri.c" --#ifdef CONFIG_SCHED_ALT -+#ifndef CONFIG_SCHED_ALT - # include "stop_task.c" - #endif - # include "topology.c" 
--- -2.37.0 - - -From b2fe44ce98f2d6666a1cb69b31dae32a84eeac93 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Mon, 23 May 2022 11:03:24 +0000 -Subject: [PATCH 283/297] Project-C v5.18-r0 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 3c1a0f3ddcbc..cb8f7d24ab5d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -67,7 +67,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.17-r2" -+#define ALT_SCHED_VERSION "v5.18-r0" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 9cab4c8a8c422ea563e11df3f55946e2821bfa66 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 24 May 2022 15:30:25 +0000 -Subject: [PATCH 284/297] sched/alt: Headers cleanup - ---- - kernel/sched/alt_core.c | 41 +++++++++++++++++------------------ - kernel/sched/alt_sched.h | 46 ++++------------------------------------ - 2 files changed, 23 insertions(+), 64 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index cb8f7d24ab5d..aaebbb988343 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -11,41 +11,38 @@ - * scheduler by Alfred Chen. - * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. - */ --#define CREATE_TRACE_POINTS --#include --#undef CREATE_TRACE_POINTS -- --#include "sched.h" -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include - --#include -- --#include --#include - #include -+#include -+#include - #include --#include - #include --#include -+#include - #include --#include -+#include - #include --#include --#include --#include --#include - --#include --#include -+#include - - #include - --#include "../workqueue_internal.h" -+#define CREATE_TRACE_POINTS -+#include -+#undef CREATE_TRACE_POINTS -+ -+#include "sched.h" -+ - #include "../../fs/io-wq.h" - #include "../smpboot.h" - --#include "pelt.h" --#include "smp.h" -- - /* - * Export tracepoints that act as a bare tracehook (ie: have no trace event - * associated with them) to allow external modules to probe them. 
-diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 39c6057b0ad3..611424bbfa9b 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -1,56 +1,18 @@ - #ifndef ALT_SCHED_H - #define ALT_SCHED_H - --#include -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include - #include --#include - #include --#include --#include - #include --#include -+#include - --#include -+#include -+#include - --#ifdef CONFIG_PARAVIRT --# include --#endif -+#include "../workqueue_internal.h" - - #include "cpupri.h" - --#include --#include -- - #ifdef CONFIG_SCHED_BMQ - /* bits: - * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ --- -2.37.0 - - -From efdd27969a4e455b6d36195afdd38416d67430c8 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Tue, 24 May 2022 16:33:15 +0000 -Subject: [PATCH 285/297] Project-C v5.18-r1 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index aaebbb988343..a466a05301b8 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -64,7 +64,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.18-r0" -+#define ALT_SCHED_VERSION "v5.18-r1" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 497edb58f02de67c6c0323b6a4b7cfa2e2c277bb Mon Sep 17 00:00:00 2001 -From: Tor Vic -Date: Tue, 5 Apr 2022 09:56:18 +0200 -Subject: [PATCH 286/297] sched/alt: Add missing call to lru_gen_use_mm when - using MGLRU - ---- - kernel/sched/alt_core.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index a466a05301b8..31a487038547 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3569,6 +3569,9 @@ context_switch(struct rq *rq, struct task_struct *prev, - * finish_task_switch()'s mmdrop(). - */ - switch_mm_irqs_off(prev->active_mm, next->mm, next); -+#ifdef CONFIG_LRU_GEN -+ lru_gen_use_mm(next->mm); -+#endif - - if (!prev->mm) { // from kernel - /* will mmdrop() in finish_task_switch(). */ --- -2.37.0 - - -From 1b7fdd70e63bfd3d1c8ffca949df7ed6e605bb55 Mon Sep 17 00:00:00 2001 -From: Tor Vic -Date: Tue, 5 Apr 2022 09:58:51 +0200 -Subject: [PATCH 287/297] sched/alt: Kconfig: Disable SCHED_ALT by default, - prefer PDS over BMQ - -Also edit some help text. ---- - init/Kconfig | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/init/Kconfig b/init/Kconfig -index d2b593e3807d..48e3e5684076 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -861,15 +861,15 @@ config UCLAMP_BUCKETS_COUNT - - menuconfig SCHED_ALT - bool "Alternative CPU Schedulers" -- default y -+ default n - help -- This feature enable alternative CPU scheduler" -+ This feature enables the ProjectC alternative CPU schedulers." 
- - if SCHED_ALT - - choice -- prompt "Alternative CPU Scheduler" -- default SCHED_BMQ -+ prompt "Alternative CPU schedulers" -+ default SCHED_PDS - - config SCHED_BMQ - bool "BMQ CPU scheduler" --- -2.37.0 - - -From df2558ad4d27543e2e40d6668b711267826d0fcd Mon Sep 17 00:00:00 2001 -From: Tor Vic -Date: Mon, 30 May 2022 10:34:56 +0200 -Subject: [PATCH 288/297] sched/alt: Add missing preempt model accessors - ---- - kernel/sched/alt_core.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 31a487038547..95572f44ca31 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -6319,6 +6319,18 @@ static void __init preempt_dynamic_init(void) - } - } - -+#define PREEMPT_MODEL_ACCESSOR(mode) \ -+ bool preempt_model_##mode(void) \ -+ { \ -+ WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined); \ -+ return preempt_dynamic_mode == preempt_dynamic_##mode; \ -+ } \ -+ EXPORT_SYMBOL_GPL(preempt_model_##mode) -+ -+PREEMPT_MODEL_ACCESSOR(none); -+PREEMPT_MODEL_ACCESSOR(voluntary); -+PREEMPT_MODEL_ACCESSOR(full); -+ - #else /* !CONFIG_PREEMPT_DYNAMIC */ - - static inline void preempt_dynamic_init(void) { } --- -2.37.0 - - -From aeaf4e4c78b4117636a6ad31dca389d540111bb1 Mon Sep 17 00:00:00 2001 -From: Tor Vic -Date: Thu, 2 Jun 2022 09:54:37 +0200 -Subject: [PATCH 289/297] sched/alt: [Sync] sched: Fix the check of nr_running - at queue wakelist - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 95572f44ca31..f1a5a610b07c 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2414,7 +2414,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) - * the soon-to-be-idle CPU as the current CPU is likely busy. - * nr_running is checked to avoid unnecessary task stacking. - */ -- if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) -+ if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running) - return true; - - return false; --- -2.37.0 - - -From 3404ff57f1cd54fc17a691f589abee298a69b681 Mon Sep 17 00:00:00 2001 -From: Tor Vic -Date: Thu, 2 Jun 2022 10:05:02 +0200 -Subject: [PATCH 290/297] sched/alt: [Sync] sched: Remove the limitation of - WF_ON_CPU on wakelist if wakee cpu is idle - ---- - kernel/sched/alt_core.c | 27 ++++++++++++++++----------- - kernel/sched/alt_sched.h | 1 - - 2 files changed, 16 insertions(+), 12 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f1a5a610b07c..8ae6975aa495 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -2392,7 +2392,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags - __smp_call_single_queue(cpu, &p->wake_entry.llist); - } - --static inline bool ttwu_queue_cond(int cpu, int wake_flags) -+static inline bool ttwu_queue_cond(int cpu) - { - /* - * Do not complicate things with the async wake_list while the CPU is -@@ -2408,13 +2408,21 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) - if (!cpus_share_cache(smp_processor_id(), cpu)) - return true; - -+ if (cpu == smp_processor_id()) -+ return false; -+ - /* -- * If the task is descheduling and the only running task on the -- * CPU then use the wakelist to offload the task activation to -- * the soon-to-be-idle CPU as the current CPU is likely busy. -- * nr_running is checked to avoid unnecessary task stacking. 
-+ * If the wakee cpu is idle, or the task is descheduling and the -+ * only running task on the CPU, then use the wakelist to offload -+ * the task activation to the idle (or soon-to-be-idle) CPU as -+ * the current CPU is likely busy. nr_running is checked to -+ * avoid unnecessary task stacking. -+ * -+ * Note that we can only get here with (wakee) p->on_rq=0, -+ * p->on_cpu can be whatever, we've done the dequeue, so -+ * the wakee has been accounted out of ->nr_running. - */ -- if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running) -+ if (!cpu_rq(cpu)->nr_running) - return true; - - return false; -@@ -2422,10 +2430,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) - - static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) - { -- if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { -- if (WARN_ON_ONCE(cpu == smp_processor_id())) -- return false; -- -+ if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu)) { - sched_clock_cpu(cpu); /* Sync clocks across CPUs */ - __ttwu_queue_wakelist(p, cpu, wake_flags); - return true; -@@ -2781,7 +2786,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, - * scheduling. - */ - if (smp_load_acquire(&p->on_cpu) && -- ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) -+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags)) - goto unlock; - - /* -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 611424bbfa9b..1f85c5627d6d 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -99,7 +99,6 @@ static inline int task_on_rq_migrating(struct task_struct *p) - #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ - #define WF_FORK 0x02 /* child wakeup after fork */ - #define WF_MIGRATED 0x04 /* internal use, task got migrated */ --#define WF_ON_CPU 0x08 /* Wakee is on_rq */ - - #define SCHED_QUEUE_BITS (SCHED_BITS - 1) - --- -2.37.0 - - -From 570e4903cdff5177f3317dc50e636537e6494471 Mon Sep 17 00:00:00 2001 -From: Tor Vic -Date: Mon, 20 Jun 2022 19:37:43 +0200 -Subject: [PATCH 291/297] sched/alt: [Sync]: sched: Fix balance_push() vs - __sched_setscheduler() - ---- - kernel/sched/alt_core.c | 39 ++++++++++++++++++++++++++++++++++----- - 1 file changed, 34 insertions(+), 5 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 8ae6975aa495..b43a992edd87 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3275,26 +3275,55 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head) - - static void balance_push(struct rq *rq); - -+/* -+ * balance_push_callback is a right abuse of the callback interface and plays -+ * by significantly different rules. -+ * -+ * Where the normal balance_callback's purpose is to be ran in the same context -+ * that queued it (only later, when it's safe to drop rq->lock again), -+ * balance_push_callback is specifically targeted at __schedule(). -+ * -+ * This abuse is tolerated because it places all the unlikely/odd cases behind -+ * a single test, namely: rq->balance_callback == NULL. 
-+ */ - struct callback_head balance_push_callback = { - .next = NULL, - .func = (void (*)(struct callback_head *))balance_push, - }; - --static inline struct callback_head *splice_balance_callbacks(struct rq *rq) -+static inline struct callback_head * -+__splice_balance_callbacks(struct rq *rq, bool split) - { - struct callback_head *head = rq->balance_callback; - -- if (head) { -- lockdep_assert_held(&rq->lock); -+ if (likely(!head)) -+ return NULL; -+ -+ lockdep_assert_held(&rq->lock); -+ /* -+ * Must not take balance_push_callback off the list when -+ * splice_balance_callbacks() and balance_callbacks() are not -+ * in the same rq->lock section. -+ * -+ * In that case it would be possible for __schedule() to interleave -+ * and observe the list empty. -+ */ -+ if (split && head == &balance_push_callback) -+ head = NULL; -+ else - rq->balance_callback = NULL; -- } - - return head; - } - -+static inline struct callback_head *splice_balance_callbacks(struct rq *rq) -+{ -+ return __splice_balance_callbacks(rq, true); -+} -+ - static void __balance_callbacks(struct rq *rq) - { -- do_balance_callbacks(rq, splice_balance_callbacks(rq)); -+ do_balance_callbacks(rq, __splice_balance_callbacks(rq, false)); - } - - static inline void balance_callbacks(struct rq *rq, struct callback_head *head) --- -2.37.0 - - -From ae09d83fe8e118d31fe8a60c193949ef807633e9 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Thu, 26 May 2022 16:52:23 +0000 -Subject: [PATCH 292/297] sched/alt: Rename slibing group balance functions - ---- - kernel/sched/alt_core.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b43a992edd87..70da89bacc72 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -3915,7 +3915,7 @@ void scheduler_tick(void) - } - - #ifdef CONFIG_SCHED_SMT --static inline int active_load_balance_cpu_stop(void *data) -+static inline int sg_balance_cpu_stop(void *data) - { - struct rq *rq = this_rq(); - struct task_struct *p = data; -@@ -3966,15 +3966,15 @@ static inline int sg_balance_trigger(const int cpu) - raw_spin_unlock_irqrestore(&rq->lock, flags); - - if (res) -- stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop, -- curr, &rq->active_balance_work); -+ stop_one_cpu_nowait(cpu, sg_balance_cpu_stop, curr, -+ &rq->active_balance_work); - return res; - } - - /* -- * sg_balance_check - slibing group balance check for run queue @rq -+ * sg_balance - slibing group balance check for run queue @rq - */ --static inline void sg_balance_check(struct rq *rq) -+static inline void sg_balance(struct rq *rq) - { - cpumask_t chk; - int cpu = cpu_of(rq); -@@ -4639,7 +4639,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) - } - - #ifdef CONFIG_SCHED_SMT -- sg_balance_check(rq); -+ sg_balance(rq); - #endif - } - --- -2.37.0 - - -From ce3f2729759e710ed1b5b44dd867977ac66f63ec Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 5 Jun 2022 15:09:03 +0000 -Subject: [PATCH 293/297] sched/alt: Watermark preempt fix - -Schedule watermark based preempt is not wokring properly. Typical -scenario is running low priority tasks on all CPUs, then run kernel -compilation with normal prioority, kernel compilation will spread only -on cpu0. Here is the fix. - -1) Fix the size of sched_rq_watermark, IDLE_TASK_SCHED_PRIO doesn't need - a watermark. -2) Remove sched_exec() implementation. It tends scheduling on cpu 0. -3) For BMQ, children task runs at lowest boost priority. 
---- - kernel/sched/alt_core.c | 30 ++++++------------------------ - kernel/sched/bmq.h | 3 +-- - 2 files changed, 7 insertions(+), 26 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 70da89bacc72..9f8c1a826095 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -144,14 +144,14 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); - #ifdef CONFIG_SCHED_SMT - static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; - #endif --static cpumask_t sched_rq_watermark[SCHED_BITS] ____cacheline_aligned_in_smp; -+static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp; - - /* sched_queue related functions */ - static inline void sched_queue_init(struct sched_queue *q) - { - int i; - -- bitmap_zero(q->bitmap, SCHED_BITS); -+ bitmap_zero(q->bitmap, SCHED_QUEUE_BITS); - for(i = 0; i < SCHED_BITS; i++) - INIT_LIST_HEAD(&q->heads[i]); - } -@@ -183,7 +183,7 @@ static inline void update_sched_rq_watermark(struct rq *rq) - cpu = cpu_of(rq); - if (watermark < last_wm) { - for (i = last_wm; i > watermark; i--) -- cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); -+ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); - #ifdef CONFIG_SCHED_SMT - if (static_branch_likely(&sched_smt_present) && - IDLE_TASK_SCHED_PRIO == last_wm) -@@ -194,7 +194,7 @@ static inline void update_sched_rq_watermark(struct rq *rq) - } - /* last_wm < watermark */ - for (i = watermark; i > last_wm; i--) -- cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_BITS - 1 - i); -+ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); - #ifdef CONFIG_SCHED_SMT - if (static_branch_likely(&sched_smt_present) && - IDLE_TASK_SCHED_PRIO == watermark) { -@@ -1902,7 +1902,7 @@ static inline int select_task_rq(struct task_struct *p) - #endif - cpumask_and(&tmp, &chk_mask, sched_rq_watermark) || - cpumask_and(&tmp, &chk_mask, -- sched_rq_watermark + SCHED_BITS - task_sched_prio(p))) -+ sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p))) - return best_mask_cpu(task_cpu(p), &tmp); - - return best_mask_cpu(task_cpu(p), &chk_mask); -@@ -3730,24 +3730,6 @@ unsigned int nr_iowait(void) - */ - void sched_exec(void) - { -- struct task_struct *p = current; -- unsigned long flags; -- int dest_cpu; -- -- raw_spin_lock_irqsave(&p->pi_lock, flags); -- dest_cpu = cpumask_any(p->cpus_ptr); -- if (dest_cpu == smp_processor_id()) -- goto unlock; -- -- if (likely(cpu_active(dest_cpu))) { -- struct migration_arg arg = { p, dest_cpu }; -- -- raw_spin_unlock_irqrestore(&p->pi_lock, flags); -- stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); -- return; -- } --unlock: -- raw_spin_unlock_irqrestore(&p->pi_lock, flags); - } - - #endif -@@ -7292,7 +7274,7 @@ void __init sched_init(void) - wait_bit_init(); - - #ifdef CONFIG_SMP -- for (i = 0; i < SCHED_BITS; i++) -+ for (i = 0; i < SCHED_QUEUE_BITS; i++) - cpumask_copy(sched_rq_watermark + i, cpu_present_mask); - #endif - -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -index bf7ac80ec242..66b77291b9d0 100644 ---- a/kernel/sched/bmq.h -+++ b/kernel/sched/bmq.h -@@ -85,8 +85,7 @@ inline int task_running_nice(struct task_struct *p) - - static void sched_task_fork(struct task_struct *p, struct rq *rq) - { -- p->boost_prio = (p->boost_prio < 0) ? 
-- p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; -+ p->boost_prio = MAX_PRIORITY_ADJ; - } - - static inline void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) --- -2.37.0 - - -From bee4479334a920c506a6bff410251c39fd82c6b2 Mon Sep 17 00:00:00 2001 -From: Alfred Chen -Date: Sun, 19 Jun 2022 10:20:11 +0000 -Subject: [PATCH 294/297] Project-C v5.18-r2 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 9f8c1a826095..b70c19fdb7a6 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -64,7 +64,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.18-r1" -+#define ALT_SCHED_VERSION "v5.18-r2" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From ae834915c8ab8e84348a3285b5760caf416285e6 Mon Sep 17 00:00:00 2001 -From: Tor Vic -Date: Fri, 8 Jul 2022 11:26:06 +0200 -Subject: [PATCH 295/297] sched/alt: Rebase onto 5.19 - ---- - kernel/sched/alt_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index b70c19fdb7a6..1ee86c260e20 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -64,7 +64,7 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; - #define sched_feat(x) (0) - #endif /* CONFIG_SCHED_DEBUG */ - --#define ALT_SCHED_VERSION "v5.18-r2" -+#define ALT_SCHED_VERSION "v5.19-r0-vd" - - /* rt_prio(prio) defined in include/linux/sched/rt.h */ - #define rt_task(p) rt_prio((p)->prio) --- -2.37.0 - - -From 9c4a7c5fd612abc5e58d018a76d45bad06ff3a6f Mon Sep 17 00:00:00 2001 -From: Tor Vic -Date: Fri, 8 Jul 2022 14:09:33 +0200 -Subject: [PATCH 296/297] sched/alt: [Sync] smp: Rename - flush_smp_call_function_from_idle() - ---- - kernel/sched/alt_core.c | 2 +- - kernel/sched/alt_sched.h | 4 ++-- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index 1ee86c260e20..bea63c917bde 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1590,7 +1590,7 @@ static int migration_cpu_stop(void *data) - * __migrate_task() such that we will not miss enforcing cpus_ptr - * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. 
- */ -- flush_smp_call_function_from_idle(); -+ flush_smp_call_function_queue(); - - raw_spin_lock(&p->pi_lock); - raw_spin_lock(&rq->lock); -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -index 1f85c5627d6d..fc6f9a805571 100644 ---- a/kernel/sched/alt_sched.h -+++ b/kernel/sched/alt_sched.h -@@ -289,10 +289,10 @@ static inline int best_mask_cpu(int cpu, const cpumask_t *mask) - return __best_mask_cpu(mask, per_cpu(sched_cpu_topo_masks, cpu)); - } - --extern void flush_smp_call_function_from_idle(void); -+extern void flush_smp_call_function_queue(void); - - #else /* !CONFIG_SMP */ --static inline void flush_smp_call_function_from_idle(void) { } -+static inline void flush_smp_call_function_queue(void) { } - #endif - - #ifndef arch_scale_freq_tick --- -2.37.0 - - -From e73b28fc1e4234835c241307ec8cf46d992d3ec4 Mon Sep 17 00:00:00 2001 -From: Tor Vic -Date: Fri, 8 Jul 2022 14:11:40 +0200 -Subject: [PATCH 297/297] sched/alt: Add lost includes again - ---- - kernel/sched/alt_core.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index bea63c917bde..3e8ecfb413f9 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - - #include - -@@ -40,6 +41,8 @@ - - #include "sched.h" - -+#include "pelt.h" -+ - #include "../../fs/io-wq.h" - #include "../smpboot.h" - --- -2.37.0 - +