From 24f561c816153b32f3a7aeaac585aa1a64765186 Mon Sep 17 00:00:00 2001 From: Tk-Glitch Date: Wed, 12 Apr 2023 17:37:56 +0200 Subject: [PATCH] linux 6.2.y: Update Project C to r2 with pds priority squeeze 0.5 https://gitlab.com/alfredchen/projectc/-/tree/master/6.2 --- linux-tkg-config/prepare | 5 +- ..._v6.2-r0.patch => 0009-prjc_v6.2-r2.patch} | 763 +++++++++++++----- 2 files changed, 544 insertions(+), 224 deletions(-) rename linux-tkg-patches/6.2/{0009-prjc_v6.2-r0.patch => 0009-prjc_v6.2-r2.patch} (94%) diff --git a/linux-tkg-config/prepare b/linux-tkg-config/prepare index 4a40c22..97f1839 100644 --- a/linux-tkg-config/prepare +++ b/linux-tkg-config/prepare @@ -252,7 +252,7 @@ _set_cpu_scheduler() { ["bore"]="BORE (Burst-Oriented Response Enhancer) CPU Scheduler" ) - # CPU SCHED selector + # CPU SCHED selector - _projectc_unoff=1 sets unofficial Project C revision flag for a given version if [ "$_kver" = "504" ]; then _avail_cpu_scheds=("pds" "bmq" "muqss" "cacule" "cfs") elif [ "$_kver" = "507" ]; then @@ -285,7 +285,6 @@ _set_cpu_scheduler() { _avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore") elif [ "$_kver" = "602" ]; then _avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore") - _projectc_unoff=1 else _avail_cpu_scheds=("cfs") fi @@ -726,6 +725,8 @@ _tkg_srcprep() { rev=2 elif [ "$_kver" = "601" ]; then rev=1 + elif [ "$_kver" = "602" ]; then + rev=2 else rev=0 fi diff --git a/linux-tkg-patches/6.2/0009-prjc_v6.2-r0.patch b/linux-tkg-patches/6.2/0009-prjc_v6.2-r2.patch similarity index 94% rename from linux-tkg-patches/6.2/0009-prjc_v6.2-r0.patch rename to linux-tkg-patches/6.2/0009-prjc_v6.2-r2.patch index c3304d2..69704ff 100644 --- a/linux-tkg-patches/6.2/0009-prjc_v6.2-r0.patch +++ b/linux-tkg-patches/6.2/0009-prjc_v6.2-r2.patch @@ -1,57 +1,3 @@ -From e44ef62b127f6a161a131c84db92a7527d8fc72d Mon Sep 17 00:00:00 2001 -From: Peter Jung -Date: Wed, 22 Feb 2023 19:24:36 +0100 -Subject: [PATCH] prjc - -Signed-off-by: Peter Jung ---- - .../admin-guide/kernel-parameters.txt | 6 + - Documentation/admin-guide/sysctl/kernel.rst | 10 + - Documentation/scheduler/sched-BMQ.txt | 110 + - fs/proc/base.c | 2 +- - include/asm-generic/resource.h | 2 +- - include/linux/sched.h | 33 +- - include/linux/sched/deadline.h | 20 + - include/linux/sched/prio.h | 26 + - include/linux/sched/rt.h | 2 + - include/linux/sched/topology.h | 3 +- - init/Kconfig | 34 + - init/init_task.c | 18 + - kernel/Kconfig.preempt | 2 +- - kernel/cgroup/cpuset.c | 4 +- - kernel/delayacct.c | 2 +- - kernel/exit.c | 4 +- - kernel/locking/rtmutex.c | 16 +- - kernel/sched/Makefile | 5 + - kernel/sched/alt_core.c | 8111 +++++++++++++++++ - kernel/sched/alt_debug.c | 31 + - kernel/sched/alt_sched.h | 671 ++ - kernel/sched/bmq.h | 110 + - kernel/sched/build_policy.c | 8 +- - kernel/sched/build_utility.c | 2 + - kernel/sched/cpufreq_schedutil.c | 10 + - kernel/sched/cputime.c | 10 +- - kernel/sched/debug.c | 10 + - kernel/sched/idle.c | 2 + - kernel/sched/pds.h | 127 + - kernel/sched/pelt.c | 4 +- - kernel/sched/pelt.h | 8 +- - kernel/sched/sched.h | 9 + - kernel/sched/stats.c | 4 + - kernel/sched/stats.h | 2 + - kernel/sched/topology.c | 17 + - kernel/sysctl.c | 15 + - kernel/time/hrtimer.c | 2 + - kernel/time/posix-cpu-timers.c | 10 +- - kernel/trace/trace_selftest.c | 5 + - 39 files changed, 9445 insertions(+), 22 deletions(-) - create mode 100644 Documentation/scheduler/sched-BMQ.txt - create mode 100644 kernel/sched/alt_core.c - create mode 100644 kernel/sched/alt_debug.c - create mode 100644 kernel/sched/alt_sched.h - create mode 100644 kernel/sched/bmq.h - create mode 100644 kernel/sched/pds.h - diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6cfa6e3996cf..1b6a407213da 100644 --- a/Documentation/admin-guide/kernel-parameters.txt @@ -59,7 +5,7 @@ index 6cfa6e3996cf..1b6a407213da 100644 @@ -5437,6 +5437,12 @@ sa1100ir [NET] See drivers/net/irda/sa1100_ir.c. - + + sched_timeslice= + [KNL] Time slice in ms for Project C BMQ/PDS scheduler. + Format: integer 2, 4 @@ -67,14 +13,14 @@ index 6cfa6e3996cf..1b6a407213da 100644 + See Documentation/scheduler/sched-BMQ.txt + sched_verbose [KNL] Enables verbose scheduler debug messages. - + schedstats= [KNL,X86] Enable or disable scheduled statistics. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 46e3d62c0eea..fb4568c919d0 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1597,3 +1597,13 @@ is 10 seconds. - + The softlockup threshold is (``2 * watchdog_thresh``). Setting this tunable to zero will disable lockup detection altogether. + @@ -215,7 +161,7 @@ index 9e479d7d202b..2a8530021b23 100644 + (unsigned long long)tsk_seruntime(task), (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); - + diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h index 8874f681b056..59eb72bf7d5f 100644 --- a/include/asm-generic/resource.h @@ -235,7 +181,7 @@ index 853d08f7562b..ad7e050d7455 100644 +++ b/include/linux/sched.h @@ -762,8 +762,14 @@ struct task_struct { unsigned int ptrace; - + #ifdef CONFIG_SMP - int on_cpu; struct __call_single_node wake_entry; @@ -256,11 +202,11 @@ index 853d08f7562b..ad7e050d7455 100644 +#endif /* !CONFIG_SCHED_ALT */ #endif int on_rq; - + @@ -785,6 +792,20 @@ struct task_struct { int normal_prio; unsigned int rt_priority; - + +#ifdef CONFIG_SCHED_ALT + u64 last_ran; + s64 time_slice; @@ -283,13 +229,13 @@ index 853d08f7562b..ad7e050d7455 100644 unsigned int core_occupation; #endif +#endif /* !CONFIG_SCHED_ALT */ - + #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; @@ -1539,6 +1561,15 @@ struct task_struct { */ }; - + +#ifdef CONFIG_SCHED_ALT +#define tsk_seruntime(t) ((t)->sched_time) +/* replace the uncertian rt_timeout with 0UL */ @@ -308,7 +254,7 @@ index 7c83d4d5a971..fa30f98cb2be 100644 +++ b/include/linux/sched/deadline.h @@ -1,5 +1,24 @@ /* SPDX-License-Identifier: GPL-2.0 */ - + +#ifdef CONFIG_SCHED_ALT + +static inline int dl_task(struct task_struct *p) @@ -336,7 +282,7 @@ index 7c83d4d5a971..fa30f98cb2be 100644 return dl_prio(p->prio); } +#endif /* CONFIG_SCHED_ALT */ - + static inline bool dl_time_before(u64 a, u64 b) { diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h @@ -346,7 +292,7 @@ index ab83d85e1183..6af9ae681116 100644 @@ -18,6 +18,32 @@ #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - + +#ifdef CONFIG_SCHED_ALT + +/* Undefine MAX_PRIO and DEFAULT_PRIO */ @@ -381,7 +327,7 @@ index 994c25640e15..8c050a59ece1 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - + if (policy == SCHED_FIFO || policy == SCHED_RR) return true; +#ifndef CONFIG_SCHED_ALT @@ -390,15 +336,15 @@ index 994c25640e15..8c050a59ece1 100644 +#endif return false; } - + diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 816df6cc444e..c8da08e18c91 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -234,7 +234,8 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) - + #endif /* !CONFIG_SMP */ - + -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) && \ + !defined(CONFIG_SCHED_ALT) @@ -418,9 +364,9 @@ index 44e90b28a30f..af24591984ab 100644 This feature enables the scheduler to track the clamped utilization of each CPU based on RUNNABLE tasks scheduled on that CPU. @@ -867,6 +868,35 @@ config UCLAMP_BUCKETS_COUNT - + If in doubt, use the default value. - + +menuconfig SCHED_ALT + bool "Alternative CPU Schedulers" + default y @@ -451,7 +397,7 @@ index 44e90b28a30f..af24591984ab 100644 +endif + endmenu - + # @@ -924,6 +954,7 @@ config NUMA_BALANCING depends on ARCH_SUPPORTS_NUMA_BALANCING @@ -464,7 +410,7 @@ index 44e90b28a30f..af24591984ab 100644 @@ -1021,6 +1052,7 @@ config FAIR_GROUP_SCHED depends on CGROUP_SCHED default CGROUP_SCHED - + +if !SCHED_ALT config CFS_BANDWIDTH bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" @@ -472,13 +418,13 @@ index 44e90b28a30f..af24591984ab 100644 @@ -1043,6 +1075,7 @@ config RT_GROUP_SCHED realtime bandwidth for them. See Documentation/scheduler/sched-rt-group.rst for more information. - + +endif #!SCHED_ALT endif #CGROUP_SCHED - + config UCLAMP_TASK_GROUP @@ -1287,6 +1320,7 @@ config CHECKPOINT_RESTORE - + config SCHED_AUTOGROUP bool "Automatic process group scheduling" + depends on !SCHED_ALT @@ -536,7 +482,7 @@ index c2f1fd95a821..41654679b1b2 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -117,7 +117,7 @@ config PREEMPT_DYNAMIC - + config SCHED_CORE bool "Core Scheduling for SMT" - depends on SCHED_SMT @@ -551,7 +497,7 @@ index ca826bd1eba3..60e194f1d6d8 100644 @@ -791,7 +791,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) return ret; } - + -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) /* @@ -576,9 +522,9 @@ index e39cb696cfbd..463423572e09 100644 t2 = tsk->sched_info.run_delay; - t3 = tsk->se.sum_exec_runtime; + t3 = tsk_seruntime(tsk); - + d->cpu_count += t1; - + diff --git a/kernel/exit.c b/kernel/exit.c index 15dc2ec80c46..1e583e0f89a7 100644 --- a/kernel/exit.c @@ -586,11 +532,11 @@ index 15dc2ec80c46..1e583e0f89a7 100644 @@ -172,7 +172,7 @@ static void __exit_signal(struct task_struct *tsk) sig->curr_target = next_thread(tsk); } - + - add_device_randomness((const void*) &tsk->se.sum_exec_runtime, + add_device_randomness((const void*) &tsk_seruntime(tsk), sizeof(unsigned long long)); - + /* @@ -193,7 +193,7 @@ static void __exit_signal(struct task_struct *tsk) sig->inblock += task_io_get_inblock(tsk); @@ -612,14 +558,14 @@ index 728f434de2bb..0e1082a4e878 100644 - waiter->deadline = task->dl.deadline; + waiter->deadline = __tsk_deadline(task); } - + /* * Only use with rt_mutex_waiter_{less,equal}() */ #define task_to_waiter(p) \ - &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline } + &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = __tsk_deadline(p) } - + static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, struct rt_mutex_waiter *right) { @@ -628,7 +574,7 @@ index 728f434de2bb..0e1082a4e878 100644 +#else if (left->prio < right->prio) return 1; - + +#ifndef CONFIG_SCHED_BMQ /* * If both waiters have dl_prio(), we check the deadlines of the @@ -638,11 +584,11 @@ index 728f434de2bb..0e1082a4e878 100644 if (dl_prio(left->prio)) return dl_time_before(left->deadline, right->deadline); +#endif - + return 0; +#endif } - + static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, struct rt_mutex_waiter *right) { @@ -651,7 +597,7 @@ index 728f434de2bb..0e1082a4e878 100644 +#else if (left->prio != right->prio) return 0; - + +#ifndef CONFIG_SCHED_BMQ /* * If both waiters have dl_prio(), we check the deadlines of the @@ -661,11 +607,11 @@ index 728f434de2bb..0e1082a4e878 100644 if (dl_prio(left->prio)) return left->deadline == right->deadline; +#endif - + return 1; +#endif } - + static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 976092b7bd45..31d587c16ec1 100644 @@ -686,10 +632,10 @@ index 976092b7bd45..31d587c16ec1 100644 obj-y += build_utility.o diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c new file mode 100644 -index 000000000000..f5e9c01f9382 +index 000000000000..a122b1f8678e --- /dev/null +++ b/kernel/sched/alt_core.c -@@ -0,0 +1,8111 @@ +@@ -0,0 +1,8120 @@ +/* + * kernel/sched/alt_core.c + * @@ -759,7 +705,12 @@ index 000000000000..f5e9c01f9382 +#define sched_feat(x) (0) +#endif /* CONFIG_SCHED_DEBUG */ + -+#define ALT_SCHED_VERSION "v6.2-r0" ++#define ALT_SCHED_VERSION "v6.2-r2" ++ ++/* ++ * Compile time debug macro ++ * #define ALT_SCHED_DEBUG ++ */ + +/* rt_prio(prio) defined in include/linux/sched/rt.h */ +#define rt_task(p) rt_prio((p)->prio) @@ -814,9 +765,9 @@ index 000000000000..f5e9c01f9382 +#ifdef CONFIG_SMP +static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; + -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_topo_end_mask); ++DEFINE_PER_CPU_ALIGNED(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); ++DEFINE_PER_CPU_ALIGNED(cpumask_t *, sched_cpu_llc_mask); ++DEFINE_PER_CPU_ALIGNED(cpumask_t *, sched_cpu_topo_end_mask); + +#ifdef CONFIG_SCHED_SMT +DEFINE_STATIC_KEY_FALSE(sched_smt_present); @@ -910,13 +861,13 @@ index 000000000000..f5e9c01f9382 + + if (prio < last_prio) { + if (IDLE_TASK_SCHED_PRIO == last_prio) { -+ cpumask_clear_cpu(cpu, sched_idle_mask); -+ last_prio -= 2; +#ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present)) + cpumask_andnot(&sched_sg_idle_mask, + &sched_sg_idle_mask, cpu_smt_mask(cpu)); +#endif ++ cpumask_clear_cpu(cpu, sched_idle_mask); ++ last_prio -= 2; + } + clear_recorded_preempt_mask(pr, prio, last_prio, cpu); + @@ -924,18 +875,14 @@ index 000000000000..f5e9c01f9382 + } + /* last_prio < prio */ + if (IDLE_TASK_SCHED_PRIO == prio) { ++#ifdef CONFIG_SCHED_SMT ++ if (static_branch_likely(&sched_smt_present) && ++ cpumask_intersects(cpu_smt_mask(cpu), sched_idle_mask)) ++ cpumask_or(&sched_sg_idle_mask, ++ &sched_sg_idle_mask, cpu_smt_mask(cpu)); ++#endif + cpumask_set_cpu(cpu, sched_idle_mask); + prio -= 2; -+#ifdef CONFIG_SCHED_SMT -+ if (static_branch_likely(&sched_smt_present)) { -+ cpumask_t tmp; -+ -+ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_idle_mask); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_sg_idle_mask, -+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); -+ } -+#endif + } + set_recorded_preempt_mask(pr, last_prio, prio, cpu); +} @@ -1476,11 +1423,13 @@ index 000000000000..f5e9c01f9382 + +static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) +{ ++#ifdef ALT_SCHED_DEBUG + lockdep_assert_held(&rq->lock); + + /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", + task_cpu(p), cpu_of(rq)); ++#endif + + __SCHED_DEQUEUE_TASK(p, rq, flags); + --rq->nr_running; @@ -1494,11 +1443,13 @@ index 000000000000..f5e9c01f9382 + +static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) +{ ++#ifdef ALT_SCHED_DEBUG + lockdep_assert_held(&rq->lock); + + /*printk(KERN_INFO "sched: enqueue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", + task_cpu(p), cpu_of(rq)); ++#endif + + __SCHED_ENQUEUE_TASK(p, rq, flags); + update_sched_preempt_mask(rq); @@ -1513,10 +1464,12 @@ index 000000000000..f5e9c01f9382 + +static inline void requeue_task(struct task_struct *p, struct rq *rq, int idx) +{ ++#ifdef ALT_SCHED_DEBUG + lockdep_assert_held(&rq->lock); + /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ + WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", + cpu_of(rq), task_cpu(p)); ++#endif + + list_del(&p->sq_node); + list_add_tail(&p->sq_node, &rq->queue.heads[idx]); @@ -2035,8 +1988,8 @@ index 000000000000..f5e9c01f9382 + */ +static inline void deactivate_task(struct task_struct *p, struct rq *rq) +{ -+ dequeue_task(p, rq, DEQUEUE_SLEEP); + p->on_rq = 0; ++ dequeue_task(p, rq, DEQUEUE_SLEEP); + cpufreq_update_util(rq, 0); +} + @@ -2253,7 +2206,7 @@ index 000000000000..f5e9c01f9382 +{ + lockdep_assert_held(&rq->lock); + -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); ++ p->on_rq = TASK_ON_RQ_MIGRATING; + dequeue_task(p, rq, 0); + update_sched_preempt_mask(rq); + set_task_cpu(p, new_cpu); @@ -4849,10 +4802,9 @@ index 000000000000..f5e9c01f9382 +/* + * sg_balance - slibing group balance check for run queue @rq + */ -+static inline void sg_balance(struct rq *rq) ++static inline void sg_balance(struct rq *rq, int cpu) +{ + cpumask_t chk; -+ int cpu = cpu_of(rq); + + /* exit when cpu is offline */ + if (unlikely(!rq->online)) @@ -5166,11 +5118,6 @@ index 000000000000..f5e9c01f9382 + schedstat_inc(this_rq()->sched_count); +} + -+/* -+ * Compile time debug macro -+ * #define ALT_SCHED_DEBUG -+ */ -+ +#ifdef ALT_SCHED_DEBUG +void alt_sched_debug(void) +{ @@ -5207,10 +5154,12 @@ index 000000000000..f5e9c01f9382 + (p = sched_rq_next_task(skip, rq)) != rq->idle) { + skip = sched_rq_next_task(p, rq); + if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { ++ p->on_rq = TASK_ON_RQ_MIGRATING; + __SCHED_DEQUEUE_TASK(p, rq, 0); + set_task_cpu(p, dest_cpu); + sched_task_sanity_check(p, dest_rq); + __SCHED_ENQUEUE_TASK(p, dest_rq, 0); ++ p->on_rq = TASK_ON_RQ_QUEUED; + nr_migrated++; + } + nr_tries--; @@ -5507,19 +5456,21 @@ index 000000000000..f5e9c01f9382 + */ + ++*switch_count; + -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); ++ psi_sched_switch(prev, next, deactivated); + + trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); + + /* Also unlocks the rq: */ + rq = context_switch(rq, prev, next); ++ ++ cpu = cpu_of(rq); + } else { + __balance_callbacks(rq); + raw_spin_unlock_irq(&rq->lock); + } + +#ifdef CONFIG_SCHED_SMT -+ sg_balance(rq); ++ sg_balance(rq, cpu); +#endif +} + @@ -5844,12 +5795,16 @@ index 000000000000..f5e9c01f9382 + +static inline void check_task_changed(struct task_struct *p, struct rq *rq) +{ -+ int idx; -+ + /* Trigger resched if task sched_prio has been modified. */ -+ if (task_on_rq_queued(p) && (idx = task_sched_prio_idx(p, rq)) != p->sq_idx) { -+ requeue_task(p, rq, idx); -+ check_preempt_curr(rq); ++ if (task_on_rq_queued(p)) { ++ int idx; ++ ++ update_rq_clock(rq); ++ idx = task_sched_prio_idx(p, rq); ++ if (idx != p->sq_idx) { ++ requeue_task(p, rq, idx); ++ check_preempt_curr(rq); ++ } + } +} + @@ -5902,7 +5857,6 @@ index 000000000000..f5e9c01f9382 + return; + + rq = __task_access_lock(p, &lock); -+ update_rq_clock(rq); + /* + * Set under pi_lock && rq->lock, such that the value can be used under + * either lock. @@ -8275,7 +8229,8 @@ index 000000000000..f5e9c01f9382 + int i; + struct rq *rq; + -+ printk(KERN_INFO ALT_SCHED_VERSION_MSG); ++ printk(KERN_INFO "sched/alt: "ALT_SCHED_NAME" CPU Scheduler "ALT_SCHED_VERSION\ ++ " by Alfred Chen.\n"); + + wait_bit_init(); + @@ -8840,10 +8795,10 @@ index 000000000000..1212a031700e +{} diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h new file mode 100644 -index 000000000000..0b563999d4c1 +index 000000000000..e9b93e63406a --- /dev/null +++ b/kernel/sched/alt_sched.h -@@ -0,0 +1,671 @@ +@@ -0,0 +1,672 @@ +#ifndef ALT_SCHED_H +#define ALT_SCHED_H + @@ -8965,6 +8920,8 @@ index 000000000000..0b563999d4c1 +}; + +struct rq; ++struct cpuidle_state; ++ +struct balance_callback { + struct balance_callback *next; + void (*func)(struct rq *rq); @@ -9136,8 +9093,7 @@ index 000000000000..0b563999d4c1 + NR_CPU_AFFINITY_LEVELS +}; + -+DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); -+DECLARE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); ++DECLARE_PER_CPU_ALIGNED(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); + +static inline int +__best_mask_cpu(const cpumask_t *cpumask, const cpumask_t *mask) @@ -9517,11 +9473,11 @@ index 000000000000..0b563999d4c1 +#endif /* ALT_SCHED_H */ diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h new file mode 100644 -index 000000000000..66b77291b9d0 +index 000000000000..f29b8f3aa786 --- /dev/null +++ b/kernel/sched/bmq.h @@ -0,0 +1,110 @@ -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" ++#define ALT_SCHED_NAME "BMQ" + +/* + * BMQ only routines @@ -9636,23 +9592,23 @@ index d9dc9ab3773f..71a25540d65e 100644 --- a/kernel/sched/build_policy.c +++ b/kernel/sched/build_policy.c @@ -42,13 +42,19 @@ - + #include "idle.c" - + +#ifndef CONFIG_SCHED_ALT #include "rt.c" +#endif - + #ifdef CONFIG_SMP +#ifndef CONFIG_SCHED_ALT # include "cpudeadline.c" +#endif # include "pelt.c" #endif - + #include "cputime.c" -#include "deadline.c" - + +#ifndef CONFIG_SCHED_ALT +#include "deadline.c" +#endif @@ -9661,7 +9617,7 @@ index 99bdd96f454f..23f80a86d2d7 100644 --- a/kernel/sched/build_utility.c +++ b/kernel/sched/build_utility.c @@ -85,7 +85,9 @@ - + #ifdef CONFIG_SMP # include "cpupri.c" +#ifndef CONFIG_SCHED_ALT @@ -9669,14 +9625,14 @@ index 99bdd96f454f..23f80a86d2d7 100644 +#endif # include "topology.c" #endif - + diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 1207c78f85c1..68812e0756cb 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -159,9 +159,14 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) struct rq *rq = cpu_rq(sg_cpu->cpu); - + sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); +#ifndef CONFIG_SCHED_ALT sg_cpu->bw_dl = cpu_bw_dl(rq); @@ -9687,7 +9643,7 @@ index 1207c78f85c1..68812e0756cb 100644 + sg_cpu->util = rq_load_util(rq, sg_cpu->max); +#endif /* CONFIG_SCHED_ALT */ } - + /** @@ -305,8 +310,10 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } */ @@ -9698,11 +9654,11 @@ index 1207c78f85c1..68812e0756cb 100644 sg_cpu->sg_policy->limits_changed = true; +#endif } - + static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, @@ -606,6 +613,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) } - + ret = sched_setattr_nocheck(thread, &attr); + if (ret) { @@ -9717,7 +9673,7 @@ index 1207c78f85c1..68812e0756cb 100644 +#endif /* CONFIG_SCHED_ALT */ } static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); - + diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 95fc77853743..b48b3f9ed47f 100644 --- a/kernel/sched/cputime.c @@ -9725,15 +9681,15 @@ index 95fc77853743..b48b3f9ed47f 100644 @@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) p->utime += cputime; account_group_user_time(p, cputime); - + - index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; + index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; - + /* Add user time to cpustat. */ task_group_account_field(p, index, cputime); @@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) p->gtime += cputime; - + /* Add guest time to cpustat. */ - if (task_nice(p) > 0) { + if (task_running_nice(p)) { @@ -9751,12 +9707,12 @@ index 95fc77853743..b48b3f9ed47f 100644 static u64 read_sum_exec_runtime(struct task_struct *t) @@ -294,7 +294,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) struct rq *rq; - + rq = task_rq_lock(t, &rf); - ns = t->se.sum_exec_runtime; + ns = tsk_seruntime(t); task_rq_unlock(rq, t, &rf); - + return ns; @@ -626,7 +626,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) @@ -9765,7 +9721,7 @@ index 95fc77853743..b48b3f9ed47f 100644 - .sum_exec_runtime = p->se.sum_exec_runtime, + .sum_exec_runtime = tsk_seruntime(p), }; - + if (task_cputime(p, &cputime.utime, &cputime.stime)) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 1637b65ba07a..033c6deeb515 100644 @@ -9774,39 +9730,39 @@ index 1637b65ba07a..033c6deeb515 100644 @@ -7,6 +7,7 @@ * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar */ - + +#ifndef CONFIG_SCHED_ALT /* * This allows printing both to /proc/sched_debug and * to the console @@ -215,6 +216,7 @@ static const struct file_operations sched_scaling_fops = { }; - + #endif /* SMP */ +#endif /* !CONFIG_SCHED_ALT */ - + #ifdef CONFIG_PREEMPT_DYNAMIC - + @@ -278,6 +280,7 @@ static const struct file_operations sched_dynamic_fops = { - + #endif /* CONFIG_PREEMPT_DYNAMIC */ - + +#ifndef CONFIG_SCHED_ALT __read_mostly bool sched_debug_verbose; - + static const struct seq_operations sched_debug_sops; @@ -293,6 +296,7 @@ static const struct file_operations sched_debug_fops = { .llseek = seq_lseek, .release = seq_release, }; +#endif /* !CONFIG_SCHED_ALT */ - + static struct dentry *debugfs_sched; - + @@ -302,12 +306,15 @@ static __init int sched_init_debug(void) - + debugfs_sched = debugfs_create_dir("sched", NULL); - + +#ifndef CONFIG_SCHED_ALT debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose); @@ -9814,31 +9770,31 @@ index 1637b65ba07a..033c6deeb515 100644 #ifdef CONFIG_PREEMPT_DYNAMIC debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); #endif - + +#ifndef CONFIG_SCHED_ALT debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency); debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity); debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity); @@ -337,11 +344,13 @@ static __init int sched_init_debug(void) #endif - + debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); +#endif /* !CONFIG_SCHED_ALT */ - + return 0; } late_initcall(sched_init_debug); - + +#ifndef CONFIG_SCHED_ALT #ifdef CONFIG_SMP - + static cpumask_var_t sd_sysctl_cpus; @@ -1068,6 +1077,7 @@ void proc_sched_set_task(struct task_struct *p) memset(&p->stats, 0, sizeof(p->stats)); #endif } +#endif /* !CONFIG_SCHED_ALT */ - + void resched_latency_warn(int cpu, u64 latency) { diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c @@ -9848,7 +9804,7 @@ index f26ab2675f7d..480d4ad16d45 100644 @@ -400,6 +400,7 @@ void cpu_startup_entry(enum cpuhp_state state) do_idle(); } - + +#ifndef CONFIG_SCHED_ALT /* * idle-task scheduling class. @@ -9860,14 +9816,15 @@ index f26ab2675f7d..480d4ad16d45 100644 +#endif diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h new file mode 100644 -index 000000000000..56a649d02e49 +index 000000000000..27e09b4feb8c --- /dev/null +++ b/kernel/sched/pds.h -@@ -0,0 +1,127 @@ -+#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" +@@ -0,0 +1,133 @@ ++#define ALT_SCHED_NAME "PDS" + +static int sched_timeslice_shift = 22; + ++/* PDS assume NORMAL_PRIO_NUM is power of 2 */ +#define NORMAL_PRIO_MOD(x) ((x) & (NORMAL_PRIO_NUM - 1)) + +/* @@ -9884,38 +9841,43 @@ index 000000000000..56a649d02e49 +{ + s64 delta = p->deadline - rq->time_edge + NORMAL_PRIO_NUM - NICE_WIDTH; + ++#ifdef ALT_SCHED_DEBUG + if (WARN_ONCE(delta > NORMAL_PRIO_NUM - 1, + "pds: task_sched_prio_normal() delta %lld\n", delta)) + return NORMAL_PRIO_NUM - 1; ++#endif + -+ return (delta < 0) ? 0 : delta; ++ return max(0LL, delta); +} + +static inline int task_sched_prio(const struct task_struct *p) +{ -+ return (p->prio < MAX_RT_PRIO) ? p->prio : ++ return (p->prio < MIN_NORMAL_PRIO) ? p->prio : + MIN_NORMAL_PRIO + task_sched_prio_normal(p, task_rq(p)); +} + +static inline int +task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) +{ -+ return (p->prio < MAX_RT_PRIO) ? p->prio : MIN_NORMAL_PRIO + -+ NORMAL_PRIO_MOD(task_sched_prio_normal(p, rq) + rq->time_edge); ++ u64 idx; ++ ++ if (p->prio < MAX_RT_PRIO) ++ return p->prio; ++ ++ idx = max(p->deadline + NORMAL_PRIO_NUM - NICE_WIDTH, rq->time_edge); ++ return MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(idx); +} + +static inline int sched_prio2idx(int prio, struct rq *rq) +{ + return (IDLE_TASK_SCHED_PRIO == prio || prio < MAX_RT_PRIO) ? prio : -+ MIN_NORMAL_PRIO + NORMAL_PRIO_MOD((prio - MIN_NORMAL_PRIO) + -+ rq->time_edge); ++ MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(prio + rq->time_edge); +} + +static inline int sched_idx2prio(int idx, struct rq *rq) +{ + return (idx < MAX_RT_PRIO) ? idx : MIN_NORMAL_PRIO + -+ NORMAL_PRIO_MOD((idx - MIN_NORMAL_PRIO) + NORMAL_PRIO_NUM - -+ NORMAL_PRIO_MOD(rq->time_edge)); ++ NORMAL_PRIO_MOD(idx - rq->time_edge); +} + +static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) @@ -9940,6 +9902,7 @@ index 000000000000..56a649d02e49 + if (now == old) + return; + ++ rq->time_edge = now; + delta = min_t(u64, NORMAL_PRIO_NUM, now - old); + INIT_LIST_HEAD(&head); + @@ -9949,10 +9912,9 @@ index 000000000000..56a649d02e49 + + rq->queue.bitmap[2] = (NORMAL_PRIO_NUM == delta) ? 0UL : + rq->queue.bitmap[2] >> delta; -+ rq->time_edge = now; + if (!list_empty(&head)) { -+ u64 idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); + struct task_struct *p; ++ u64 idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); + + list_for_each_entry(p, &head, sq_node) + p->sq_idx = idx; @@ -9998,17 +9960,17 @@ index 0f310768260c..bd38bf738fe9 100644 @@ -266,6 +266,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) WRITE_ONCE(sa->util_avg, sa->util_sum / divider); } - + +#ifndef CONFIG_SCHED_ALT /* * sched_entity: * @@ -383,8 +384,9 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - + return 0; } +#endif - + -#ifdef CONFIG_SCHED_THERMAL_PRESSURE +#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) /* @@ -10021,7 +9983,7 @@ index 3a0e0dc28721..e8a7d84aa5a5 100644 @@ -1,13 +1,15 @@ #ifdef CONFIG_SMP #include "sched-pelt.h" - + +#ifndef CONFIG_SCHED_ALT int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); @@ -10029,16 +9991,16 @@ index 3a0e0dc28721..e8a7d84aa5a5 100644 int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); +#endif - + -#ifdef CONFIG_SCHED_THERMAL_PRESSURE +#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); - + static inline u64 thermal_load_avg(struct rq *rq) @@ -44,6 +46,7 @@ static inline u32 get_pelt_divider(struct sched_avg *avg) return PELT_MIN_DIVIDER + avg->period_contrib; } - + +#ifndef CONFIG_SCHED_ALT static inline void cfs_se_util_change(struct sched_avg *avg) { @@ -10048,9 +10010,9 @@ index 3a0e0dc28721..e8a7d84aa5a5 100644 } #endif +#endif /* CONFIG_SCHED_ALT */ - + #else - + +#ifndef CONFIG_SCHED_ALT static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) @@ -10060,7 +10022,7 @@ index 3a0e0dc28721..e8a7d84aa5a5 100644 return 0; } +#endif - + static inline int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h @@ -10070,7 +10032,7 @@ index 771f8ddb7053..787a5069d69a 100644 @@ -5,6 +5,10 @@ #ifndef _KERNEL_SCHED_SCHED_H #define _KERNEL_SCHED_SCHED_H - + +#ifdef CONFIG_SCHED_ALT +#include "alt_sched.h" +#else @@ -10081,7 +10043,7 @@ index 771f8ddb7053..787a5069d69a 100644 @@ -3261,4 +3265,9 @@ static inline void update_current_exec_runtime(struct task_struct *curr, cgroup_account_cputime(curr, delta_exec); } - + +static inline int task_running_nice(struct task_struct *p) +{ + return (task_nice(p) > 0); @@ -10105,7 +10067,7 @@ index 857f837f52cb..5486c63e4790 100644 rq = cpu_rq(cpu); @@ -143,6 +145,7 @@ static int show_schedstat(struct seq_file *seq, void *v) seq_printf(seq, "\n"); - + #ifdef CONFIG_SMP +#ifndef CONFIG_SCHED_ALT /* domain-specific stats */ @@ -10124,9 +10086,9 @@ index 38f3698f5e5b..b9d597394316 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -89,6 +89,7 @@ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delt - + #endif /* CONFIG_SCHEDSTATS */ - + +#ifndef CONFIG_SCHED_ALT #ifdef CONFIG_FAIR_GROUP_SCHED struct sched_entity_stats { @@ -10136,7 +10098,7 @@ index 38f3698f5e5b..b9d597394316 100644 return &task_of(se)->stats; } +#endif /* CONFIG_SCHED_ALT */ - + #ifdef CONFIG_PSI void psi_task_change(struct task_struct *task, int clear, int set); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c @@ -10146,37 +10108,37 @@ index 8739c2a5a54e..d8dd6c15eb47 100644 @@ -3,6 +3,7 @@ * Scheduler topology setup/handling methods */ - + +#ifndef CONFIG_SCHED_ALT DEFINE_MUTEX(sched_domains_mutex); - + /* Protected by sched_domains_mutex: */ @@ -1413,8 +1414,10 @@ static void asym_cpu_capacity_scan(void) */ - + static int default_relax_domain_level = -1; +#endif /* CONFIG_SCHED_ALT */ int sched_domain_level_max; - + +#ifndef CONFIG_SCHED_ALT static int __init setup_relax_domain_level(char *str) { if (kstrtoint(str, 0, &default_relax_domain_level)) @@ -1647,6 +1650,7 @@ sd_init(struct sched_domain_topology_level *tl, - + return sd; } +#endif /* CONFIG_SCHED_ALT */ - + /* * Topology list, bottom-up. @@ -1683,6 +1687,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl) sched_domain_topology_saved = NULL; } - + +#ifndef CONFIG_SCHED_ALT #ifdef CONFIG_NUMA - + static const struct cpumask *sd_numa_mask(int cpu) @@ -2645,3 +2650,15 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); @@ -10199,9 +10161,9 @@ index 137d4abe3eda..6bada3a6d571 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -93,6 +93,10 @@ EXPORT_SYMBOL_GPL(sysctl_long_vals); - + /* Constants used for minimum and maximum */ - + +#ifdef CONFIG_SCHED_ALT +extern int sched_yield_type; +#endif @@ -10234,13 +10196,13 @@ index 3ae661ab6260..35f0176dcdb0 100644 @@ -2088,8 +2088,10 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, int ret = 0; u64 slack; - + +#ifndef CONFIG_SCHED_ALT slack = current->timer_slack_ns; if (dl_task(current) || rt_task(current)) +#endif slack = 0; - + hrtimer_init_sleeper_on_stack(&t, clockid, mode); diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index cb925e8ef9a8..67d823510f5c 100644 @@ -10248,17 +10210,17 @@ index cb925e8ef9a8..67d823510f5c 100644 +++ b/kernel/time/posix-cpu-timers.c @@ -223,7 +223,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) u64 stime, utime; - + task_cputime(p, &utime, &stime); - store_samples(samples, stime, utime, p->se.sum_exec_runtime); + store_samples(samples, stime, utime, tsk_seruntime(p)); } - + static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, @@ -866,6 +866,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, } } - + +#ifndef CONFIG_SCHED_ALT static inline void check_dl_overrun(struct task_struct *tsk) { @@ -10268,18 +10230,18 @@ index cb925e8ef9a8..67d823510f5c 100644 } } +#endif - + static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) { @@ -900,8 +902,10 @@ static void check_thread_timers(struct task_struct *tsk, u64 samples[CPUCLOCK_MAX]; unsigned long soft; - + +#ifndef CONFIG_SCHED_ALT if (dl_task(tsk)) check_dl_overrun(tsk); +#endif - + if (expiry_cache_is_inactive(pct)) return; @@ -915,7 +919,7 @@ static void check_thread_timers(struct task_struct *tsk, @@ -10289,17 +10251,17 @@ index cb925e8ef9a8..67d823510f5c 100644 - unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); + unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - + /* At the hard limit, send SIGKILL. No further action. */ @@ -1151,8 +1155,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) return true; } - + +#ifndef CONFIG_SCHED_ALT if (dl_task(tsk) && tsk->dl.dl_overrun) return true; +#endif - + return false; } diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c @@ -10321,7 +10283,364 @@ index ff0536cea968..ce266990006d 100644 +#endif }; struct wakeup_test_data *x = data; - --- -2.39.2 +diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c +index a122b1f8678e..78748ebb1d71 100644 +--- a/kernel/sched/alt_core.c ++++ b/kernel/sched/alt_core.c +@@ -254,8 +254,7 @@ static inline void update_sched_preempt_mask(struct rq *rq) + */ + static inline struct task_struct *sched_rq_first_task(struct rq *rq) + { +- unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); +- const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; ++ const struct list_head *head = &rq->queue.heads[sched_prio2idx(rq->prio, rq)]; + + return list_first_entry(head, struct task_struct, sq_node); + } +@@ -767,13 +766,15 @@ unsigned long get_wchan(struct task_struct *p) + * Add/Remove/Requeue task to/from the runqueue routines + * Context: rq->lock + */ +-#define __SCHED_DEQUEUE_TASK(p, rq, flags) \ ++#define __SCHED_DEQUEUE_TASK(p, rq, flags, func) \ + sched_info_dequeue(rq, p); \ + psi_dequeue(p, flags & DEQUEUE_SLEEP); \ + \ + list_del(&p->sq_node); \ +- if (list_empty(&rq->queue.heads[p->sq_idx])) \ +- clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); ++ if (list_empty(&rq->queue.heads[p->sq_idx])) { \ ++ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); \ ++ func; \ ++ } + + #define __SCHED_ENQUEUE_TASK(p, rq, flags) \ + sched_info_enqueue(rq, p); \ +@@ -788,12 +789,12 @@ static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) + #ifdef ALT_SCHED_DEBUG + lockdep_assert_held(&rq->lock); + +- /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ ++ /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->deadline);*/ + WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", + task_cpu(p), cpu_of(rq)); + #endif + +- __SCHED_DEQUEUE_TASK(p, rq, flags); ++ __SCHED_DEQUEUE_TASK(p, rq, flags, update_sched_preempt_mask(rq)); + --rq->nr_running; + #ifdef CONFIG_SMP + if (1 == rq->nr_running) +@@ -808,7 +809,7 @@ static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) + #ifdef ALT_SCHED_DEBUG + lockdep_assert_held(&rq->lock); + +- /*printk(KERN_INFO "sched: enqueue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ ++ /*printk(KERN_INFO "sched: enqueue(%d) %px %d\n", cpu_of(rq), p, p->prio);*/ + WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", + task_cpu(p), cpu_of(rq)); + #endif +@@ -828,7 +829,7 @@ static inline void requeue_task(struct task_struct *p, struct rq *rq, int idx) + { + #ifdef ALT_SCHED_DEBUG + lockdep_assert_held(&rq->lock); +- /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ ++ /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->deadline);*/ + WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", + cpu_of(rq), task_cpu(p)); + #endif +@@ -837,8 +838,7 @@ static inline void requeue_task(struct task_struct *p, struct rq *rq, int idx) + list_add_tail(&p->sq_node, &rq->queue.heads[idx]); + if (idx != p->sq_idx) { + if (list_empty(&rq->queue.heads[p->sq_idx])) +- clear_bit(sched_idx2prio(p->sq_idx, rq), +- rq->queue.bitmap); ++ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); + p->sq_idx = idx; + set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); + update_sched_preempt_mask(rq); +@@ -1350,8 +1350,8 @@ static void activate_task(struct task_struct *p, struct rq *rq) + */ + static inline void deactivate_task(struct task_struct *p, struct rq *rq) + { +- p->on_rq = 0; + dequeue_task(p, rq, DEQUEUE_SLEEP); ++ p->on_rq = 0; + cpufreq_update_util(rq, 0); + } + +@@ -1568,9 +1568,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int + { + lockdep_assert_held(&rq->lock); + +- p->on_rq = TASK_ON_RQ_MIGRATING; ++ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); + dequeue_task(p, rq, 0); +- update_sched_preempt_mask(rq); + set_task_cpu(p, new_cpu); + raw_spin_unlock(&rq->lock); + +@@ -4516,12 +4515,10 @@ migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) + (p = sched_rq_next_task(skip, rq)) != rq->idle) { + skip = sched_rq_next_task(p, rq); + if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { +- p->on_rq = TASK_ON_RQ_MIGRATING; +- __SCHED_DEQUEUE_TASK(p, rq, 0); ++ __SCHED_DEQUEUE_TASK(p, rq, 0, ); + set_task_cpu(p, dest_cpu); + sched_task_sanity_check(p, dest_rq); + __SCHED_ENQUEUE_TASK(p, dest_rq, 0); +- p->on_rq = TASK_ON_RQ_QUEUED; + nr_migrated++; + } + nr_tries--; +@@ -4566,6 +4563,7 @@ static inline int take_other_rq_tasks(struct rq *rq, int cpu) + if (rq->nr_running > 1) + cpumask_set_cpu(cpu, &sched_rq_pending_mask); + ++ update_sched_preempt_mask(rq); + cpufreq_update_util(rq, 0); + + return 1; +@@ -4637,8 +4635,7 @@ choose_next_task(struct rq *rq, int cpu) + #ifdef CONFIG_HIGH_RES_TIMERS + hrtick_start(rq, next->time_slice); + #endif +- /*printk(KERN_INFO "sched: choose_next_task(%d) next %px\n", cpu, +- * next);*/ ++ /*printk(KERN_INFO "sched: choose_next_task(%d) next %px\n", cpu, next);*/ + return next; + } + +@@ -4706,7 +4703,6 @@ static void __sched notrace __schedule(unsigned int sched_mode) + unsigned long prev_state; + struct rq *rq; + int cpu; +- int deactivated = 0; + + cpu = smp_processor_id(); + rq = cpu_rq(cpu); +@@ -4771,7 +4767,6 @@ static void __sched notrace __schedule(unsigned int sched_mode) + */ + sched_task_deactivate(prev, rq); + deactivate_task(prev, rq); +- deactivated = 1; + + if (prev->in_iowait) { + atomic_inc(&rq->nr_iowait); +@@ -4791,11 +4786,10 @@ static void __sched notrace __schedule(unsigned int sched_mode) + #endif + + if (likely(prev != next)) { +- if (deactivated) +- update_sched_preempt_mask(rq); + next->last_ran = rq->clock_task; + rq->last_ts_switch = rq->clock; + ++ /*printk(KERN_INFO "sched: %px -> %px\n", prev, next);*/ + rq->nr_switches++; + /* + * RCU users of rcu_dereference(rq->curr) may not see +@@ -4818,7 +4812,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) + */ + ++*switch_count; + +- psi_sched_switch(prev, next, deactivated); ++ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + + trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); + +diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h +index e9b93e63406a..60bbb4583d16 100644 +--- a/kernel/sched/alt_sched.h ++++ b/kernel/sched/alt_sched.h +@@ -22,8 +22,8 @@ + #endif + + #ifdef CONFIG_SCHED_PDS +-/* bits: RT(0-99), reserved(100-127), NORMAL_PRIO_NUM, cpu idle task */ +-#define SCHED_BITS (MIN_NORMAL_PRIO + NORMAL_PRIO_NUM + 1) ++/* bits: RT(0-24), reserved(25-31), SCHED_NORMAL_PRIO_NUM(32), cpu idle task(1) */ ++#define SCHED_BITS (64 + 1) + #endif /* CONFIG_SCHED_PDS */ + + #define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) +@@ -142,7 +142,7 @@ struct rq { + #ifdef CONFIG_SCHED_PDS + u64 time_edge; + #endif +- unsigned long prio; ++ unsigned long prio; + + /* switch count */ + u64 nr_switches; +diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h +index 27e09b4feb8c..5a94a98e19af 100644 +--- a/kernel/sched/pds.h ++++ b/kernel/sched/pds.h +@@ -1,9 +1,15 @@ + #define ALT_SCHED_NAME "PDS" + +-static int sched_timeslice_shift = 22; ++#define MIN_SCHED_NORMAL_PRIO (32) ++#define SCHED_NORMAL_PRIO_NUM (32) ++#define SCHED_EDGE_DELTA (SCHED_NORMAL_PRIO_NUM - NICE_WIDTH / 2) + + /* PDS assume NORMAL_PRIO_NUM is power of 2 */ + #define NORMAL_PRIO_MOD(x) ((x) & (NORMAL_PRIO_NUM - 1)) ++#define SCHED_NORMAL_PRIO_MOD(x) ((x) & (SCHED_NORMAL_PRIO_NUM - 1)) ++ ++/* 4ms -> shift 22, 2 time slice slots -> shift 23 */ ++static int sched_timeslice_shift = 23; + + /* + * Common interfaces +@@ -11,18 +17,18 @@ static int sched_timeslice_shift = 22; + static inline void sched_timeslice_imp(const int timeslice_ms) + { + if (2 == timeslice_ms) +- sched_timeslice_shift = 21; ++ sched_timeslice_shift = 22; + } + + static inline int + task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + { +- s64 delta = p->deadline - rq->time_edge + NORMAL_PRIO_NUM - NICE_WIDTH; ++ s64 delta = p->deadline - rq->time_edge + SCHED_EDGE_DELTA; + + #ifdef ALT_SCHED_DEBUG + if (WARN_ONCE(delta > NORMAL_PRIO_NUM - 1, + "pds: task_sched_prio_normal() delta %lld\n", delta)) +- return NORMAL_PRIO_NUM - 1; ++ return SCHED_NORMAL_PRIO_NUM - 1; + #endif + + return max(0LL, delta); +@@ -30,8 +36,8 @@ task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) + + static inline int task_sched_prio(const struct task_struct *p) + { +- return (p->prio < MIN_NORMAL_PRIO) ? p->prio : +- MIN_NORMAL_PRIO + task_sched_prio_normal(p, task_rq(p)); ++ return (p->prio < MIN_NORMAL_PRIO) ? (p->prio >> 2) : ++ MIN_SCHED_NORMAL_PRIO + task_sched_prio_normal(p, task_rq(p)); + } + + static inline int +@@ -39,30 +45,35 @@ task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) + { + u64 idx; + +- if (p->prio < MAX_RT_PRIO) +- return p->prio; ++ if (p->prio < MIN_NORMAL_PRIO) ++ return p->prio >> 2; + +- idx = max(p->deadline + NORMAL_PRIO_NUM - NICE_WIDTH, rq->time_edge); +- return MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(idx); ++ idx = max(p->deadline + SCHED_EDGE_DELTA, rq->time_edge); ++ /*printk(KERN_INFO "sched: task_sched_prio_idx edge:%llu, deadline=%llu idx=%llu\n", rq->time_edge, p->deadline, idx);*/ ++ return MIN_SCHED_NORMAL_PRIO + SCHED_NORMAL_PRIO_MOD(idx); + } + +-static inline int sched_prio2idx(int prio, struct rq *rq) ++static inline int sched_prio2idx(int sched_prio, struct rq *rq) + { +- return (IDLE_TASK_SCHED_PRIO == prio || prio < MAX_RT_PRIO) ? prio : +- MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(prio + rq->time_edge); ++ return (IDLE_TASK_SCHED_PRIO == sched_prio || sched_prio < MIN_SCHED_NORMAL_PRIO) ? ++ sched_prio : ++ MIN_SCHED_NORMAL_PRIO + SCHED_NORMAL_PRIO_MOD(sched_prio + rq->time_edge); + } + +-static inline int sched_idx2prio(int idx, struct rq *rq) ++static inline int sched_idx2prio(int sched_idx, struct rq *rq) + { +- return (idx < MAX_RT_PRIO) ? idx : MIN_NORMAL_PRIO + +- NORMAL_PRIO_MOD(idx - rq->time_edge); ++ int ret; ++ ret = (sched_idx < MIN_SCHED_NORMAL_PRIO) ? sched_idx : ++ MIN_SCHED_NORMAL_PRIO + SCHED_NORMAL_PRIO_MOD(sched_idx - rq->time_edge); ++ /*printk(KERN_INFO "sched: sched_idx2prio edge:%llu, %d -> %d\n", rq->time_edge, sched_idx, ret);*/ ++ ++ return ret; + } + + static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) + { +- if (p->prio >= MAX_RT_PRIO) +- p->deadline = (rq->clock >> sched_timeslice_shift) + +- p->static_prio - (MAX_PRIO - NICE_WIDTH); ++ if (p->prio >= MIN_NORMAL_PRIO) ++ p->deadline = rq->time_edge + (p->static_prio - (MAX_PRIO - NICE_WIDTH)) / 2; + } + + int task_running_nice(struct task_struct *p) +@@ -70,36 +81,48 @@ int task_running_nice(struct task_struct *p) + return (p->prio > DEFAULT_PRIO); + } + ++const u64 RT_MASK = 0xffffffffULL; ++ + static inline void update_rq_time_edge(struct rq *rq) + { + struct list_head head; + u64 old = rq->time_edge; + u64 now = rq->clock >> sched_timeslice_shift; + u64 prio, delta; ++ DECLARE_BITMAP(normal, SCHED_QUEUE_BITS); + + if (now == old) + return; + + rq->time_edge = now; +- delta = min_t(u64, NORMAL_PRIO_NUM, now - old); ++ delta = min_t(u64, SCHED_NORMAL_PRIO_NUM, now - old); + INIT_LIST_HEAD(&head); + +- for_each_set_bit(prio, &rq->queue.bitmap[2], delta) +- list_splice_tail_init(rq->queue.heads + MIN_NORMAL_PRIO + +- NORMAL_PRIO_MOD(prio + old), &head); ++ /*printk(KERN_INFO "sched: update_rq_time_edge 0x%016lx %llu\n", rq->queue.bitmap[0], delta);*/ ++ prio = MIN_SCHED_NORMAL_PRIO; ++ for_each_set_bit_from(prio, &rq->queue.bitmap[0], MIN_SCHED_NORMAL_PRIO + delta) ++ list_splice_tail_init(rq->queue.heads + MIN_SCHED_NORMAL_PRIO + ++ SCHED_NORMAL_PRIO_MOD(prio + old), &head); + +- rq->queue.bitmap[2] = (NORMAL_PRIO_NUM == delta) ? 0UL : +- rq->queue.bitmap[2] >> delta; ++ bitmap_shift_right(&normal[0], &rq->queue.bitmap[0], delta, SCHED_QUEUE_BITS); + if (!list_empty(&head)) { + struct task_struct *p; +- u64 idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); ++ u64 idx = MIN_SCHED_NORMAL_PRIO + SCHED_NORMAL_PRIO_MOD(now); + + list_for_each_entry(p, &head, sq_node) + p->sq_idx = idx; + + list_splice(&head, rq->queue.heads + idx); +- rq->queue.bitmap[2] |= 1UL; ++ set_bit(MIN_SCHED_NORMAL_PRIO, &normal[0]); + } ++ bitmap_replace(&rq->queue.bitmap[0], &normal[0], &rq->queue.bitmap[0], ++ (const unsigned long *)&RT_MASK, SCHED_QUEUE_BITS); ++ /*printk(KERN_INFO "sched: update_rq_time_edge 0x%016lx 0x%016lx", rq->queue.bitmap[0], normal);*/ ++ if (rq->prio < MIN_SCHED_NORMAL_PRIO || IDLE_TASK_SCHED_PRIO == rq->prio) ++ return; ++ ++ rq->prio = (rq->prio < MIN_SCHED_NORMAL_PRIO + delta) ? ++ MIN_SCHED_NORMAL_PRIO : rq->prio - delta; + } + + static inline void time_slice_expired(struct task_struct *p, struct rq *rq) +@@ -112,7 +135,7 @@ static inline void time_slice_expired(struct task_struct *p, struct rq *rq) + + static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) + { +- u64 max_dl = rq->time_edge + NICE_WIDTH - 1; ++ u64 max_dl = rq->time_edge + NICE_WIDTH / 2 - 1; + if (unlikely(p->deadline > max_dl)) + p->deadline = max_dl; + }