From b8835ca20232e4bbf0738bd4e25bcffeb917ad57 Mon Sep 17 00:00:00 2001 From: ViNi-Arco <57958460+ViNi-Arco@users.noreply.github.com> Date: Tue, 6 Dec 2022 15:02:29 -0300 Subject: [PATCH] Update prjc 6.0r0 with latest official Alfred Chen (#659) * Update prjc 6.0r0 with latest official Alfred Chen Clean patchset with latest Alfred code: https://gitlab.com/alfredchen/linux-prjc/-/commits/linux-6.0.y-prjc/ * Update prjc 6.0r0 with latest official Alfred Chen And clean whitespace --- linux-tkg-patches/6.0/0009-prjc_v6.0-r0.patch | 512 ++++++------------ 1 file changed, 171 insertions(+), 341 deletions(-) diff --git a/linux-tkg-patches/6.0/0009-prjc_v6.0-r0.patch b/linux-tkg-patches/6.0/0009-prjc_v6.0-r0.patch index 06c0344..5326a46 100644 --- a/linux-tkg-patches/6.0/0009-prjc_v6.0-r0.patch +++ b/linux-tkg-patches/6.0/0009-prjc_v6.0-r0.patch @@ -1,56 +1,3 @@ -From 711a56e8f6314d77141b0f661e6c13c8a2c4dddf Mon Sep 17 00:00:00 2001 -From: Tor Vic -Date: Wed, 16 Nov 2022 11:29:00 +0100 -Subject: [PATCH] Project-C 6.0-rc0-vd - ---- - .../admin-guide/kernel-parameters.txt | 6 + - Documentation/admin-guide/sysctl/kernel.rst | 10 + - Documentation/scheduler/sched-BMQ.txt | 110 + - fs/proc/base.c | 2 +- - include/asm-generic/resource.h | 2 +- - include/linux/sched.h | 33 +- - include/linux/sched/deadline.h | 20 + - include/linux/sched/prio.h | 26 + - include/linux/sched/rt.h | 2 + - include/linux/sched/topology.h | 3 +- - init/Kconfig | 34 + - init/init_task.c | 18 + - kernel/Kconfig.preempt | 2 +- - kernel/cgroup/cpuset.c | 4 +- - kernel/delayacct.c | 2 +- - kernel/exit.c | 4 +- - kernel/locking/rtmutex.c | 16 +- - kernel/sched/Makefile | 5 + - kernel/sched/alt_core.c | 7959 +++++++++++++++++ - kernel/sched/alt_debug.c | 31 + - kernel/sched/alt_sched.h | 645 ++ - kernel/sched/bmq.h | 110 + - kernel/sched/build_policy.c | 8 +- - kernel/sched/build_utility.c | 3 +- - kernel/sched/cpufreq_schedutil.c | 10 + - kernel/sched/cputime.c | 10 +- - kernel/sched/debug.c | 10 + - kernel/sched/idle.c | 2 + - kernel/sched/pds.h | 127 + - kernel/sched/pelt.c | 4 +- - kernel/sched/pelt.h | 8 +- - kernel/sched/sched.h | 9 + - kernel/sched/stats.c | 4 + - kernel/sched/stats.h | 2 + - kernel/sched/topology.c | 17 + - kernel/sysctl.c | 15 + - kernel/time/hrtimer.c | 2 + - kernel/time/posix-cpu-timers.c | 10 +- - kernel/trace/trace_selftest.c | 5 + - 39 files changed, 9267 insertions(+), 23 deletions(-) - create mode 100644 Documentation/scheduler/sched-BMQ.txt - create mode 100644 kernel/sched/alt_core.c - create mode 100644 kernel/sched/alt_debug.c - create mode 100644 kernel/sched/alt_sched.h - create mode 100644 kernel/sched/bmq.h - create mode 100644 kernel/sched/pds.h - diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 426fa892d311..43b06e44128c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt @@ -405,7 +352,7 @@ index 816df6cc444e..c8da08e18c91 100644 #else static inline void rebuild_sched_domains_energy(void) diff --git a/init/Kconfig b/init/Kconfig -index 532362fcfe31..d9ccd98f2856 100644 +index 532362fcfe31..2bf9e67b73c9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -808,6 +808,7 @@ menu "Scheduler features" @@ -422,15 +369,15 @@ index 532362fcfe31..d9ccd98f2856 100644 +menuconfig SCHED_ALT + bool "Alternative CPU Schedulers" -+ default n ++ default y + help -+ This feature enables the ProjectC alternative CPU schedulers." ++ This feature enable alternative CPU scheduler" + +if SCHED_ALT + +choice -+ prompt "Alternative CPU schedulers" -+ default SCHED_PDS ++ prompt "Alternative CPU Scheduler" ++ default SCHED_BMQ + +config SCHED_BMQ + bool "BMQ CPU scheduler" @@ -685,10 +632,10 @@ index 976092b7bd45..31d587c16ec1 100644 obj-y += build_utility.o diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c new file mode 100644 -index 000000000000..18dfee354f9b +index 000000000000..03e3956194f7 --- /dev/null +++ b/kernel/sched/alt_core.c -@@ -0,0 +1,7959 @@ +@@ -0,0 +1,7887 @@ +/* + * kernel/sched/alt_core.c + * @@ -758,7 +705,7 @@ index 000000000000..18dfee354f9b +#define sched_feat(x) (0) +#endif /* CONFIG_SCHED_DEBUG */ + -+#define ALT_SCHED_VERSION "v6.0-r0-vd" ++#define ALT_SCHED_VERSION "v6.0-r0" + +/* rt_prio(prio) defined in include/linux/sched/rt.h */ +#define rt_task(p) rt_prio((p)->prio) @@ -838,91 +785,7 @@ index 000000000000..18dfee354f9b +#ifdef CONFIG_SCHED_SMT +static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; +#endif -+ -+#define BITS_PER_ATOMIC_LONG_T BITS_PER_LONG -+typedef struct sched_bitmask { -+ atomic_long_t bits[DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T)]; -+} sched_bitmask_t; -+static sched_bitmask_t sched_rq_watermark[NR_CPUS] ____cacheline_aligned_in_smp; -+ -+#define x(p, set, mask) \ -+ do { \ -+ smp_mb__before_atomic(); \ -+ if (set) \ -+ atomic_long_or((mask), (p)); \ -+ else \ -+ atomic_long_and(~(mask), (p)); \ -+ smp_mb__after_atomic(); \ -+ } while (0) -+ -+static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned int end, -+ unsigned int start, bool set) -+{ -+ unsigned int start_idx, start_bit; -+ unsigned int end_idx, end_bit; -+ atomic_long_t *p; -+ -+ if (end == start) { -+ return; -+ } -+ -+ start_idx = start / BITS_PER_ATOMIC_LONG_T; -+ start_bit = start % BITS_PER_ATOMIC_LONG_T; -+ end_idx = (end - 1) / BITS_PER_ATOMIC_LONG_T; -+ end_bit = (end - 1) % BITS_PER_ATOMIC_LONG_T; -+ p = &sched_rq_watermark[cpu].bits[end_idx]; -+ -+ if (end_idx == start_idx) { -+ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)) & (~0UL << start_bit)); -+ return; -+ } -+ -+ if (end_bit != BITS_PER_ATOMIC_LONG_T - 1) { -+ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit))); -+ p -= 1; -+ end_idx -= 1; -+ } -+ -+ while (end_idx != start_idx) { -+ smp_mb__before_atomic(); -+ atomic_long_set(p, set ? ~0UL : 0); -+ smp_mb__after_atomic(); -+ p -= 1; -+ end_idx -= 1; -+ } -+ -+ x(p, set, ~0UL << start_bit); -+} -+ -+#undef x -+ -+static __always_inline bool sched_rq_watermark_and(cpumask_t *dstp, const cpumask_t *cpus, int prio, bool not) -+{ -+ int cpu; -+ bool ret = false; -+ int idx = prio / BITS_PER_ATOMIC_LONG_T; -+ int bit = prio % BITS_PER_ATOMIC_LONG_T; -+ -+ cpumask_clear(dstp); -+ for_each_cpu(cpu, cpus) -+ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not) { -+ __cpumask_set_cpu(cpu, dstp); -+ ret = true; -+ } -+ return ret; -+} -+ -+static __always_inline bool sched_rq_watermark_test(const cpumask_t *cpus, int prio, bool not) -+{ -+ int cpu; -+ int idx = prio / BITS_PER_ATOMIC_LONG_T; -+ int bit = prio % BITS_PER_ATOMIC_LONG_T; -+ -+ for_each_cpu(cpu, cpus) -+ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not) -+ return true; -+ return false; -+} ++static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp; + +/* sched_queue related functions */ +static inline void sched_queue_init(struct sched_queue *q) @@ -951,6 +814,7 @@ index 000000000000..18dfee354f9b +{ + unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); + unsigned long last_wm = rq->watermark; ++ unsigned long i; + int cpu; + + if (watermark == last_wm) @@ -959,25 +823,28 @@ index 000000000000..18dfee354f9b + rq->watermark = watermark; + cpu = cpu_of(rq); + if (watermark < last_wm) { -+ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - watermark, SCHED_QUEUE_BITS - last_wm, false); ++ for (i = last_wm; i > watermark; i--) ++ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); +#ifdef CONFIG_SCHED_SMT + if (static_branch_likely(&sched_smt_present) && -+ unlikely(IDLE_TASK_SCHED_PRIO == last_wm)) ++ IDLE_TASK_SCHED_PRIO == last_wm) + cpumask_andnot(&sched_sg_idle_mask, + &sched_sg_idle_mask, cpu_smt_mask(cpu)); +#endif + return; + } + /* last_wm < watermark */ -+ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - last_wm, SCHED_QUEUE_BITS - watermark, true); ++ for (i = watermark; i > last_wm; i--) ++ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); +#ifdef CONFIG_SCHED_SMT -+ if (static_branch_likely(&sched_smt_present) && -+ unlikely(IDLE_TASK_SCHED_PRIO == watermark)) { -+ const cpumask_t *smt_mask = cpu_smt_mask(cpu); ++ if (static_branch_likely(&sched_smt_present) && ++ IDLE_TASK_SCHED_PRIO == watermark) { ++ cpumask_t tmp; + -+ if (!sched_rq_watermark_test(smt_mask, 0, true)) ++ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark); ++ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) + cpumask_or(&sched_sg_idle_mask, -+ &sched_sg_idle_mask, smt_mask); ++ &sched_sg_idle_mask, cpu_smt_mask(cpu)); + } +#endif +} @@ -1394,15 +1261,21 @@ index 000000000000..18dfee354f9b + rq->load_stamp = time; +} + -+unsigned long rq_load_util(struct rq *rq, int cpu) ++unsigned long rq_load_util(int cpu) +{ -+ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (arch_scale_cpu_capacity(cpu) >> RQ_UTIL_SHIFT); ++ struct rq *rq; ++ unsigned long max; ++ ++ rq = cpu_rq(cpu); ++ max = arch_scale_cpu_capacity(cpu); ++ ++ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (max >> RQ_UTIL_SHIFT); +} + +#ifdef CONFIG_SMP +unsigned long sched_cpu_util(int cpu) +{ -+ return rq_load_util(cpu_rq(cpu), cpu); ++ return rq_load_util(cpu); +} +#endif /* CONFIG_SMP */ + @@ -1579,11 +1452,15 @@ index 000000000000..18dfee354f9b + ({ \ + typeof(ptr) _ptr = (ptr); \ + typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _val = *_ptr; \ ++ typeof(*_ptr) _old, _val = *_ptr; \ + \ -+ do { \ -+ } while (!try_cmpxchg(_ptr, &_val, _val | _mask)); \ -+ _val; \ ++ for (;;) { \ ++ _old = cmpxchg(_ptr, _val, _val | _mask); \ ++ if (_old == _val) \ ++ break; \ ++ _val = _old; \ ++ } \ ++ _old; \ +}) + +#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) @@ -1592,7 +1469,7 @@ index 000000000000..18dfee354f9b + * this avoids any races wrt polling state changes and thereby avoids + * spurious IPIs. + */ -+static inline bool set_nr_and_not_polling(struct task_struct *p) ++static bool set_nr_and_not_polling(struct task_struct *p) +{ + struct thread_info *ti = task_thread_info(p); + return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); @@ -1607,28 +1484,30 @@ index 000000000000..18dfee354f9b +static bool set_nr_if_polling(struct task_struct *p) +{ + struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) val = READ_ONCE(ti->flags); ++ typeof(ti->flags) old, val = READ_ONCE(ti->flags); + + for (;;) { + if (!(val & _TIF_POLLING_NRFLAG)) + return false; + if (val & _TIF_NEED_RESCHED) + return true; -+ if (try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED)) ++ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); ++ if (old == val) + break; ++ val = old; + } + return true; +} + +#else -+static inline bool set_nr_and_not_polling(struct task_struct *p) ++static bool set_nr_and_not_polling(struct task_struct *p) +{ + set_tsk_need_resched(p); + return true; +} + +#ifdef CONFIG_SMP -+static inline bool set_nr_if_polling(struct task_struct *p) ++static bool set_nr_if_polling(struct task_struct *p) +{ + return false; +} @@ -2302,7 +2181,7 @@ index 000000000000..18dfee354f9b + rq = cpu_rq(new_cpu); + + raw_spin_lock(&rq->lock); -+ WARN_ON_ONCE(task_cpu(p) != new_cpu); ++ BUG_ON(task_cpu(p) != new_cpu); + sched_task_sanity_check(p, rq); + enqueue_task(p, rq, 0); + p->on_rq = TASK_ON_RQ_QUEUED; @@ -2668,9 +2547,9 @@ index 000000000000..18dfee354f9b +#ifdef CONFIG_SCHED_SMT + cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || +#endif -+ sched_rq_watermark_and(&tmp, &chk_mask, 0, false) || -+ sched_rq_watermark_and(&tmp, &chk_mask, -+ SCHED_QUEUE_BITS - 1 - task_sched_prio(p), false)) ++ cpumask_and(&tmp, &chk_mask, sched_rq_watermark) || ++ cpumask_and(&tmp, &chk_mask, ++ sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p))) + return best_mask_cpu(task_cpu(p), &tmp); + + return best_mask_cpu(task_cpu(p), &chk_mask); @@ -3111,6 +2990,13 @@ index 000000000000..18dfee354f9b + if (!llist) + return; + ++ /* ++ * rq::ttwu_pending racy indication of out-standing wakeups. ++ * Races such that false-negatives are possible, since they ++ * are shorter lived that false-positives would be. ++ */ ++ WRITE_ONCE(rq->ttwu_pending, 0); ++ + rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); + @@ -3124,17 +3010,6 @@ index 000000000000..18dfee354f9b + ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); + } + -+ /* -+ * Must be after enqueueing at least once task such that -+ * idle_cpu() does not observe a false-negative -- if it does, -+ * it is possible for select_idle_siblings() to stack a number -+ * of tasks on this CPU during that window. -+ * -+ * It is ok to clear ttwu_pending when another task pending. -+ * We will receive IPI after local irq enabled and then enqueue it. -+ * Since now nr_running > 0, idle_cpu() will always get correct result. -+ */ -+ WRITE_ONCE(rq->ttwu_pending, 0); + rq_unlock_irqrestore(rq, &rf); +} + @@ -3605,40 +3480,6 @@ index 000000000000..18dfee354f9b + return success; +} + -+static bool __task_needs_rq_lock(struct task_struct *p) -+{ -+ unsigned int state = READ_ONCE(p->__state); -+ -+ /* -+ * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when -+ * the task is blocked. Make sure to check @state since ttwu() can drop -+ * locks at the end, see ttwu_queue_wakelist(). -+ */ -+ if (state == TASK_RUNNING || state == TASK_WAKING) -+ return true; -+ -+ /* -+ * Ensure we load p->on_rq after p->__state, otherwise it would be -+ * possible to, falsely, observe p->on_rq == 0. -+ * -+ * See try_to_wake_up() for a longer comment. -+ */ -+ smp_rmb(); -+ if (p->on_rq) -+ return true; -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure the task has finished __schedule() and will not be referenced -+ * anymore. Again, see try_to_wake_up() for a longer comment. -+ */ -+ smp_rmb(); -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+#endif -+ -+ return false; -+} -+ +/** + * task_call_func - Invoke a function on task in fixed state + * @p: Process for which the function is to be invoked, can be @current. @@ -3656,12 +3497,28 @@ index 000000000000..18dfee354f9b +int task_call_func(struct task_struct *p, task_call_f func, void *arg) +{ + struct rq *rq = NULL; ++ unsigned int state; + struct rq_flags rf; + int ret; + + raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + -+ if (__task_needs_rq_lock(p)) ++ state = READ_ONCE(p->__state); ++ ++ /* ++ * Ensure we load p->on_rq after p->__state, otherwise it would be ++ * possible to, falsely, observe p->on_rq == 0. ++ * ++ * See try_to_wake_up() for a longer comment. ++ */ ++ smp_rmb(); ++ ++ /* ++ * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when ++ * the task is blocked. Make sure to check @state since ttwu() can drop ++ * locks at the end, see ttwu_queue_wakelist(). ++ */ ++ if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq) + rq = __task_rq_lock(p, &rf); + + /* @@ -4074,7 +3931,8 @@ index 000000000000..18dfee354f9b + * Claim the task as running, we do this before switching to it + * such that any running task will have this set. + * -+ * See the ttwu() WF_ON_CPU case and its ordering comment. ++ * See the smp_load_acquire(&p->on_cpu) case in ttwu() and ++ * its ordering comment. + */ + WRITE_ONCE(next->on_cpu, 1); +} @@ -4144,7 +4002,7 @@ index 000000000000..18dfee354f9b + if (likely(!head)) + return NULL; + -+ lockdep_assert_held(&rq->lock); ++ lockdep_assert_rq_held(rq); + /* + * Must not take balance_push_callback off the list when + * splice_balance_callbacks() and balance_callbacks() are not @@ -4448,9 +4306,6 @@ index 000000000000..18dfee354f9b + * finish_task_switch()'s mmdrop(). + */ + switch_mm_irqs_off(prev->active_mm, next->mm, next); -+#ifdef CONFIG_LRU_GEN_ENABLED -+ lru_gen_use_mm(next->mm); -+#endif + + if (!prev->mm) { // from kernel + /* will mmdrop() in finish_task_switch(). */ @@ -4815,7 +4670,7 @@ index 000000000000..18dfee354f9b + * find potential cpus which can migrate the current running task + */ + if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && -+ sched_rq_watermark_and(&chk, cpu_online_mask, 0, true) && ++ cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) && + cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) { + int i; + @@ -4957,7 +4812,7 @@ index 000000000000..18dfee354f9b +int __init sched_tick_offload_init(void) +{ + tick_work_cpu = alloc_percpu(struct tick_work); -+ WARN_ON_ONCE(!tick_work_cpu); ++ BUG_ON(!tick_work_cpu); + return 0; +} + @@ -5123,8 +4978,9 @@ index 000000000000..18dfee354f9b +#ifdef ALT_SCHED_DEBUG +void alt_sched_debug(void) +{ -+ printk(KERN_INFO "sched: pending: 0x%04lx, sg_idle: 0x%04lx\n", ++ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", + sched_rq_pending_mask.bits[0], ++ sched_rq_watermark[0].bits[0], + sched_sg_idle_mask.bits[0]); +} +#else @@ -5762,7 +5618,7 @@ index 000000000000..18dfee354f9b + enum ctx_state prev_state; + + /* Catch callers which need to be fixed */ -+ WARN_ON_ONCE(preempt_count() || !irqs_disabled()); ++ BUG_ON(preempt_count() || !irqs_disabled()); + + prev_state = exception_enter(); + @@ -5937,17 +5793,29 @@ index 000000000000..18dfee354f9b +EXPORT_SYMBOL(set_user_nice); + +/* ++ * is_nice_reduction - check if nice value is an actual reduction ++ * ++ * Similar to can_nice() but does not perform a capability check. ++ * ++ * @p: task ++ * @nice: nice value ++ */ ++static bool is_nice_reduction(const struct task_struct *p, const int nice) ++{ ++ /* Convert nice value [19,-20] to rlimit style value [1,40]: */ ++ int nice_rlim = nice_to_rlimit(nice); ++ ++ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE)); ++} ++ ++/* + * can_nice - check if a task can reduce its nice value + * @p: task + * @nice: nice value + */ +int can_nice(const struct task_struct *p, const int nice) +{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); ++ return is_nice_reduction(p, nice) || capable(CAP_SYS_NICE); +} + +#ifdef __ARCH_WANT_SYS_NICE @@ -6098,6 +5966,45 @@ index 000000000000..18dfee354f9b + return match; +} + ++/* ++ * Allow unprivileged RT tasks to decrease priority. ++ * Only issue a capable test if needed and only once to avoid an audit ++ * event on permitted non-privileged operations: ++ */ ++static int user_check_sched_setscheduler(struct task_struct *p, ++ const struct sched_attr *attr, ++ int policy, int reset_on_fork) ++{ ++ if (rt_policy(policy)) { ++ unsigned long rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); ++ ++ /* Can't set/change the rt policy: */ ++ if (policy != p->policy && !rlim_rtprio) ++ goto req_priv; ++ ++ /* Can't increase priority: */ ++ if (attr->sched_priority > p->rt_priority && ++ attr->sched_priority > rlim_rtprio) ++ goto req_priv; ++ } ++ ++ /* Can't change other user's priorities: */ ++ if (!check_same_owner(p)) ++ goto req_priv; ++ ++ /* Normal users shall not reset the sched_reset_on_fork flag: */ ++ if (p->sched_reset_on_fork && !reset_on_fork) ++ goto req_priv; ++ ++ return 0; ++ ++req_priv: ++ if (!capable(CAP_SYS_NICE)) ++ return -EPERM; ++ ++ return 0; ++} ++ +static int __sched_setscheduler(struct task_struct *p, + const struct sched_attr *attr, + bool user, bool pi) @@ -6117,7 +6024,7 @@ index 000000000000..18dfee354f9b + raw_spinlock_t *lock; + + /* The pi code expects interrupts enabled */ -+ WARN_ON_ONCE(pi && in_interrupt()); ++ BUG_ON(pi && in_interrupt()); + + /* + * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO @@ -6154,34 +6061,11 @@ index 000000000000..18dfee354f9b + (attr->sched_priority != 0)) + return -EINVAL; + -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (SCHED_FIFO == policy || SCHED_RR == policy) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (attr->sched_priority > p->rt_priority && -+ attr->sched_priority > rlim_rtprio) -+ return -EPERM; -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ + if (user) { ++ retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork); ++ if (retval) ++ return retval; ++ + retval = security_task_setscheduler(p); + if (retval) + return retval; @@ -7605,7 +7489,7 @@ index 000000000000..18dfee354f9b +{ + struct mm_struct *mm = current->active_mm; + -+ WARN_ON_ONCE(current != this_rq()->idle); ++ BUG_ON(current != this_rq()->idle); + + if (mm != &init_mm) { + switch_mm(mm, &init_mm, current); @@ -8119,17 +8003,8 @@ index 000000000000..18dfee354f9b + wait_bit_init(); + +#ifdef CONFIG_SMP -+ for (i = 0; i < nr_cpu_ids; i++) { -+ long val = cpumask_test_cpu(i, cpu_present_mask) ? -1L : 0; -+ int j; -+ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++) -+ atomic_long_set(&sched_rq_watermark[i].bits[j], val); -+ } -+ for (i = nr_cpu_ids; i < NR_CPUS; i++) { -+ int j; -+ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++) -+ atomic_long_set(&sched_rq_watermark[i].bits[j], 0); -+ } ++ for (i = 0; i < SCHED_QUEUE_BITS; i++) ++ cpumask_copy(sched_rq_watermark + i, cpu_present_mask); +#endif + +#ifdef CONFIG_CGROUP_SCHED @@ -8687,7 +8562,7 @@ index 000000000000..1212a031700e +{} diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h new file mode 100644 -index 000000000000..318431c553ca +index 000000000000..bec061f2ae10 --- /dev/null +++ b/kernel/sched/alt_sched.h @@ -0,0 +1,645 @@ @@ -8923,7 +8798,7 @@ index 000000000000..318431c553ca +#endif /* CONFIG_NO_HZ_COMMON */ +}; + -+extern unsigned long rq_load_util(struct rq *rq, int cpu); ++extern unsigned long rq_load_util(int cpu); + +extern unsigned long calc_load_update; +extern atomic_long_t calc_load_tasks; @@ -9478,18 +9353,10 @@ index d9dc9ab3773f..71a25540d65e 100644 +#include "deadline.c" +#endif diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c -index 99bdd96f454f..bc17d5a6fc41 100644 +index 99bdd96f454f..23f80a86d2d7 100644 --- a/kernel/sched/build_utility.c +++ b/kernel/sched/build_utility.c -@@ -34,7 +34,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -85,7 +84,9 @@ +@@ -85,7 +85,9 @@ #ifdef CONFIG_SMP # include "cpupri.c" @@ -9500,7 +9367,7 @@ index 99bdd96f454f..bc17d5a6fc41 100644 #endif diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 1207c78f85c1..f66b715e4287 100644 +index 1207c78f85c1..3ed06d7ef4f8 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -159,9 +159,14 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) @@ -9513,7 +9380,7 @@ index 1207c78f85c1..f66b715e4287 100644 FREQUENCY_UTIL, NULL); +#else + sg_cpu->bw_dl = 0; -+ sg_cpu->util = rq_load_util(rq, sg_cpu->cpu); ++ sg_cpu->util = rq_load_util(cpu_of(rq)); +#endif /* CONFIG_SCHED_ALT */ } @@ -9610,7 +9477,7 @@ index 667876da8382..a416d4bedd98 100644 * to the console @@ -215,6 +216,7 @@ static const struct file_operations sched_scaling_fops = { }; - + #endif /* SMP */ +#endif /* !CONFIG_SCHED_ALT */ @@ -10024,13 +9891,13 @@ index 8739c2a5a54e..d8dd6c15eb47 100644 +#endif /* CONFIG_NUMA */ +#endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 205d605cacc5..c1dac3a542b8 100644 +index 205d605cacc5..7dd950601cca 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -86,6 +86,10 @@ /* Constants used for minimum and maximum */ - + +#ifdef CONFIG_SCHED_ALT +extern int sched_yield_type; +#endif @@ -10038,7 +9905,23 @@ index 205d605cacc5..c1dac3a542b8 100644 #ifdef CONFIG_PERF_EVENTS static const int six_hundred_forty_kb = 640 * 1024; #endif -@@ -1943,6 +1947,17 @@ static struct ctl_table kern_table[] = { +@@ -1631,6 +1635,7 @@ int proc_do_static_key(struct ctl_table *table, int write, + } + + static struct ctl_table kern_table[] = { ++#ifndef CONFIG_SCHED_ALT + #ifdef CONFIG_NUMA_BALANCING + { + .procname = "numa_balancing", +@@ -1642,6 +1647,7 @@ static struct ctl_table kern_table[] = { + .extra2 = SYSCTL_FOUR, + }, + #endif /* CONFIG_NUMA_BALANCING */ ++#endif /* !CONFIG_SCHED_ALT */ + { + .procname = "panic", + .data = &panic_timeout, +@@ -1943,6 +1949,17 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif @@ -10151,56 +10034,3 @@ index a2d301f58ced..2ccdede8585c 100644 }; struct wakeup_test_data *x = data; --- -2.38.1 - -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f3bac14124c3..27eafbccf23d 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -1448,11 +1448,13 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) - - WARN_ON_ONCE(is_migration_disabled(p)); - #endif -- if (task_cpu(p) == new_cpu) -- return; -+ - trace_sched_migrate_task(p, new_cpu); -- rseq_migrate(p); -- perf_event_task_migrate(p); -+ -+ if (task_cpu(p) != new_cpu) { -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ } - - __set_task_cpu(p, new_cpu); - } -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f3bac14124c3..5678c247c0ab 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -810,8 +810,8 @@ unsigned long get_wchan(struct task_struct *p) - * Context: rq->lock - */ - #define __SCHED_DEQUEUE_TASK(p, rq, flags) \ -- psi_dequeue(p, flags & DEQUEUE_SLEEP); \ - sched_info_dequeue(rq, p); \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ - \ - list_del(&p->sq_node); \ - if (list_empty(&rq->queue.heads[p->sq_idx])) \ -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -index f3bac14124c3..349a2c92d534 100644 ---- a/kernel/sched/alt_core.c -+++ b/kernel/sched/alt_core.c -@@ -4404,8 +4404,8 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt) - - /* - * Compile time debug macro -- * #define ALT_SCHED_DEBUG - */ -+#define ALT_SCHED_DEBUG - - #ifdef ALT_SCHED_DEBUG - void alt_sched_debug(void)