linux 6.1: Import the 6.0-based prjc patchset from ptr1337 until we can find out why current upstream is frogged. Re-enable pds/bmq and set them as unofficial.

Tk-Glitch
2022-12-16 00:52:25 +01:00
parent 4547454e3d
commit 38e73fb059
2 changed files with 394 additions and 305 deletions


@@ -282,7 +282,8 @@ _set_cpu_scheduler() {
elif [ "$_kver" = "600" ]; then
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
elif [ "$_kver" = "601" ]; then
_avail_cpu_scheds=("cfs" "tt" "bore")
_avail_cpu_scheds=("cfs" "pds" "bmq" "tt" "bore")
_projectc_unoff=1
else
_avail_cpu_scheds=("cfs")
fi


@@ -1,8 +1,62 @@
From 4b5e85eec8c30a02e45965aa898d26ed8fdd32be Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Mon, 12 Dec 2022 11:28:51 +0100
Subject: [PATCH] prjc-cachy
Signed-off-by: Peter Jung <admin@ptr1337.dev>
---
.../admin-guide/kernel-parameters.txt | 6 +
Documentation/admin-guide/sysctl/kernel.rst | 10 +
Documentation/scheduler/sched-BMQ.txt | 110 +
fs/proc/base.c | 2 +-
include/asm-generic/resource.h | 2 +-
include/linux/sched.h | 33 +-
include/linux/sched/deadline.h | 20 +
include/linux/sched/prio.h | 26 +
include/linux/sched/rt.h | 2 +
include/linux/sched/topology.h | 3 +-
init/Kconfig | 34 +
init/init_task.c | 18 +
kernel/Kconfig.preempt | 2 +-
kernel/cgroup/cpuset.c | 4 +-
kernel/delayacct.c | 2 +-
kernel/exit.c | 4 +-
kernel/locking/rtmutex.c | 16 +-
kernel/sched/Makefile | 5 +
kernel/sched/alt_core.c | 7971 +++++++++++++++++
kernel/sched/alt_debug.c | 31 +
kernel/sched/alt_sched.h | 658 ++
kernel/sched/bmq.h | 110 +
kernel/sched/build_policy.c | 8 +-
kernel/sched/build_utility.c | 3 +-
kernel/sched/cpufreq_schedutil.c | 10 +
kernel/sched/cputime.c | 10 +-
kernel/sched/debug.c | 10 +
kernel/sched/idle.c | 2 +
kernel/sched/pds.h | 127 +
kernel/sched/pelt.c | 4 +-
kernel/sched/pelt.h | 8 +-
kernel/sched/sched.h | 9 +
kernel/sched/stats.c | 4 +
kernel/sched/stats.h | 2 +
kernel/sched/topology.c | 17 +
kernel/sysctl.c | 15 +
kernel/time/hrtimer.c | 2 +
kernel/time/posix-cpu-timers.c | 10 +-
kernel/trace/trace_selftest.c | 5 +
39 files changed, 9292 insertions(+), 23 deletions(-)
create mode 100644 Documentation/scheduler/sched-BMQ.txt
create mode 100644 kernel/sched/alt_core.c
create mode 100644 kernel/sched/alt_debug.c
create mode 100644 kernel/sched/alt_sched.h
create mode 100644 kernel/sched/bmq.h
create mode 100644 kernel/sched/pds.h
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 42af9ca0127e..31747ec54f9d 100644
index 9ffeb6f44966..4dbc3b80f406 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5406,6 +5406,12 @@
@@ -5415,6 +5415,12 @@
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
@@ -176,7 +230,7 @@ index 8874f681b056..59eb72bf7d5f 100644
[RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ffb6eb55cd13..2e730a59caa2 100644
index 5affff14993d..0fe3ce1d81c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -762,8 +762,14 @@ struct task_struct {
@@ -232,7 +286,7 @@ index ffb6eb55cd13..2e730a59caa2 100644
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
@@ -1545,6 +1567,15 @@ struct task_struct {
@@ -1542,6 +1564,15 @@ struct task_struct {
*/
};
@@ -352,10 +406,10 @@ index 816df6cc444e..c8da08e18c91 100644
#else
static inline void rebuild_sched_domains_energy(void)
diff --git a/init/Kconfig b/init/Kconfig
index 94125d3b6893..c87ba766d354 100644
index 5cf5c424fbf1..35d3ec42df0f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -819,6 +819,7 @@ menu "Scheduler features"
@@ -836,6 +836,7 @@ menu "Scheduler features"
config UCLAMP_TASK
bool "Enable utilization clamping for RT/FAIR tasks"
depends on CPU_FREQ_GOV_SCHEDUTIL
@@ -363,21 +417,21 @@ index 94125d3b6893..c87ba766d354 100644
help
This feature enables the scheduler to track the clamped utilization
of each CPU based on RUNNABLE tasks scheduled on that CPU.
@@ -865,6 +866,35 @@ config UCLAMP_BUCKETS_COUNT
@@ -882,6 +883,35 @@ config UCLAMP_BUCKETS_COUNT
If in doubt, use the default value.
+menuconfig SCHED_ALT
+ bool "Alternative CPU Schedulers"
+ default y
+ default n
+ help
+ This feature enable alternative CPU scheduler"
+ This feature enables the ProjectC alternative CPU schedulers.
+
+if SCHED_ALT
+
+choice
+ prompt "Alternative CPU Scheduler"
+ default SCHED_BMQ
+ prompt "Alternative CPU schedulers"
+ default SCHED_PDS
+
+config SCHED_BMQ
+ bool "BMQ CPU scheduler"
@@ -399,7 +453,7 @@ index 94125d3b6893..c87ba766d354 100644
endmenu
#
@@ -918,6 +948,7 @@ config NUMA_BALANCING
@@ -935,6 +965,7 @@ config NUMA_BALANCING
depends on ARCH_SUPPORTS_NUMA_BALANCING
depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
depends on SMP && NUMA && MIGRATION && !PREEMPT_RT
@@ -407,7 +461,7 @@ index 94125d3b6893..c87ba766d354 100644
help
This option adds support for automatic NUMA aware memory/task placement.
The mechanism is quite primitive and is based on migrating memory when
@@ -1015,6 +1046,7 @@ config FAIR_GROUP_SCHED
@@ -1032,6 +1063,7 @@ config FAIR_GROUP_SCHED
depends on CGROUP_SCHED
default CGROUP_SCHED
@@ -415,7 +469,7 @@ index 94125d3b6893..c87ba766d354 100644
config CFS_BANDWIDTH
bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
depends on FAIR_GROUP_SCHED
@@ -1037,6 +1069,7 @@ config RT_GROUP_SCHED
@@ -1054,6 +1086,7 @@ config RT_GROUP_SCHED
realtime bandwidth for them.
See Documentation/scheduler/sched-rt-group.rst for more information.
@@ -423,7 +477,7 @@ index 94125d3b6893..c87ba766d354 100644
endif #CGROUP_SCHED
config UCLAMP_TASK_GROUP
@@ -1281,6 +1314,7 @@ config CHECKPOINT_RESTORE
@@ -1314,6 +1347,7 @@ config CHECKPOINT_RESTORE
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
@@ -632,10 +686,10 @@ index 976092b7bd45..31d587c16ec1 100644
obj-y += build_utility.o
diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
new file mode 100644
index 000000000000..4bea0c025475
index 000000000000..d3f6f2e1badd
--- /dev/null
+++ b/kernel/sched/alt_core.c
@@ -0,0 +1,7912 @@
@@ -0,0 +1,7971 @@
+/*
+ * kernel/sched/alt_core.c
+ *
@@ -706,7 +760,7 @@ index 000000000000..4bea0c025475
+#define sched_feat(x) (0)
+#endif /* CONFIG_SCHED_DEBUG */
+
+#define ALT_SCHED_VERSION "v6.1-r0"
+#define ALT_SCHED_VERSION "v6.1-r0-CachyOS"
+
+/* rt_prio(prio) defined in include/linux/sched/rt.h */
+#define rt_task(p) rt_prio((p)->prio)
@@ -786,7 +840,91 @@ index 000000000000..4bea0c025475
+#ifdef CONFIG_SCHED_SMT
+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp;
+#endif
+static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp;
+
+#define BITS_PER_ATOMIC_LONG_T BITS_PER_LONG
+typedef struct sched_bitmask {
+ atomic_long_t bits[DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T)];
+} sched_bitmask_t;
+static sched_bitmask_t sched_rq_watermark[NR_CPUS] ____cacheline_aligned_in_smp;
+
+#define x(p, set, mask) \
+ do { \
+ smp_mb__before_atomic(); \
+ if (set) \
+ atomic_long_or((mask), (p)); \
+ else \
+ atomic_long_and(~(mask), (p)); \
+ smp_mb__after_atomic(); \
+ } while (0)
+
+static __always_inline void sched_rq_watermark_fill_downwards(int cpu, unsigned int end,
+ unsigned int start, bool set)
+{
+ unsigned int start_idx, start_bit;
+ unsigned int end_idx, end_bit;
+ atomic_long_t *p;
+
+ if (end == start) {
+ return;
+ }
+
+ start_idx = start / BITS_PER_ATOMIC_LONG_T;
+ start_bit = start % BITS_PER_ATOMIC_LONG_T;
+ end_idx = (end - 1) / BITS_PER_ATOMIC_LONG_T;
+ end_bit = (end - 1) % BITS_PER_ATOMIC_LONG_T;
+ p = &sched_rq_watermark[cpu].bits[end_idx];
+
+ if (end_idx == start_idx) {
+ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)) & (~0UL << start_bit));
+ return;
+ }
+
+ if (end_bit != BITS_PER_ATOMIC_LONG_T - 1) {
+ x(p, set, (~0UL >> (BITS_PER_ATOMIC_LONG_T - 1 - end_bit)));
+ p -= 1;
+ end_idx -= 1;
+ }
+
+ while (end_idx != start_idx) {
+ smp_mb__before_atomic();
+ atomic_long_set(p, set ? ~0UL : 0);
+ smp_mb__after_atomic();
+ p -= 1;
+ end_idx -= 1;
+ }
+
+ x(p, set, ~0UL << start_bit);
+}
+
+#undef x
+
+static __always_inline bool sched_rq_watermark_and(cpumask_t *dstp, const cpumask_t *cpus, int prio, bool not)
+{
+ int cpu;
+ bool ret = false;
+ int idx = prio / BITS_PER_ATOMIC_LONG_T;
+ int bit = prio % BITS_PER_ATOMIC_LONG_T;
+
+ cpumask_clear(dstp);
+ for_each_cpu(cpu, cpus)
+ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not) {
+ __cpumask_set_cpu(cpu, dstp);
+ ret = true;
+ }
+ return ret;
+}
+
+static __always_inline bool sched_rq_watermark_test(const cpumask_t *cpus, int prio, bool not)
+{
+ int cpu;
+ int idx = prio / BITS_PER_ATOMIC_LONG_T;
+ int bit = prio % BITS_PER_ATOMIC_LONG_T;
+
+ for_each_cpu(cpu, cpus)
+ if (test_bit(bit, (long*)&sched_rq_watermark[cpu].bits[idx].counter) == !not)
+ return true;
+ return false;
+}
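
Editor's note: the helpers above replace the old per-priority cpumask array with one multi-word atomic bitmap per CPU, so a watermark change touches at most two partial words plus any whole words between them. Below is a standalone userspace mock-up of the same masking arithmetic; it is an illustrative sketch only, assuming plain unsigned longs in place of atomic_long_t and an arbitrary queue size, not the patch's actual code.

/* Standalone mock-up of the bit-range arithmetic used by
 * sched_rq_watermark_fill_downwards(): set or clear the bit range
 * [start, end) across an array of words, using partial masks for the
 * edge words and whole words in between. */
#include <stdio.h>
#include <limits.h>

#define QUEUE_BITS 100   /* arbitrary stand-in for SCHED_QUEUE_BITS */
#define WORD_BITS  (sizeof(unsigned long) * CHAR_BIT)
#define NWORDS     ((QUEUE_BITS + WORD_BITS - 1) / WORD_BITS)

static void fill_range(unsigned long *bits, unsigned int end,
                       unsigned int start, int set)
{
    unsigned int start_idx, start_bit, end_idx, end_bit;
    unsigned long mask;

    if (end == start)
        return;

    start_idx = start / WORD_BITS;
    start_bit = start % WORD_BITS;
    end_idx = (end - 1) / WORD_BITS;
    end_bit = (end - 1) % WORD_BITS;

    if (end_idx == start_idx) {
        /* Single word: mask covers bits [start_bit, end_bit]. */
        mask = (~0UL >> (WORD_BITS - 1 - end_bit)) & (~0UL << start_bit);
        if (set)
            bits[start_idx] |= mask;
        else
            bits[start_idx] &= ~mask;
        return;
    }

    /* Partial top word. */
    mask = ~0UL >> (WORD_BITS - 1 - end_bit);
    if (set)
        bits[end_idx] |= mask;
    else
        bits[end_idx] &= ~mask;

    /* Whole middle words. */
    for (unsigned int i = end_idx; i-- > start_idx + 1; )
        bits[i] = set ? ~0UL : 0UL;

    /* Partial bottom word. */
    mask = ~0UL << start_bit;
    if (set)
        bits[start_idx] |= mask;
    else
        bits[start_idx] &= ~mask;
}

int main(void)
{
    unsigned long bm[NWORDS] = { 0 };

    fill_range(bm, 91, 88, 1);   /* set bits 88..90: word 1, bits 24..26 */
    for (unsigned int i = 0; i < NWORDS; i++)
        printf("word %u: %#lx\n", i, bm[i]);
    return 0;
}
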
+
+/* sched_queue related functions */
+static inline void sched_queue_init(struct sched_queue *q)
@@ -815,7 +953,6 @@ index 000000000000..4bea0c025475
+{
+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS);
+ unsigned long last_wm = rq->watermark;
+ unsigned long i;
+ int cpu;
+
+ if (watermark == last_wm)
@@ -824,28 +961,25 @@ index 000000000000..4bea0c025475
+ rq->watermark = watermark;
+ cpu = cpu_of(rq);
+ if (watermark < last_wm) {
+ for (i = last_wm; i > watermark; i--)
+ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
+ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - watermark, SCHED_QUEUE_BITS - last_wm, false);
+#ifdef CONFIG_SCHED_SMT
+ if (static_branch_likely(&sched_smt_present) &&
+ IDLE_TASK_SCHED_PRIO == last_wm)
+ unlikely(IDLE_TASK_SCHED_PRIO == last_wm))
+ cpumask_andnot(&sched_sg_idle_mask,
+ &sched_sg_idle_mask, cpu_smt_mask(cpu));
+#endif
+ return;
+ }
+ /* last_wm < watermark */
+ for (i = watermark; i > last_wm; i--)
+ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
+ sched_rq_watermark_fill_downwards(cpu, SCHED_QUEUE_BITS - last_wm, SCHED_QUEUE_BITS - watermark, true);
+#ifdef CONFIG_SCHED_SMT
+ if (static_branch_likely(&sched_smt_present) &&
+ IDLE_TASK_SCHED_PRIO == watermark) {
+ cpumask_t tmp;
+ if (static_branch_likely(&sched_smt_present) &&
+ unlikely(IDLE_TASK_SCHED_PRIO == watermark)) {
+ const cpumask_t *smt_mask = cpu_smt_mask(cpu);
+
+ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark);
+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu)))
+ if (!sched_rq_watermark_test(smt_mask, 0, true))
+ cpumask_or(&sched_sg_idle_mask,
+ &sched_sg_idle_mask, cpu_smt_mask(cpu));
+ &sched_sg_idle_mask, smt_mask);
+ }
+#endif
+}
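
Editor's note: priorities map to bit positions upside down (priority p lives at bit SCHED_QUEUE_BITS - p), which is what lets both branches above fill one contiguous downward range. A small sketch of the arguments the fill helper receives, with an assumed SCHED_QUEUE_BITS of 100:

#include <stdio.h>

#define SCHED_QUEUE_BITS 100   /* assumed value for illustration */

/* Which [start, end) range the fill helper receives for a watermark
 * move on one CPU (mirrors the two branches above). */
static void watermark_range(unsigned long last_wm, unsigned long wm)
{
    if (wm < last_wm)   /* higher-priority work arrived: clear bits */
        printf("clear [%lu, %lu)\n",
               SCHED_QUEUE_BITS - last_wm, SCHED_QUEUE_BITS - wm);
    else                /* queue drained toward idle: set bits */
        printf("set   [%lu, %lu)\n",
               SCHED_QUEUE_BITS - wm, SCHED_QUEUE_BITS - last_wm);
}

int main(void)
{
    watermark_range(12, 9);   /* prints: clear [88, 91) */
    watermark_range(9, 12);   /* prints: set   [88, 91) */
    return 0;
}
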
@@ -928,7 +1062,8 @@ index 000000000000..4bea0c025475
+ * p->se.load, p->rt_priority,
+ * p->dl.dl_{runtime, deadline, period, flags, bw, density}
+ * - sched_setnuma(): p->numa_preferred_nid
+ * - sched_move_task(): p->sched_task_group
+ * - sched_move_task()/
+ * cpu_cgroup_fork(): p->sched_task_group
+ * - uclamp_update_active() p->uclamp*
+ *
+ * p->state <- TASK_*:
@@ -1189,7 +1324,6 @@ index 000000000000..4bea0c025475
+
+ rq->prev_irq_time += irq_delta;
+ delta -= irq_delta;
+ psi_account_irqtime(rq->curr, irq_delta);
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ if (static_key_false((&paravirt_steal_rq_enabled))) {
@@ -1262,15 +1396,15 @@ index 000000000000..4bea0c025475
+ rq->load_stamp = time;
+}
+
+unsigned long rq_load_util(struct rq *rq, unsigned long max)
+unsigned long rq_load_util(struct rq *rq, int cpu)
+{
+ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (max >> RQ_UTIL_SHIFT);
+ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (arch_scale_cpu_capacity(cpu) >> RQ_UTIL_SHIFT);
+}
+
+#ifdef CONFIG_SMP
+unsigned long sched_cpu_util(int cpu)
+{
+ return rq_load_util(cpu_rq(cpu), arch_scale_cpu_capacity(cpu));
+ return rq_load_util(cpu_rq(cpu), cpu);
+}
+#endif /* CONFIG_SMP */
+
@@ -2536,9 +2670,9 @@ index 000000000000..4bea0c025475
+#ifdef CONFIG_SCHED_SMT
+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) ||
+#endif
+ cpumask_and(&tmp, &chk_mask, sched_rq_watermark) ||
+ cpumask_and(&tmp, &chk_mask,
+ sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p)))
+ sched_rq_watermark_and(&tmp, &chk_mask, 0, false) ||
+ sched_rq_watermark_and(&tmp, &chk_mask,
+ SCHED_QUEUE_BITS - 1 - task_sched_prio(p), false))
+ return best_mask_cpu(task_cpu(p), &tmp);
+
+ return best_mask_cpu(task_cpu(p), &chk_mask);
@@ -2979,13 +3113,6 @@ index 000000000000..4bea0c025475
+ if (!llist)
+ return;
+
+ /*
+ * rq::ttwu_pending racy indication of out-standing wakeups.
+ * Races such that false-negatives are possible, since they
+ * are shorter lived that false-positives would be.
+ */
+ WRITE_ONCE(rq->ttwu_pending, 0);
+
+ rq_lock_irqsave(rq, &rf);
+ update_rq_clock(rq);
+
@@ -2999,6 +3126,17 @@ index 000000000000..4bea0c025475
+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0);
+ }
+
+ /*
+ * Must be after enqueueing at least one task such that
+ * idle_cpu() does not observe a false-negative -- if it does,
+ * it is possible for select_idle_siblings() to stack a number
+ * of tasks on this CPU during that window.
+ *
+ * It is ok to clear ttwu_pending when another task is pending.
+ * We will receive an IPI after local irq is enabled and then enqueue it.
+ * Since now nr_running > 0, idle_cpu() will always get the correct result.
+ */
+ WRITE_ONCE(rq->ttwu_pending, 0);
+ rq_unlock_irqrestore(rq, &rf);
+}
+
@@ -3938,8 +4076,7 @@ index 000000000000..4bea0c025475
+ * Claim the task as running, we do this before switching to it
+ * such that any running task will have this set.
+ *
+ * See the smp_load_acquire(&p->on_cpu) case in ttwu() and
+ * its ordering comment.
+ * See the ttwu() WF_ON_CPU case and its ordering comment.
+ */
+ WRITE_ONCE(next->on_cpu, 1);
+}
@@ -4009,7 +4146,7 @@ index 000000000000..4bea0c025475
+ if (likely(!head))
+ return NULL;
+
+ lockdep_assert_rq_held(rq);
+ lockdep_assert_held(&rq->lock);
+ /*
+ * Must not take balance_push_callback off the list when
+ * splice_balance_callbacks() and balance_callbacks() are not
@@ -4678,7 +4815,7 @@ index 000000000000..4bea0c025475
+ * find potential cpus which can migrate the current running task
+ */
+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) &&
+ cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) &&
+ sched_rq_watermark_and(&chk, cpu_online_mask, 0, true) &&
+ cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) {
+ int i;
+
@@ -4820,7 +4957,7 @@ index 000000000000..4bea0c025475
+int __init sched_tick_offload_init(void)
+{
+ tick_work_cpu = alloc_percpu(struct tick_work);
+ BUG_ON(!tick_work_cpu);
+ WARN_ON_ONCE(!tick_work_cpu);
+ return 0;
+}
+
@@ -4986,9 +5123,8 @@ index 000000000000..4bea0c025475
+#ifdef ALT_SCHED_DEBUG
+void alt_sched_debug(void)
+{
+ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n",
+ printk(KERN_INFO "sched: pending: 0x%04lx, sg_idle: 0x%04lx\n",
+ sched_rq_pending_mask.bits[0],
+ sched_rq_watermark[0].bits[0],
+ sched_sg_idle_mask.bits[0]);
+}
+#else
@@ -4997,23 +5133,18 @@ index 000000000000..4bea0c025475
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_PREEMPT_RT
+#define SCHED_NR_MIGRATE_BREAK 8
+#else
+#define SCHED_NR_MIGRATE_BREAK 32
+#endif
+
+const_debug unsigned int sysctl_sched_nr_migrate = SCHED_NR_MIGRATE_BREAK;
+
+#define SCHED_RQ_NR_MIGRATION (32U)
+/*
+ * Migrate pending tasks in @rq to @dest_cpu
+ * Will try to migrate the lesser of half of @rq's nr_running tasks
+ * and SCHED_RQ_NR_MIGRATION to @dest_cpu
+ */
+static inline int
+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu)
+{
+ struct task_struct *p, *skip = rq->curr;
+ int nr_migrated = 0;
+ int nr_tries = min(rq->nr_running / 2, sysctl_sched_nr_migrate);
+ int nr_tries = min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION);
+
+ while (skip != rq->idle && nr_tries &&
+ (p = sched_rq_next_task(skip, rq)) != rq->idle) {
@@ -5631,7 +5762,7 @@ index 000000000000..4bea0c025475
+ enum ctx_state prev_state;
+
+ /* Catch callers which need to be fixed */
+ BUG_ON(preempt_count() || !irqs_disabled());
+ WARN_ON_ONCE(preempt_count() || !irqs_disabled());
+
+ prev_state = exception_enter();
+
@@ -5806,29 +5937,17 @@ index 000000000000..4bea0c025475
+EXPORT_SYMBOL(set_user_nice);
+
+/*
+ * is_nice_reduction - check if nice value is an actual reduction
+ *
+ * Similar to can_nice() but does not perform a capability check.
+ *
+ * @p: task
+ * @nice: nice value
+ */
+static bool is_nice_reduction(const struct task_struct *p, const int nice)
+{
+ /* Convert nice value [19,-20] to rlimit style value [1,40]: */
+ int nice_rlim = nice_to_rlimit(nice);
+
+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE));
+}
+
+/*
+ * can_nice - check if a task can reduce its nice value
+ * @p: task
+ * @nice: nice value
+ */
+int can_nice(const struct task_struct *p, const int nice)
+{
+ return is_nice_reduction(p, nice) || capable(CAP_SYS_NICE);
+ /* Convert nice value [19,-20] to rlimit style value [1,40] */
+ int nice_rlim = nice_to_rlimit(nice);
+
+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
+ capable(CAP_SYS_NICE));
+}
+
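
Editor's note: the restored can_nice() compares in rlimit-style units, where nice_to_rlimit() maps nice 19..-20 onto 1..40 (rlimit = 20 - nice). A quick standalone check of that mapping and the resulting permission test:

#include <stdio.h>

/* Same formula as include/linux/sched/prio.h: rlimit = 20 - nice,
 * so nice 19 -> 1, nice 0 -> 20, nice -20 -> 40. */
static long nice_to_rlimit(long nice)
{
    return 20 - nice;
}

int main(void)
{
    long rlim_nice = 25;   /* e.g. an RLIMIT_NICE soft limit of 25 */

    for (long nice = 19; nice >= -20; nice -= 13)
        printf("nice %3ld -> rlimit %2ld, allowed without CAP_SYS_NICE: %s\n",
               nice, nice_to_rlimit(nice),
               nice_to_rlimit(nice) <= rlim_nice ? "yes" : "no");
    return 0;
}
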
+#ifdef __ARCH_WANT_SYS_NICE
@@ -5979,45 +6098,6 @@ index 000000000000..4bea0c025475
+ return match;
+}
+
+/*
+ * Allow unprivileged RT tasks to decrease priority.
+ * Only issue a capable test if needed and only once to avoid an audit
+ * event on permitted non-privileged operations:
+ */
+static int user_check_sched_setscheduler(struct task_struct *p,
+ const struct sched_attr *attr,
+ int policy, int reset_on_fork)
+{
+ if (rt_policy(policy)) {
+ unsigned long rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
+
+ /* Can't set/change the rt policy: */
+ if (policy != p->policy && !rlim_rtprio)
+ goto req_priv;
+
+ /* Can't increase priority: */
+ if (attr->sched_priority > p->rt_priority &&
+ attr->sched_priority > rlim_rtprio)
+ goto req_priv;
+ }
+
+ /* Can't change other user's priorities: */
+ if (!check_same_owner(p))
+ goto req_priv;
+
+ /* Normal users shall not reset the sched_reset_on_fork flag: */
+ if (p->sched_reset_on_fork && !reset_on_fork)
+ goto req_priv;
+
+ return 0;
+
+req_priv:
+ if (!capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ return 0;
+}
+
+static int __sched_setscheduler(struct task_struct *p,
+ const struct sched_attr *attr,
+ bool user, bool pi)
@@ -6037,7 +6117,7 @@ index 000000000000..4bea0c025475
+ raw_spinlock_t *lock;
+
+ /* The pi code expects interrupts enabled */
+ BUG_ON(pi && in_interrupt());
+ WARN_ON_ONCE(pi && in_interrupt());
+
+ /*
+ * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO
@@ -6074,11 +6154,34 @@ index 000000000000..4bea0c025475
+ (attr->sched_priority != 0))
+ return -EINVAL;
+
+ if (user) {
+ retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork);
+ if (retval)
+ return retval;
+ /*
+ * Allow unprivileged RT tasks to decrease priority:
+ */
+ if (user && !capable(CAP_SYS_NICE)) {
+ if (SCHED_FIFO == policy || SCHED_RR == policy) {
+ unsigned long rlim_rtprio =
+ task_rlimit(p, RLIMIT_RTPRIO);
+
+ /* Can't set/change the rt policy */
+ if (policy != p->policy && !rlim_rtprio)
+ return -EPERM;
+
+ /* Can't increase priority */
+ if (attr->sched_priority > p->rt_priority &&
+ attr->sched_priority > rlim_rtprio)
+ return -EPERM;
+ }
+
+ /* Can't change other user's priorities */
+ if (!check_same_owner(p))
+ return -EPERM;
+
+ /* Normal users shall not reset the sched_reset_on_fork flag */
+ if (p->sched_reset_on_fork && !reset_on_fork)
+ return -EPERM;
+ }
+
+ if (user) {
+ retval = security_task_setscheduler(p);
+ if (retval)
+ return retval;
@@ -7515,7 +7618,7 @@ index 000000000000..4bea0c025475
+{
+ struct mm_struct *mm = current->active_mm;
+
+ BUG_ON(current != this_rq()->idle);
+ WARN_ON_ONCE(current != this_rq()->idle);
+
+ if (mm != &init_mm) {
+ switch_mm(mm, &init_mm, current);
@@ -8029,8 +8132,17 @@ index 000000000000..4bea0c025475
+ wait_bit_init();
+
+#ifdef CONFIG_SMP
+ for (i = 0; i < SCHED_QUEUE_BITS; i++)
+ cpumask_copy(sched_rq_watermark + i, cpu_present_mask);
+ for (i = 0; i < nr_cpu_ids; i++) {
+ long val = cpumask_test_cpu(i, cpu_present_mask) ? -1L : 0;
+ int j;
+ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++)
+ atomic_long_set(&sched_rq_watermark[i].bits[j], val);
+ }
+ for (i = nr_cpu_ids; i < NR_CPUS; i++) {
+ int j;
+ for (j = 0; j < DIV_ROUND_UP(SCHED_QUEUE_BITS, BITS_PER_ATOMIC_LONG_T); j++)
+ atomic_long_set(&sched_rq_watermark[i].bits[j], 0);
+ }
+#endif
+
+#ifdef CONFIG_CGROUP_SCHED
@@ -8456,12 +8568,14 @@ index 000000000000..4bea0c025475
+ sched_unregister_group(tg);
+}
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static void cpu_cgroup_fork(struct task_struct *task)
+{
+}
+
+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
+{
+ return 0;
+}
+#endif
+
+static void cpu_cgroup_attach(struct cgroup_taskset *tset)
+{
@@ -8535,9 +8649,8 @@ index 000000000000..4bea0c025475
+ .css_released = cpu_cgroup_css_released,
+ .css_free = cpu_cgroup_css_free,
+ .css_extra_stat_show = cpu_extra_stat_show,
+#ifdef CONFIG_RT_GROUP_SCHED
+ .fork = cpu_cgroup_fork,
+ .can_attach = cpu_cgroup_can_attach,
+#endif
+ .attach = cpu_cgroup_attach,
+ .legacy_cftypes = cpu_files,
+ .legacy_cftypes = cpu_legacy_files,
@@ -8587,10 +8700,10 @@ index 000000000000..1212a031700e
+{}
diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h
new file mode 100644
index 000000000000..7c1cc0cbca0d
index 000000000000..6df234aacdd7
--- /dev/null
+++ b/kernel/sched/alt_sched.h
@@ -0,0 +1,660 @@
@@ -0,0 +1,658 @@
+#ifndef ALT_SCHED_H
+#define ALT_SCHED_H
+
@@ -8673,15 +8786,6 @@ index 000000000000..7c1cc0cbca0d
+#define MAX_SHARES (1UL << 18)
+#endif
+
+/*
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+ */
+#ifdef CONFIG_SCHED_DEBUG
+# define const_debug __read_mostly
+#else
+# define const_debug const
+#endif
+
+/* task_struct::on_rq states: */
+#define TASK_ON_RQ_QUEUED 1
+#define TASK_ON_RQ_MIGRATING 2
@@ -8838,7 +8942,7 @@ index 000000000000..7c1cc0cbca0d
+#endif /* CONFIG_NO_HZ_COMMON */
+};
+
+extern unsigned long rq_load_util(struct rq *rq, unsigned long max);
+extern unsigned long rq_load_util(struct rq *rq, int cpu);
+
+extern unsigned long calc_load_update;
+extern atomic_long_t calc_load_tasks;
@@ -8990,6 +9094,13 @@ index 000000000000..7c1cc0cbca0d
+}
+
+static inline void
+rq_lock_irq(struct rq *rq, struct rq_flags *rf)
+ __acquires(rq->lock)
+{
+ raw_spin_lock_irq(&rq->lock);
+}
+
+static inline void
+rq_lock(struct rq *rq, struct rq_flags *rf)
+ __acquires(rq->lock)
+{
@@ -9393,10 +9504,18 @@ index d9dc9ab3773f..71a25540d65e 100644
+#include "deadline.c"
+#endif
diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c
index 99bdd96f454f..23f80a86d2d7 100644
index 99bdd96f454f..bc17d5a6fc41 100644
--- a/kernel/sched/build_utility.c
+++ b/kernel/sched/build_utility.c
@@ -85,7 +85,9 @@
@@ -34,7 +34,6 @@
#include <linux/nospec.h>
#include <linux/proc_fs.h>
#include <linux/psi.h>
-#include <linux/psi.h>
#include <linux/ptrace_api.h>
#include <linux/sched_clock.h>
#include <linux/security.h>
@@ -85,7 +84,9 @@
#ifdef CONFIG_SMP
# include "cpupri.c"
@@ -9407,7 +9526,7 @@ index 99bdd96f454f..23f80a86d2d7 100644
#endif
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 1207c78f85c1..68812e0756cb 100644
index 1207c78f85c1..f66b715e4287 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -159,9 +159,14 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
@@ -9420,7 +9539,7 @@ index 1207c78f85c1..68812e0756cb 100644
FREQUENCY_UTIL, NULL);
+#else
+ sg_cpu->bw_dl = 0;
+ sg_cpu->util = rq_load_util(rq, sg_cpu->max);
+ sg_cpu->util = rq_load_util(rq, sg_cpu->cpu);
+#endif /* CONFIG_SCHED_ALT */
}
@@ -9504,7 +9623,7 @@ index 95fc77853743..b48b3f9ed47f 100644
if (task_cputime(p, &cputime.utime, &cputime.stime))
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 1637b65ba07a..033c6deeb515 100644
index a8377d0e5ebd..b6e8e386bbfc 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -7,6 +7,7 @@
@@ -9555,7 +9674,7 @@ index 1637b65ba07a..033c6deeb515 100644
debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency);
debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity);
debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity);
@@ -337,11 +344,13 @@ static __init int sched_init_debug(void)
@@ -339,11 +346,13 @@ static __init int sched_init_debug(void)
#endif
debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
@@ -9569,7 +9688,7 @@ index 1637b65ba07a..033c6deeb515 100644
#ifdef CONFIG_SMP
static cpumask_var_t sd_sysctl_cpus;
@@ -1068,6 +1077,7 @@ void proc_sched_set_task(struct task_struct *p)
@@ -1070,6 +1079,7 @@ void proc_sched_set_task(struct task_struct *p)
memset(&p->stats, 0, sizeof(p->stats));
#endif
}
@@ -9728,7 +9847,7 @@ index 000000000000..56a649d02e49
+#endif
+static inline void sched_task_deactivate(struct task_struct *p, struct rq *rq) {}
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index 0f310768260c..bd38bf738fe9 100644
index 036b0e2cd2b4..a00ed09127bd 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -266,6 +266,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load)
@@ -9751,7 +9870,7 @@ index 0f310768260c..bd38bf738fe9 100644
* thermal:
*
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index 3a0e0dc28721..e8a7d84aa5a5 100644
index 9b35b5072bae..6e457b864d66 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -1,13 +1,15 @@
@@ -9779,7 +9898,7 @@ index 3a0e0dc28721..e8a7d84aa5a5 100644
static inline void cfs_se_util_change(struct sched_avg *avg)
{
unsigned int enqueued;
@@ -180,9 +183,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
@@ -209,9 +212,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
return rq_clock_pelt(rq_of(cfs_rq));
}
#endif
@@ -9791,7 +9910,7 @@ index 3a0e0dc28721..e8a7d84aa5a5 100644
static inline int
update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
{
@@ -200,6 +205,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
@@ -229,6 +234,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
{
return 0;
}
@@ -9800,7 +9919,7 @@ index 3a0e0dc28721..e8a7d84aa5a5 100644
static inline int
update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a4a20046e586..c363693cd869 100644
index 2c89aaa9200f..21d2d4a188fa 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -5,6 +5,10 @@
@@ -9814,7 +9933,7 @@ index a4a20046e586..c363693cd869 100644
#include <linux/sched/affinity.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/cpufreq.h>
@@ -3183,4 +3187,9 @@ static inline void update_current_exec_runtime(struct task_struct *curr,
@@ -3264,4 +3268,9 @@ static inline void update_current_exec_runtime(struct task_struct *curr,
cgroup_account_cputime(curr, delta_exec);
}
@@ -9856,7 +9975,7 @@ index 857f837f52cb..5486c63e4790 100644
}
return 0;
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 84a188913cc9..53934e7ef5db 100644
index 38f3698f5e5b..b9d597394316 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -89,6 +89,7 @@ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delt
@@ -9876,7 +9995,7 @@ index 84a188913cc9..53934e7ef5db 100644
#ifdef CONFIG_PSI
void psi_task_change(struct task_struct *task, int clear, int set);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8739c2a5a54e..d8dd6c15eb47 100644
index dea9fa39e7c0..b401e6423102 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -3,6 +3,7 @@
@@ -9931,7 +10050,7 @@ index 8739c2a5a54e..d8dd6c15eb47 100644
+#endif /* CONFIG_NUMA */
+#endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c6d9dec11b74..2bc42ce8b48e 100644
index 1c7c7c953876..f9dc7d89a6d2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -93,6 +93,10 @@ EXPORT_SYMBOL_GPL(sysctl_long_vals);
@@ -9945,23 +10064,7 @@ index c6d9dec11b74..2bc42ce8b48e 100644
#ifdef CONFIG_PERF_EVENTS
static const int six_hundred_forty_kb = 640 * 1024;
#endif
@@ -1633,6 +1637,7 @@ int proc_do_static_key(struct ctl_table *table, int write,
}
static struct ctl_table kern_table[] = {
+#ifndef CONFIG_SCHED_ALT
#ifdef CONFIG_NUMA_BALANCING
{
.procname = "numa_balancing",
@@ -1652,6 +1657,7 @@ static struct ctl_table kern_table[] = {
.extra1 = SYSCTL_ZERO,
},
#endif /* CONFIG_NUMA_BALANCING */
+#endif /* !CONFIG_SCHED_ALT */
{
.procname = "panic",
.data = &panic_timeout,
@@ -1953,6 +1959,17 @@ static struct ctl_table kern_table[] = {
@@ -1965,6 +1969,17 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
@@ -10074,21 +10177,6 @@ index a2d301f58ced..2ccdede8585c 100644
};
struct wakeup_test_data *x = data;
diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h
index 93ff3bddd36f85c67a3130a68ed67e2525513353..a00bc84b93b2c8789d4798a2900299fdd39a2e58 100644
--- a/kernel/sched/alt_sched.h
+++ b/kernel/sched/alt_sched.h
@@ -387,6 +387,13 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
}
+static inline void
+rq_lock_irq(struct rq *rq, struct rq_flags *rf)
+ __acquires(rq->lock)
+{
+ raw_spin_lock_irq(&rq->lock);
+}
+
static inline void
rq_lock(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
--
2.39.0.rc2.1.gbd5df96b79