diff --git a/PKGBUILD b/PKGBUILD index bd972b6..cee1f51 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -524,7 +524,7 @@ case $_basever in 'ef48eea194c1c101de0461572eaf311f232fee55c155c52904b20085a92db680' '5efd40c392ece498d2d43d5443e6537c2d9ef7cf9820d5ce80b6577fc5d1a4b2' 'e5ea0bb25ee294c655ac3cc30e1eea497799826108fbfb4ef3258c676c1e8a12' - 'e131e63149b7beb83e172337c74e3ab6b2d48888946edef6cd77beab93ca5d2a' + 'd498816b89a46bde060cbea77313ec14e293f820ea76c682870e894e6ff4af22' 'fca63d15ca4502aebd73e76d7499b243d2c03db71ff5ab0bf5cf268b2e576320' '19661ec0d39f9663452b34433214c755179894528bf73a42f6ba52ccf572832a' '89d837bfea3515504b1c99fc881ebdc4f15e2999558127a263e795fc69408a39' diff --git a/customization.cfg b/customization.cfg index 9da6425..ba8803f 100644 --- a/customization.cfg +++ b/customization.cfg @@ -167,6 +167,13 @@ _processor_opt="" # MuQSS only - Make IRQ threading compulsory (FORCE_IRQ_THREADING) - Default is "false" _irq_threading="false" +# CacULE only - Enable Response Driven Balancer, an experimental load balancer for CacULE +_cacule_rdb="" + +# CacULE only - Load balance time period - Default is 19 +# https://github.com/hamadmarri/cacule-cpu-scheduler/blob/master/patches/CacULE/RDB/rdb.patch#L56 +_cacule_rdb_interval="19" + # MuQSS and PDS only - SMT (Hyperthreading) aware nice priority and policy support (SMT_NICE) - Kernel default is "true" - You can disable this on non-SMT/HT CPUs for lower overhead _smt_nice="" diff --git a/linux-tkg-config/prepare b/linux-tkg-config/prepare index 100dffe..9d39586 100644 --- a/linux-tkg-config/prepare +++ b/linux-tkg-config/prepare @@ -796,6 +796,10 @@ CONFIG_DEBUG_INFO_BTF_MODULES=y\r elif [ "${_cpusched}" = "cacule" ]; then _enable "SCHED_AUTOGROUP" "CACULE_SCHED" _disable "BSD_PROCESS_ACCT" "TASK_XACCT" "CGROUP_CPUACCT" "CGROUP_DEBUG" + if [ "$_cacule_rdb" = "true" ]; then + _enable "CACULE_RDB" + scripts/config --set-val "RDB_INTERVAL" "$_cacule_rdb_interval" + fi elif [ "${_cpusched}" = "upds" ]; then # PDS default config _enable "SCHED_PDS" diff --git a/linux-tkg-patches/5.13/0003-cacule-5.13.patch b/linux-tkg-patches/5.13/0003-cacule-5.13.patch index 1cea0c7..95a2c08 100644 --- a/linux-tkg-patches/5.13/0003-cacule-5.13.patch +++ b/linux-tkg-patches/5.13/0003-cacule-5.13.patch @@ -96,10 +96,10 @@ index 000000000000..82b0847c468a + idle timer scheduler in order to avoid to get into priority + inversion problems which would deadlock the machine. 
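For reference, the two new customization.cfg switches above only take effect through the prepare hook added in this diff. The sketch below is not part of the change set; it is a standalone approximation of what that hook does, assuming `_enable` is linux-tkg's existing wrapper around `scripts/config --enable` (its definition is not shown here), and it lists the .config lines one would expect to see afterwards.

    # Run from the kernel source tree with a .config present (illustrative only).
    _cacule_rdb="true"
    _cacule_rdb_interval="19"

    if [ "$_cacule_rdb" = "true" ]; then
        scripts/config --enable CACULE_RDB                              # -> CONFIG_CACULE_RDB=y
        scripts/config --set-val RDB_INTERVAL "$_cacule_rdb_interval"   # -> CONFIG_RDB_INTERVAL=19
    fi

    # Quick check of the result:
    grep -E 'CONFIG_CACULE_RDB|CONFIG_RDB_INTERVAL' .config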
diff --git a/include/linux/sched.h b/include/linux/sched.h -index 32813c345115..d1d5717b2728 100644 +index 32813c345115..0dc06f09715f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -458,10 +458,22 @@ struct sched_statistics { +@@ -458,10 +458,23 @@ struct sched_statistics { #endif }; @@ -108,6 +108,7 @@ index 32813c345115..d1d5717b2728 100644 + struct cacule_node* next; + struct cacule_node* prev; + u64 cacule_start_time; ++ u64 last_run; + u64 vruntime; +}; +#endif @@ -123,27 +124,31 @@ index 32813c345115..d1d5717b2728 100644 unsigned int on_rq; diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h -index db2c0f34aaaf..5a66fc5826fc 100644 +index db2c0f34aaaf..a0ef2748ee6e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h -@@ -32,6 +32,12 @@ extern unsigned int sysctl_sched_latency; +@@ -32,6 +32,16 @@ extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; +#ifdef CONFIG_CACULE_SCHED +extern unsigned int interactivity_factor; -+extern unsigned int interactivity_threshold; +extern unsigned int cacule_max_lifetime; ++extern unsigned int cache_factor; ++extern unsigned int cache_divisor; ++extern unsigned int starve_factor; ++extern unsigned int starve_divisor; ++extern int cacule_yield; +#endif + enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, SCHED_TUNABLESCALING_LOG, diff --git a/init/Kconfig b/init/Kconfig -index a61c92066c2e..089e3bfe5dbc 100644 +index a61c92066c2e..427593be8c5a 100644 --- a/init/Kconfig +++ b/init/Kconfig -@@ -834,6 +834,17 @@ config UCLAMP_BUCKETS_COUNT +@@ -834,6 +834,51 @@ config UCLAMP_BUCKETS_COUNT endmenu @@ -157,11 +162,45 @@ index a61c92066c2e..089e3bfe5dbc 100644 + + If unsure, say Y here. + ++config CACULE_RDB ++ bool "RDB (Response Driven Balancer)" ++ default y ++ depends on CACULE_SCHED ++ help ++ This is an experimental load balancer for CacULE. It is a lightweight ++ load balancer which is a replacement of CFS load balancer. It migrates ++ tasks based on their interactivity scores. ++ ++ If unsure, say Y here. ++ ++config RDB_INTERVAL ++ int "RDB load balancer interval" ++ default 19 ++ depends on CACULE_RDB ++ help ++ This is an interval to control load balance time period. ++ The trigger_load_balance runs in every tick. For High HZ values, the ++ load balance could be overwhelming. RDB load balance includes rq locking ++ which can reduce the performance. The balance interval can help to avoid ++ running load balance on every tick. For example, RDB_INTERVAL=3 will ++ only run load balance every 3ms. Setting RDB_INTERVAL depends on HZ. ++ If you want load balancer run every 2ms while HZ=500 then it is not ++ needed and better to set RDB_INTERVAL=0 since 500HZ already (1000ms ++ / 500HZ = 2ms). However, if you have 1000HZ and want to avoid load ++ balancer from running every 1ms, you could set RDB_INTERVAL=4ms for ++ example to make load balancer run every 4ms. Less RDB_INTERVAL values ++ (or 0 to disable) could make sure tasks are balanced ASAP, but with ++ the cost of locking/blocking time. High RDB_INTERVAL values can relax ++ balancing locking but with the cost of imbalanced workload for that ++ period of time (i.e. if RDB_INTERVAL=100ms) there will be no balancing ++ for 100ms (except for newidle_balance which is not effected by RDB_INTERVAL). ++ ++ If in doubt, use the default value. 
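The RDB_INTERVAL help text above is easier to follow with concrete numbers. This small sketch (not from the patch) mirrors the conversion that the new trigger_load_balance() path performs further down in this diff via msecs_to_jiffies(CONFIG_RDB_INTERVAL); the rounding is only an approximation of that helper, and the values are illustrative.

    # RDB_INTERVAL is in milliseconds; the balancer runs at most once per tick.
    hz=1000
    rdb_interval_ms=19
    tick_ms=$(( 1000 / hz ))                                      # 1 ms per tick at HZ=1000
    interval_jiffies=$(( (rdb_interval_ms * hz + 999) / 1000 ))   # ~msecs_to_jiffies(19) = 19
    echo "tick = ${tick_ms} ms; RDB balances at most once every ${interval_jiffies} tick(s)"
    # At HZ=500 a tick is already 2 ms, so RDB_INTERVAL <= 2 adds nothing and
    # 0 (balance on every tick) is the cheaper setting, as the help text notes.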
+ # # For architectures that want to enable the support for NUMA-affine scheduler # balancing logic: -@@ -1231,6 +1242,7 @@ config SCHED_AUTOGROUP +@@ -1231,6 +1276,7 @@ config SCHED_AUTOGROUP select CGROUPS select CGROUP_SCHED select FAIR_GROUP_SCHED @@ -192,10 +231,21 @@ index 38ef6d06888e..865f8dbddca8 100644 config SCHED_HRTICK def_bool HIGH_RES_TIMERS diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index e5858999b54d..bdedde199504 100644 +index e5858999b54d..c326d30424f9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3578,6 +3578,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) +@@ -82,6 +82,10 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; + */ + unsigned int sysctl_sched_rt_period = 1000000; + ++#ifdef CONFIG_CACULE_SCHED ++int __read_mostly cacule_yield = 1; ++#endif ++ + __read_mostly int scheduler_running; + + /* +@@ -3578,6 +3582,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; @@ -207,7 +257,7 @@ index e5858999b54d..bdedde199504 100644 INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_FAIR_GROUP_SCHED -@@ -3863,6 +3868,10 @@ void wake_up_new_task(struct task_struct *p) +@@ -3863,6 +3872,10 @@ void wake_up_new_task(struct task_struct *p) update_rq_clock(rq); post_init_entity_util_avg(p); @@ -218,7 +268,7 @@ index e5858999b54d..bdedde199504 100644 activate_task(rq, p, ENQUEUE_NOCLOCK); trace_sched_wakeup_new(p); check_preempt_curr(rq, p, WF_FORK); -@@ -4674,7 +4683,9 @@ static void sched_tick_remote(struct work_struct *work) +@@ -4674,7 +4687,9 @@ static void sched_tick_remote(struct work_struct *work) struct rq *rq = cpu_rq(cpu); struct task_struct *curr; struct rq_flags rf; @@ -228,7 +278,7 @@ index e5858999b54d..bdedde199504 100644 int os; /* -@@ -4694,6 +4705,7 @@ static void sched_tick_remote(struct work_struct *work) +@@ -4694,6 +4709,7 @@ static void sched_tick_remote(struct work_struct *work) update_rq_clock(rq); @@ -236,7 +286,7 @@ index e5858999b54d..bdedde199504 100644 if (!is_idle_task(curr)) { /* * Make sure the next tick runs within a reasonable -@@ -4702,6 +4714,8 @@ static void sched_tick_remote(struct work_struct *work) +@@ -4702,6 +4718,8 @@ static void sched_tick_remote(struct work_struct *work) delta = rq_clock_task(rq) - curr->se.exec_start; WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); } @@ -245,12 +295,30 @@ index e5858999b54d..bdedde199504 100644 curr->sched_class->task_tick(rq, curr, 0); calc_load_nohz_remote(rq); -@@ -8115,6 +8129,10 @@ void __init sched_init(void) +@@ -6958,6 +6976,13 @@ static void do_sched_yield(void) + struct rq_flags rf; + struct rq *rq; + ++#ifdef CONFIG_CACULE_SCHED ++ struct task_struct *curr = current; ++ struct cacule_node *cn = &curr->se.cacule_node; ++ ++ if (cacule_yield) ++ cn->vruntime |= YIELD_MARK; ++#endif + rq = this_rq_lock_irq(&rf); + + schedstat_inc(rq->yld_count); +@@ -8115,6 +8140,14 @@ void __init sched_init(void) BUG_ON(&dl_sched_class + 1 != &stop_sched_class); #endif +#ifdef CONFIG_CACULE_SCHED -+ printk(KERN_INFO "CacULE CPU scheduler v5.13-r2 by Hamad Al Marri."); ++#ifdef CONFIG_CACULE_RDB ++ printk(KERN_INFO "CacULE CPU scheduler (RDB) v5.13-r3 by Hamad Al Marri."); ++#else ++ printk(KERN_INFO "CacULE CPU scheduler v5.13-r3 by Hamad Al Marri."); ++#endif +#endif + wait_bit_init(); @@ -303,10 +371,10 @@ index c5aacbd492a1..adb021b7da8a 100644 cfs_rq->nr_spread_over); SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); 
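The core.c hunks above add a boot banner and the `cacule_yield` switch. As a rough way to confirm which variant is actually running, one can check dmesg for the printk string added in sched_init() and read the corresponding sysctl registered later in this diff; the /proc/sys/kernel path is an assumption based on the usual sysctl mapping, not something this diff spells out.

    # Banner printed by sched_init() on the patched kernel:
    dmesg | grep -m1 'CacULE CPU scheduler'
    #   e.g. "CacULE CPU scheduler (RDB) v5.13-r3 by Hamad Al Marri."

    # Yield marking added to do_sched_yield(), togglable at runtime (root required):
    cat /proc/sys/kernel/sched_cacule_yield        # 1 = mark yielding tasks (default)
    echo 0 > /proc/sys/kernel/sched_cacule_yield   # 0 = do not mark yielding tasks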
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index e807b743353d..ddb9e65b0381 100644 +index 7dd0d859d95b..4aa5fced8f69 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -19,9 +19,25 @@ +@@ -19,9 +19,24 @@ * * Adaptive scheduling granularity, math enhancements by Peter Zijlstra * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra @@ -321,18 +389,17 @@ index e807b743353d..ddb9e65b0381 100644 +unsigned int __read_mostly cacule_max_lifetime = 22000; // in ms +unsigned int __read_mostly interactivity_factor = 32768; + -+#ifdef CONFIG_FAIR_GROUP_SCHED -+unsigned int __read_mostly interactivity_threshold = 0; -+#else -+unsigned int __read_mostly interactivity_threshold = 1000; -+#endif ++unsigned int __read_mostly cache_factor = 13107; ++unsigned int __read_mostly cache_divisor = 1000000; // 1ms + ++unsigned int __read_mostly starve_factor = 19660; ++unsigned int __read_mostly starve_divisor = 3000000; // 3ms +#endif + /* * Targeted preemption latency for CPU-bound tasks: * -@@ -82,7 +98,11 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; +@@ -82,7 +97,11 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; unsigned int sysctl_sched_wakeup_granularity = 1000000UL; static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; @@ -344,7 +411,7 @@ index e807b743353d..ddb9e65b0381 100644 int sched_thermal_decay_shift; static int __init setup_sched_thermal_decay_shift(char *str) -@@ -263,6 +283,14 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight +@@ -263,6 +282,14 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight const struct sched_class fair_sched_class; @@ -359,7 +426,7 @@ index e807b743353d..ddb9e65b0381 100644 /************************************************************** * CFS operations on generic schedulable entities: */ -@@ -522,7 +550,7 @@ void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec); +@@ -522,7 +549,7 @@ void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec); /************************************************************** * Scheduling class tree data structure manipulation methods: */ @@ -368,7 +435,7 @@ index e807b743353d..ddb9e65b0381 100644 static inline u64 max_vruntime(u64 max_vruntime, u64 vruntime) { s64 delta = (s64)(vruntime - max_vruntime); -@@ -585,7 +613,209 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) +@@ -585,7 +612,223 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) { return entity_before(__node_2_se(a), __node_2_se(b)); } @@ -402,12 +469,58 @@ index e807b743353d..ddb9e65b0381 100644 + return score_se; +} + -+static inline int is_interactive(struct cacule_node *cn) ++static unsigned int ++calc_cache_score(u64 now, struct cacule_node *cn) +{ -+ if (!interactivity_threshold || se_of(cn)->vruntime == 0) ++ struct sched_entity *se = se_of(cn); ++ struct cfs_rq *cfs_rq = cfs_rq_of(se); ++ u64 c_div = cache_divisor; ++ u64 cache_period = 1ULL; ++ u64 u64_factor_m = cache_factor; ++ u64 _2m = u64_factor_m << 1; ++ unsigned int score; ++ ++ if (!cache_factor) + return 0; + -+ return calc_interactivity(sched_clock(), cn) < interactivity_threshold; ++ if (se == cfs_rq->curr) ++ return 0; ++ ++ cache_period = (now - se->exec_start) | 1; ++ ++ if (c_div >= cache_period) ++ score = u64_factor_m / (c_div / cache_period); ++ else ++ score = _2m - (u64_factor_m / (cache_period / c_div)); ++ ++ return score; ++} ++ ++static unsigned int ++calc_starve_score(u64 now, 
struct cacule_node *cn) ++{ ++ struct sched_entity *se = se_of(cn); ++ struct cfs_rq *cfs_rq = cfs_rq_of(se); ++ u64 s_div = starve_divisor; ++ u64 starving = 1ULL; ++ u64 u64_factor_m = starve_factor; ++ u64 _2m = u64_factor_m << 1; ++ unsigned int score; ++ ++ if (!starve_factor) ++ return 0; ++ ++ if (se == cfs_rq->curr) ++ return _2m; ++ ++ starving = (now - cn->last_run) | 1; ++ ++ if (s_div >= starving) ++ score = _2m - (u64_factor_m / (s_div / starving)); ++ else ++ score = u64_factor_m / (starving / s_div); ++ ++ return score; +} + +static inline int cn_has_idle_policy(struct cacule_node *cn) @@ -420,29 +533,6 @@ index e807b743353d..ddb9e65b0381 100644 + return task_has_idle_policy(task_of(se)); +} + -+static inline int -+entity_before_cached(u64 now, unsigned int score_curr, struct cacule_node *se) -+{ -+ unsigned int score_se; -+ int diff; -+ -+ /* -+ * if se has idle class, then no need to -+ * calculate, since we are sure that score_curr -+ * is a score for non idle class task -+ */ -+ if (cn_has_idle_policy(se)) -+ return -1; -+ -+ score_se = calc_interactivity(now, se); -+ diff = score_se - score_curr; -+ -+ if (diff <= 0) -+ return 1; -+ -+ return -1; -+} - +/* + * Does se have lower interactivity score value (i.e. interactive) than curr? If yes, return 1, + * otherwise return -1 @@ -465,8 +555,13 @@ index e807b743353d..ddb9e65b0381 100644 + if (is_curr_idle && !is_se_idle) + return 1; + -+ score_curr = calc_interactivity(now, curr); -+ score_se = calc_interactivity(now, se); ++ score_curr = calc_interactivity(now, curr); ++ score_curr += calc_cache_score(now, curr); ++ score_curr += calc_starve_score(now, curr); ++ ++ score_se = calc_interactivity(now, se); ++ score_se += calc_cache_score(now, se); ++ score_se += calc_starve_score(now, se); + + diff = score_se - score_curr; + @@ -476,64 +571,51 @@ index e807b743353d..ddb9e65b0381 100644 + return -1; +} + ++#ifdef CONFIG_CACULE_RDB ++static void update_IS(struct rq *rq) ++{ ++ struct list_head *tasks = &rq->cfs_tasks; ++ struct task_struct *p, *to_migrate = NULL; ++ unsigned int max_IS = ~0, temp_IS; ++ ++ list_for_each_entry(p, tasks, se.group_node) { ++ if (task_running(rq, p)) ++ continue; ++ ++ temp_IS = calc_interactivity(sched_clock(), &p->se.cacule_node); ++ if (temp_IS < max_IS) { ++ to_migrate = p; ++ max_IS = temp_IS; ++ } ++ } ++ ++ if (to_migrate) { ++ WRITE_ONCE(rq->max_IS_score, max_IS); ++ WRITE_ONCE(rq->to_migrate_task, to_migrate); ++ } else if (rq->max_IS_score != ~0) { ++ WRITE_ONCE(rq->max_IS_score, ~0); ++ WRITE_ONCE(rq->to_migrate_task, NULL); ++ } ++} ++#endif + +/* + * Enqueue an entity + */ +static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *_se) +{ + struct cacule_node *se = &(_se->cacule_node); -+ struct cacule_node *iter, *next = NULL; -+ u64 now = sched_clock(); -+ unsigned int score_se = calc_interactivity(now, se); -+ int is_idle_task = cn_has_idle_policy(se); + + se->next = NULL; + se->prev = NULL; + -+ if (likely(cfs_rq->head)) { -+ -+ // start from tail -+ iter = cfs_rq->tail; -+ -+ /* -+ * if this task has idle class, then -+ * push it to the tail right away -+ */ -+ if (is_idle_task) -+ goto to_tail; -+ -+ /* here we know that this task isn't idle clas */ -+ -+ // does se have higher IS than iter? 
-+ while (iter && entity_before_cached(now, score_se, iter) == -1) { -+ next = iter; -+ iter = iter->prev; -+ } -+ -+ // se in tail position -+ if (iter == cfs_rq->tail) { -+to_tail: -+ cfs_rq->tail->next = se; -+ se->prev = cfs_rq->tail; -+ -+ cfs_rq->tail = se; -+ } -+ // else if not head no tail, insert se after iter -+ else if (iter) { -+ se->next = next; -+ se->prev = iter; -+ -+ iter->next = se; -+ next->prev = se; -+ } ++ if (cfs_rq->head) { + // insert se at head -+ else { -+ se->next = cfs_rq->head; -+ cfs_rq->head->prev = se; ++ se->next = cfs_rq->head; ++ cfs_rq->head->prev = se; + -+ // lastly reset the head -+ cfs_rq->head = se; -+ } ++ // lastly reset the head ++ cfs_rq->head = se; + } else { + // if empty rq + cfs_rq->head = se; @@ -549,7 +631,6 @@ index e807b743353d..ddb9e65b0381 100644 + if (cfs_rq->head == cfs_rq->tail) { + cfs_rq->head = NULL; + cfs_rq->tail = NULL; -+ + } else if (se == cfs_rq->head) { + // if it is the head + cfs_rq->head = cfs_rq->head->next; @@ -578,7 +659,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * Enqueue an entity into the rb-tree: */ -@@ -618,16 +848,24 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se) +@@ -618,16 +861,24 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se) return __node_2_se(next); } @@ -603,7 +684,7 @@ index e807b743353d..ddb9e65b0381 100644 } /************************************************************** -@@ -717,6 +955,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -717,6 +968,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) return slice; } @@ -611,7 +692,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * We calculate the vruntime slice of a to-be-inserted task. * -@@ -726,6 +965,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -726,6 +978,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) { return calc_delta_fair(sched_slice(cfs_rq, se), se); } @@ -619,7 +700,7 @@ index e807b743353d..ddb9e65b0381 100644 #include "pelt.h" #ifdef CONFIG_SMP -@@ -833,14 +1073,51 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq) +@@ -833,14 +1086,55 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq) } #endif /* CONFIG_SMP */ @@ -627,7 +708,7 @@ index e807b743353d..ddb9e65b0381 100644 +static void normalize_lifetime(u64 now, struct sched_entity *se) +{ + struct cacule_node *cn = &se->cacule_node; -+ u64 max_life_ns, life_time; ++ u64 max_life_ns, life_time, old_hrrn_x; + s64 diff; + + /* @@ -640,8 +721,12 @@ index e807b743353d..ddb9e65b0381 100644 + diff = life_time - max_life_ns; + + if (diff > 0) { ++ // unmark YIELD. 
No need to check or remark since ++ // this normalize action doesn't happen very often ++ cn->vruntime &= YIELD_UNMARK; ++ + // multiply life_time by 1024 for more precision -+ u64 old_hrrn_x = (life_time << 7) / ((cn->vruntime >> 3) | 1); ++ old_hrrn_x = (life_time << 7) / ((cn->vruntime >> 3) | 1); + + // reset life to half max_life (i.e ~15s) + cn->cacule_start_time = now - (max_life_ns >> 1); @@ -671,11 +756,12 @@ index e807b743353d..ddb9e65b0381 100644 if (unlikely(!curr)) return; -@@ -857,8 +1134,15 @@ static void update_curr(struct cfs_rq *cfs_rq) +@@ -857,8 +1151,16 @@ static void update_curr(struct cfs_rq *cfs_rq) curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq->exec_clock, delta_exec); +#ifdef CONFIG_CACULE_SCHED ++ curr->cacule_node.last_run = now; + delta_fair = calc_delta_fair(delta_exec, curr); + curr->vruntime += delta_fair; + curr->cacule_node.vruntime += delta_fair; @@ -687,7 +773,7 @@ index e807b743353d..ddb9e65b0381 100644 if (entity_is_task(curr)) { struct task_struct *curtask = task_of(curr); -@@ -1026,7 +1310,6 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -1026,7 +1328,6 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) static inline void update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { @@ -695,19 +781,20 @@ index e807b743353d..ddb9e65b0381 100644 if (!schedstat_enabled()) return; -@@ -1058,7 +1341,11 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -1058,7 +1359,12 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) /* * We are starting a new run period: */ +#ifdef CONFIG_CACULE_SCHED + se->exec_start = sched_clock(); ++ se->cacule_node.last_run = sched_clock(); +#else se->exec_start = rq_clock_task(rq_of(cfs_rq)); +#endif } /************************************************** -@@ -4178,7 +4465,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} +@@ -4178,7 +4484,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) { @@ -716,7 +803,7 @@ index e807b743353d..ddb9e65b0381 100644 s64 d = se->vruntime - cfs_rq->min_vruntime; if (d < 0) -@@ -4189,6 +4476,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -4189,6 +4495,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) #endif } @@ -724,7 +811,7 @@ index e807b743353d..ddb9e65b0381 100644 static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) { -@@ -4220,6 +4508,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) +@@ -4220,6 +4527,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) /* ensure we never gain time by being placed backwards. */ se->vruntime = max_vruntime(se->vruntime, vruntime); } @@ -732,7 +819,7 @@ index e807b743353d..ddb9e65b0381 100644 static void check_enqueue_throttle(struct cfs_rq *cfs_rq); -@@ -4278,18 +4567,23 @@ static inline bool cfs_bandwidth_used(void); +@@ -4278,18 +4586,23 @@ static inline bool cfs_bandwidth_used(void); static void enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { @@ -756,7 +843,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * Otherwise, renormalise after, such that we're placed at the current * moment in time, instead of some random moment in the past. 
Being -@@ -4298,6 +4592,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -4298,6 +4611,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ if (renorm && !curr) se->vruntime += cfs_rq->min_vruntime; @@ -764,7 +851,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * When enqueuing a sched_entity, we must: -@@ -4312,8 +4607,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -4312,8 +4626,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) update_cfs_group(se); account_entity_enqueue(cfs_rq, se); @@ -775,7 +862,7 @@ index e807b743353d..ddb9e65b0381 100644 check_schedstat_required(); update_stats_enqueue(cfs_rq, se, flags); -@@ -4334,6 +4631,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -4334,6 +4650,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) check_enqueue_throttle(cfs_rq); } @@ -783,7 +870,7 @@ index e807b743353d..ddb9e65b0381 100644 static void __clear_buddies_last(struct sched_entity *se) { for_each_sched_entity(se) { -@@ -4378,6 +4676,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -4378,6 +4695,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) if (cfs_rq->skip == se) __clear_buddies_skip(se); } @@ -791,7 +878,7 @@ index e807b743353d..ddb9e65b0381 100644 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); -@@ -4402,13 +4701,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -4402,13 +4720,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) update_stats_dequeue(cfs_rq, se, flags); @@ -808,7 +895,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * Normalize after update_curr(); which will also have moved * min_vruntime if @se is the one holding it back. 
But before doing -@@ -4417,12 +4719,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -4417,12 +4738,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ if (!(flags & DEQUEUE_SLEEP)) se->vruntime -= cfs_rq->min_vruntime; @@ -823,7 +910,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * Now advance min_vruntime if @se was the entity holding it back, * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be -@@ -4431,8 +4735,21 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +@@ -4431,8 +4754,23 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE) update_min_vruntime(cfs_rq); @@ -831,21 +918,23 @@ index e807b743353d..ddb9e65b0381 100644 } +#ifdef CONFIG_CACULE_SCHED ++static struct sched_entity * ++pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr); ++ +/* + * Preempt the current task with a newly woken task if needed: + */ +static void +check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) +{ -+ // does head have higher IS than curr -+ if (entity_before(sched_clock(), &curr->cacule_node, cfs_rq->head) == 1) ++ if (pick_next_entity(cfs_rq, curr) != curr) + resched_curr(rq_of(cfs_rq)); +} +#else /* * Preempt the current task with a newly woken task if needed: */ -@@ -4472,6 +4789,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) +@@ -4472,6 +4810,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) if (delta > ideal_runtime) resched_curr(rq_of(cfs_rq)); } @@ -853,7 +942,7 @@ index e807b743353d..ddb9e65b0381 100644 static void set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) -@@ -4506,6 +4824,21 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +@@ -4506,6 +4845,31 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) se->prev_sum_exec_runtime = se->sum_exec_runtime; } @@ -862,12 +951,22 @@ index e807b743353d..ddb9e65b0381 100644 +pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) +{ + struct cacule_node *se = cfs_rq->head; ++ struct cacule_node *next; ++ u64 now = sched_clock(); + -+ if (unlikely(!se)) -+ se = &curr->cacule_node; -+ else if (unlikely(curr -+ && entity_before(sched_clock(), se, &curr->cacule_node) == 1)) -+ se = &curr->cacule_node; ++ if (!se) ++ return curr; ++ ++ next = se->next; ++ while (next) { ++ if (entity_before(now, se, next) == 1) ++ se = next; ++ ++ next = next->next; ++ } ++ ++ if (curr && entity_before(now, se, &curr->cacule_node) == 1) ++ return curr; + + return se_of(se); +} @@ -875,7 +974,7 @@ index e807b743353d..ddb9e65b0381 100644 static int wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); -@@ -4566,6 +4899,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) +@@ -4566,6 +4930,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) return se; } @@ -883,8 +982,14 @@ index e807b743353d..ddb9e65b0381 100644 static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); -@@ -5668,7 +6002,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -5666,9 +6031,15 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) + assert_list_leaf_cfs_rq(rq); + hrtick_update(rq); ++ ++#ifdef CONFIG_CACULE_RDB ++ update_IS(rq); ++#endif } +#if !defined(CONFIG_CACULE_SCHED) @@ -893,7 +998,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * The dequeue_task method is called 
before nr_running is -@@ -5700,12 +6036,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) +@@ -5700,12 +6071,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (cfs_rq->load.weight) { /* Avoid re-evaluating load for this entity: */ se = parent_entity(se); @@ -908,7 +1013,18 @@ index e807b743353d..ddb9e65b0381 100644 break; } flags |= DEQUEUE_SLEEP; -@@ -5821,6 +6159,7 @@ static unsigned long capacity_of(int cpu) +@@ -5737,6 +6110,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) + dequeue_throttle: + util_est_update(&rq->cfs, p, task_sleep); + hrtick_update(rq); ++ ++#ifdef CONFIG_CACULE_RDB ++ update_IS(rq); ++#endif + } + + #ifdef CONFIG_SMP +@@ -5821,6 +6198,7 @@ static unsigned long capacity_of(int cpu) return cpu_rq(cpu)->cpu_capacity; } @@ -916,7 +1032,7 @@ index e807b743353d..ddb9e65b0381 100644 static void record_wakee(struct task_struct *p) { /* -@@ -5867,6 +6206,7 @@ static int wake_wide(struct task_struct *p) +@@ -5867,6 +6245,7 @@ static int wake_wide(struct task_struct *p) return 0; return 1; } @@ -924,7 +1040,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * The purpose of wake_affine() is to quickly determine on which CPU we can run -@@ -6569,6 +6909,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) +@@ -6569,6 +6948,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) return min_t(unsigned long, util, capacity_orig_of(cpu)); } @@ -932,92 +1048,23 @@ index e807b743353d..ddb9e65b0381 100644 /* * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued) * to @dst_cpu. -@@ -6823,6 +7164,57 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) +@@ -6823,6 +7203,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) return -1; } +#endif /* CONFIG_CACULE_SCHED */ -+ -+#ifdef CONFIG_CACULE_SCHED -+static int -+find_least_IS_cpu(struct task_struct *p) -+{ -+ struct cfs_rq *cfs_rq; -+ unsigned int max_IS = 0; -+ unsigned int IS, IS_c, IS_h; -+ struct sched_entity *curr_se; -+ struct cacule_node *cn, *head; -+ int cpu_i; -+ int new_cpu = -1; -+ -+ for_each_online_cpu(cpu_i) { -+ if (!cpumask_test_cpu(cpu_i, p->cpus_ptr)) -+ continue; -+ -+ cn = NULL; -+ cfs_rq = &cpu_rq(cpu_i)->cfs; -+ -+ curr_se = cfs_rq->curr; -+ head = cfs_rq->head; -+ -+ if (!curr_se && head) -+ cn = head; -+ else if (curr_se && !head) -+ cn = &curr_se->cacule_node; -+ else if (curr_se && head) { -+ IS_c = calc_interactivity(sched_clock(), &curr_se->cacule_node); -+ IS_h = calc_interactivity(sched_clock(), head); -+ -+ IS = IS_c > IS_h? 
IS_c : IS_h; -+ goto compare; -+ } -+ -+ if (!cn) -+ return cpu_i; -+ -+ IS = calc_interactivity(sched_clock(), cn); -+ -+compare: -+ if (IS > max_IS) { -+ max_IS = IS; -+ new_cpu = cpu_i; -+ } -+ } -+ -+ return new_cpu; -+} -+#endif /* * select_task_rq_fair: Select target runqueue for the waking task in domains -@@ -6847,6 +7239,26 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) +@@ -6847,6 +7228,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) /* SD_flags and WF_flags share the first nibble */ int sd_flag = wake_flags & 0xF; -+#ifdef CONFIG_CACULE_SCHED -+ struct sched_entity *se = &p->se; -+ -+ if (!is_interactive(&se->cacule_node)) -+ goto cfs_way; -+ -+ // check first if the prev cpu -+ // has 0 tasks -+ if (cpumask_test_cpu(prev_cpu, p->cpus_ptr) && -+ cpu_rq(prev_cpu)->cfs.nr_running == 0) -+ return prev_cpu; -+ -+ new_cpu = find_least_IS_cpu(p); -+ -+ if (new_cpu != -1) -+ return new_cpu; -+ -+ new_cpu = prev_cpu; -+cfs_way: -+#else ++#if !defined(CONFIG_CACULE_SCHED) if (wake_flags & WF_TTWU) { record_wakee(p); -@@ -6859,6 +7271,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) +@@ -6859,6 +7241,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr); } @@ -1025,7 +1072,7 @@ index e807b743353d..ddb9e65b0381 100644 rcu_read_lock(); for_each_domain(cpu, tmp) { -@@ -6905,6 +7318,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se); +@@ -6905,6 +7288,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se); */ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) { @@ -1033,7 +1080,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * As blocked tasks retain absolute vruntime the migration needs to * deal with this by subtracting the old and adding the new -@@ -6930,6 +7344,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) +@@ -6930,6 +7314,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) se->vruntime -= min_vruntime; } @@ -1041,7 +1088,7 @@ index e807b743353d..ddb9e65b0381 100644 if (p->on_rq == TASK_ON_RQ_MIGRATING) { /* -@@ -6975,6 +7390,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +@@ -6975,6 +7360,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } #endif /* CONFIG_SMP */ @@ -1049,7 +1096,7 @@ index e807b743353d..ddb9e65b0381 100644 static unsigned long wakeup_gran(struct sched_entity *se) { unsigned long gran = sysctl_sched_wakeup_granularity; -@@ -7053,6 +7469,7 @@ static void set_skip_buddy(struct sched_entity *se) +@@ -7053,6 +7439,7 @@ static void set_skip_buddy(struct sched_entity *se) for_each_sched_entity(se) cfs_rq_of(se)->skip = se; } @@ -1057,7 +1104,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * Preempt the current task with a newly woken task if needed: -@@ -7061,9 +7478,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -7061,9 +7448,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ { struct task_struct *curr = rq->curr; struct sched_entity *se = &curr->se, *pse = &p->se; @@ -1070,7 +1117,7 @@ index e807b743353d..ddb9e65b0381 100644 if (unlikely(se == pse)) return; -@@ -7077,10 +7497,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -7077,10 +7467,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if 
(unlikely(throttled_hierarchy(cfs_rq_of(pse)))) return; @@ -1083,7 +1130,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * We can come here with TIF_NEED_RESCHED already set from new task -@@ -7110,6 +7532,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -7110,6 +7502,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ find_matching_se(&se, &pse); update_curr(cfs_rq_of(se)); BUG_ON(!pse); @@ -1095,7 +1142,7 @@ index e807b743353d..ddb9e65b0381 100644 if (wakeup_preempt_entity(se, pse) == 1) { /* * Bias pick_next to pick the sched entity that is -@@ -7119,11 +7546,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -7119,11 +7516,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ set_next_buddy(pse); goto preempt; } @@ -1110,7 +1157,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved -@@ -7138,6 +7568,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -7138,6 +7538,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) set_last_buddy(se); @@ -1118,7 +1165,73 @@ index e807b743353d..ddb9e65b0381 100644 } struct task_struct * -@@ -7312,7 +7743,10 @@ static void yield_task_fair(struct rq *rq) +@@ -7199,6 +7600,11 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf + cfs_rq = group_cfs_rq(se); + } while (cfs_rq); + ++ /* ++ * Here we picked a sched_entity starting from ++ * the same group of curr, but the task could ++ * be a child of the selected sched_entity. ++ */ + p = task_of(se); + + /* +@@ -7209,6 +7615,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf + if (prev != p) { + struct sched_entity *pse = &prev->se; + ++ /* while se and pse are not in the same group */ + while (!(cfs_rq = is_same_group(se, pse))) { + int se_depth = se->depth; + int pse_depth = pse->depth; +@@ -7223,6 +7630,9 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf + } + } + ++ /* Here we reached the point were both ++ * sched_entities are in the same group. ++ */ + put_prev_entity(cfs_rq, pse); + set_next_entity(cfs_rq, se); + } +@@ -7233,6 +7643,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf + if (prev) + put_prev_task(rq, prev); + ++ /* Going down the hierarchy */ + do { + se = pick_next_entity(cfs_rq, NULL); + set_next_entity(cfs_rq, se); +@@ -7242,6 +7653,15 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf + p = task_of(se); + + done: __maybe_unused; ++#ifdef CONFIG_CACULE_SCHED ++ if (prev) ++ prev->se.cacule_node.vruntime &= YIELD_UNMARK; ++ ++#ifdef CONFIG_CACULE_RDB ++ update_IS(rq); ++#endif ++ ++#endif + #ifdef CONFIG_SMP + /* + * Move the next running task to the front of +@@ -7259,6 +7679,11 @@ done: __maybe_unused; + return p; + + idle: ++#ifdef CONFIG_CACULE_RDB ++ WRITE_ONCE(rq->max_IS_score, ~0); ++ WRITE_ONCE(rq->to_migrate_task, NULL); ++#endif ++ + if (!rf) + return NULL; + +@@ -7312,7 +7737,10 @@ static void yield_task_fair(struct rq *rq) { struct task_struct *curr = rq->curr; struct cfs_rq *cfs_rq = task_cfs_rq(curr); @@ -1129,7 +1242,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * Are we the only task in the tree? 
-@@ -7320,7 +7754,9 @@ static void yield_task_fair(struct rq *rq) +@@ -7320,7 +7748,9 @@ static void yield_task_fair(struct rq *rq) if (unlikely(rq->nr_running == 1)) return; @@ -1139,7 +1252,7 @@ index e807b743353d..ddb9e65b0381 100644 if (curr->policy != SCHED_BATCH) { update_rq_clock(rq); -@@ -7336,7 +7772,9 @@ static void yield_task_fair(struct rq *rq) +@@ -7336,7 +7766,9 @@ static void yield_task_fair(struct rq *rq) rq_clock_skip_update(rq); } @@ -1149,7 +1262,7 @@ index e807b743353d..ddb9e65b0381 100644 } static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) -@@ -7347,8 +7785,10 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) +@@ -7347,8 +7779,10 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se))) return false; @@ -1160,7 +1273,15 @@ index e807b743353d..ddb9e65b0381 100644 yield_task_fair(rq); -@@ -7575,6 +8015,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) +@@ -7556,6 +7990,7 @@ struct lb_env { + struct list_head tasks; + }; + ++#if !defined(CONFIG_CACULE_RDB) + /* + * Is this task likely cache-hot: + */ +@@ -7575,6 +8010,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) if (env->sd->flags & SD_SHARE_CPUCAPACITY) return 0; @@ -1168,7 +1289,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * Buddy candidates are cache hot: */ -@@ -7582,6 +8023,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) +@@ -7582,6 +8018,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) (&p->se == cfs_rq_of(&p->se)->next || &p->se == cfs_rq_of(&p->se)->last)) return 1; @@ -1176,7 +1297,185 @@ index e807b743353d..ddb9e65b0381 100644 if (sysctl_sched_migration_cost == -1) return 1; -@@ -10592,9 +11034,11 @@ static void nohz_newidle_balance(struct rq *this_rq) +@@ -7975,6 +8412,7 @@ static void attach_tasks(struct lb_env *env) + + rq_unlock(env->dst_rq, &rf); + } ++#endif + + #ifdef CONFIG_NO_HZ_COMMON + static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) +@@ -8024,6 +8462,7 @@ static inline void update_blocked_load_tick(struct rq *rq) {} + static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {} + #endif + ++#if !defined(CONFIG_CACULE_RDB) + static bool __update_blocked_others(struct rq *rq, bool *done) + { + const struct sched_class *curr_class; +@@ -8049,9 +8488,11 @@ static bool __update_blocked_others(struct rq *rq, bool *done) + + return decayed; + } ++#endif + + #ifdef CONFIG_FAIR_GROUP_SCHED + ++#if !defined(CONFIG_CACULE_RDB) + static bool __update_blocked_fair(struct rq *rq, bool *done) + { + struct cfs_rq *cfs_rq, *pos; +@@ -8091,6 +8532,7 @@ static bool __update_blocked_fair(struct rq *rq, bool *done) + + return decayed; + } ++#endif + + /* + * Compute the hierarchical load factor for cfs_rq and all its ascendants. +@@ -8157,6 +8599,7 @@ static unsigned long task_h_load(struct task_struct *p) + } + #endif + ++#if !defined(CONFIG_CACULE_RDB) + static void update_blocked_averages(int cpu) + { + bool decayed = false, done = true; +@@ -8175,6 +8618,7 @@ static void update_blocked_averages(int cpu) + cpufreq_update_util(rq, 0); + rq_unlock_irqrestore(rq, &rf); + } ++#endif + + /********** Helpers for find_busiest_group ************************/ + +@@ -9278,6 +9722,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s + * different in groups. 
+ */ + ++#if !defined(CONFIG_CACULE_RDB) + /** + * find_busiest_group - Returns the busiest group within the sched_domain + * if there is an imbalance. +@@ -9546,6 +9991,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, + + return busiest; + } ++#endif + + /* + * Max backoff if we encounter pinned tasks. Pretty arbitrary value, but +@@ -9582,6 +10028,7 @@ imbalanced_active_balance(struct lb_env *env) + return 0; + } + ++#if !defined(CONFIG_CACULE_RDB) + static int need_active_balance(struct lb_env *env) + { + struct sched_domain *sd = env->sd; +@@ -9914,6 +10361,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, + out: + return ld_moved; + } ++#endif + + static inline unsigned long + get_sd_balance_interval(struct sched_domain *sd, int cpu_busy) +@@ -9952,6 +10400,7 @@ update_next_balance(struct sched_domain *sd, unsigned long *next_balance) + *next_balance = next; + } + ++#if !defined(CONFIG_CACULE_RDB) + /* + * active_load_balance_cpu_stop is run by the CPU stopper. It pushes + * running tasks off the busiest CPU onto idle CPUs. It requires at +@@ -10037,6 +10486,7 @@ static int active_load_balance_cpu_stop(void *data) + } + + static DEFINE_SPINLOCK(balancing); ++#endif + + /* + * Scale the max load_balance interval with the number of CPUs in the system. +@@ -10047,6 +10497,7 @@ void update_max_interval(void) + max_load_balance_interval = HZ*num_online_cpus()/10; + } + ++#if !defined(CONFIG_CACULE_RDB) + /* + * It checks each scheduling domain to see if it is due to be balanced, + * and initiates a balancing operation if so. +@@ -10139,6 +10590,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) + rq->next_balance = next_balance; + + } ++#endif + + static inline int on_null_domain(struct rq *rq) + { +@@ -10172,6 +10624,7 @@ static inline int find_new_ilb(void) + return nr_cpu_ids; + } + ++#if !defined(CONFIG_CACULE_RDB) + /* + * Kick a CPU to do the nohz balancing, if it is time for it. We pick any + * idle CPU in the HK_FLAG_MISC housekeeping set (if there is one). +@@ -10322,6 +10775,7 @@ static void nohz_balancer_kick(struct rq *rq) + if (flags) + kick_ilb(flags); + } ++#endif /* CONFIG_CACULE_RDB */ + + static void set_cpu_sd_state_busy(int cpu) + { +@@ -10442,11 +10896,17 @@ static bool update_nohz_stats(struct rq *rq) + if (!time_after(jiffies, READ_ONCE(rq->last_blocked_load_update_tick))) + return true; + ++#if !defined(CONFIG_CACULE_RDB) + update_blocked_averages(cpu); ++#endif + + return rq->has_blocked_load; + } + ++#ifdef CONFIG_CACULE_RDB ++static int idle_try_pull_any(struct cfs_rq *cfs_rq); ++#endif ++ + /* + * Internal function that runs load balance for all idle cpus. The load balance + * can be a simple update of blocked load or a complete load balance with +@@ -10516,7 +10976,11 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags, + rq_unlock_irqrestore(rq, &rf); + + if (flags & NOHZ_BALANCE_KICK) ++#if !defined(CONFIG_CACULE_RDB) + rebalance_domains(rq, CPU_IDLE); ++#else ++ idle_try_pull_any(&rq->cfs); ++#endif + } + + if (time_after(next_balance, rq->next_balance)) { +@@ -10542,6 +11006,7 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags, + WRITE_ONCE(nohz.has_blocked, 1); + } + ++#if !defined(CONFIG_CACULE_RDB) + /* + * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the + * rebalancing for all the cpus for whom scheduler ticks are stopped. 
+@@ -10562,6 +11027,7 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) + + return true; + } ++#endif + + /* + * Check if we need to run the ILB for updating blocked load before entering +@@ -10592,9 +11058,11 @@ static void nohz_newidle_balance(struct rq *this_rq) if (!housekeeping_cpu(this_cpu, HK_FLAG_SCHED)) return; @@ -1188,7 +1487,262 @@ index e807b743353d..ddb9e65b0381 100644 /* Don't need to update blocked load of idle CPUs*/ if (!READ_ONCE(nohz.has_blocked) || -@@ -10657,7 +11101,10 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) +@@ -10609,6 +11077,7 @@ static void nohz_newidle_balance(struct rq *this_rq) + } + + #else /* !CONFIG_NO_HZ_COMMON */ ++#if !defined(CONFIG_CACULE_RDB) + static inline void nohz_balancer_kick(struct rq *rq) { } + + static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) +@@ -10617,8 +11086,134 @@ static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle + } + + static inline void nohz_newidle_balance(struct rq *this_rq) { } ++#endif ++ + #endif /* CONFIG_NO_HZ_COMMON */ + ++#ifdef CONFIG_CACULE_RDB ++static int ++can_migrate_task(struct task_struct *p, int dst_cpu, struct rq *src_rq) ++{ ++ if (task_running(src_rq, p)) ++ return 0; ++ ++ /* Disregard pcpu kthreads; they are where they need to be. */ ++ if (kthread_is_per_cpu(p)) ++ return 0; ++ ++ if (!cpumask_test_cpu(dst_cpu, p->cpus_ptr)) ++ return 0; ++ ++ if (p->se.exec_start == 0) ++ return 0; ++ ++ return 1; ++} ++ ++static void push_to_unlock(struct rq *this_rq, ++ struct rq *dst_rq, ++ struct task_struct *p, ++ int dst_cpu) ++{ ++ struct rq_flags rf; ++ ++ // detach task ++ deactivate_task(this_rq, p, DEQUEUE_NOCLOCK); ++ set_task_cpu(p, dst_cpu); ++ ++ // unlock this rq ++ raw_spin_unlock(&this_rq->lock); ++ ++ /* push to */ ++ rq_lock_irqsave(dst_rq, &rf); ++ update_rq_clock(dst_rq); ++ ++ activate_task(dst_rq, p, ENQUEUE_NOCLOCK); ++ check_preempt_curr(dst_rq, p, 0); ++ ++ // unlock src rq ++ rq_unlock(dst_rq, &rf); ++ local_irq_restore(rf.flags); ++} ++ ++static void pull_from_unlock(struct rq *this_rq, ++ struct rq *src_rq, ++ struct rq_flags *rf, ++ struct task_struct *p, ++ int dst_cpu) ++{ ++ // detach task ++ deactivate_task(src_rq, p, DEQUEUE_NOCLOCK); ++ set_task_cpu(p, dst_cpu); ++ ++ // unlock src rq ++ rq_unlock(src_rq, rf); ++ local_irq_restore(rf->flags); ++ ++ // lock this rq ++ raw_spin_lock(&this_rq->lock); ++ update_rq_clock(this_rq); ++ ++ activate_task(this_rq, p, ENQUEUE_NOCLOCK); ++ check_preempt_curr(this_rq, p, 0); ++ ++ // unlock this rq ++ raw_spin_unlock(&this_rq->lock); ++} ++ ++static inline struct rq * ++find_max_IS_rq(struct rq *this_rq, int dst_cpu) ++{ ++ struct rq *tmp_rq, *max_rq = NULL; ++ int cpu; ++ unsigned int max_IS = this_rq->max_IS_score; ++ unsigned int local_IS; ++ ++ // find max hrrn ++ for_each_online_cpu(cpu) { ++ if (cpu == dst_cpu) ++ continue; ++ ++ tmp_rq = cpu_rq(cpu); ++ ++ if (tmp_rq->nr_running < 2 || !(READ_ONCE(tmp_rq->to_migrate_task))) ++ continue; ++ ++ local_IS = READ_ONCE(tmp_rq->max_IS_score); ++ ++ if (local_IS < max_IS) { ++ max_IS = local_IS; ++ max_rq = tmp_rq; ++ } ++ } ++ ++ return max_rq; ++} ++ ++static int try_pull_from(struct rq *src_rq, struct rq *this_rq) ++{ ++ struct rq_flags rf; ++ int dst_cpu = cpu_of(this_rq); ++ struct task_struct *p; ++ ++ rq_lock_irqsave(src_rq, &rf); ++ update_rq_clock(src_rq); ++ ++ if (src_rq->to_migrate_task && src_rq->nr_running > 1) { ++ p = src_rq->to_migrate_task; ++ ++ if 
(can_migrate_task(p, dst_cpu, src_rq)) { ++ pull_from_unlock(this_rq, src_rq, &rf, p, dst_cpu); ++ return 1; ++ } ++ } ++ ++ rq_unlock(src_rq, &rf); ++ local_irq_restore(rf.flags); ++ ++ return 0; ++} ++ + /* + * newidle_balance is called by schedule() if this_cpu is about to become + * idle. Attempts to pull tasks from other CPUs. +@@ -10629,6 +11224,111 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { } + * > 0 - success, new (fair) tasks present + */ + static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) ++{ ++ int this_cpu = this_rq->cpu; ++ struct task_struct *p = NULL; ++ struct rq *src_rq; ++ int src_cpu; ++ struct rq_flags src_rf; ++ int pulled_task = 0; ++ int cores_round = 1; ++ ++ update_misfit_status(NULL, this_rq); ++ /* ++ * We must set idle_stamp _before_ calling idle_balance(), such that we ++ * measure the duration of idle_balance() as idle time. ++ */ ++ this_rq->idle_stamp = rq_clock(this_rq); ++ ++ /* ++ * Do not pull tasks towards !active CPUs... ++ */ ++ if (!cpu_active(this_cpu)) ++ return 0; ++ ++ /* ++ * This is OK, because current is on_cpu, which avoids it being picked ++ * for load-balance and preemption/IRQs are still disabled avoiding ++ * further scheduler activity on it and we're being very careful to ++ * re-start the picking loop. ++ */ ++ rq_unpin_lock(this_rq, rf); ++ raw_spin_unlock(&this_rq->lock); ++ ++again: ++ for_each_online_cpu(src_cpu) { ++ ++ if (src_cpu == this_cpu) ++ continue; ++ ++ if (cores_round && !cpus_share_cache(src_cpu, this_cpu)) ++ continue; ++ ++ src_rq = cpu_rq(src_cpu); ++ ++ if (src_rq->nr_running < 2 ++ || !(READ_ONCE(src_rq->to_migrate_task))) ++ continue; ++ ++ rq_lock_irqsave(src_rq, &src_rf); ++ update_rq_clock(src_rq); ++ ++ if (src_rq->nr_running < 2 || !(src_rq->to_migrate_task)) ++ goto next; ++ ++ p = src_rq->to_migrate_task; ++ ++ if (can_migrate_task(p, this_cpu, src_rq)) { ++ pull_from_unlock(this_rq, src_rq, &src_rf, p, this_cpu); ++ ++ pulled_task = 1; ++ goto out; ++ } ++ ++next: ++ rq_unlock(src_rq, &src_rf); ++ local_irq_restore(src_rf.flags); ++ ++ /* ++ * Stop searching for tasks to pull if there are ++ * now runnable tasks on this rq. ++ */ ++ if (pulled_task || this_rq->nr_running > 0) ++ goto out; ++ } ++ ++ if (cores_round) { ++ // now search for all cpus ++ cores_round = 0; ++ goto again; ++ } ++ ++out: ++ raw_spin_lock(&this_rq->lock); ++ ++ /* ++ * While browsing the domains, we released the rq lock, a task could ++ * have been enqueued in the meantime. Since we're not going idle, ++ * pretend we pulled a task. ++ */ ++ if (this_rq->cfs.h_nr_running && !pulled_task) ++ pulled_task = 1; ++ ++ /* Is there a task of a high priority class? 
*/ ++ if (this_rq->nr_running != this_rq->cfs.h_nr_running) ++ pulled_task = -1; ++ ++ if (pulled_task) ++ this_rq->idle_stamp = 0; ++ else ++ nohz_newidle_balance(this_rq); ++ ++ rq_repin_lock(this_rq, rf); ++ ++ return pulled_task; ++} ++#else ++static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) + { + unsigned long next_balance = jiffies + HZ; + int this_cpu = this_rq->cpu; +@@ -10657,7 +11357,10 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) */ rq_unpin_lock(this_rq, rf); @@ -1200,7 +1754,236 @@ index e807b743353d..ddb9e65b0381 100644 !READ_ONCE(this_rq->rd->overload)) { rcu_read_lock(); -@@ -10825,11 +11272,28 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) +@@ -10782,6 +11485,217 @@ void trigger_load_balance(struct rq *rq) + + nohz_balancer_kick(rq); + } ++#endif ++ ++#ifdef CONFIG_CACULE_RDB ++static int ++idle_try_pull_any(struct cfs_rq *cfs_rq) ++{ ++ struct task_struct *p = NULL; ++ struct rq *this_rq = rq_of(cfs_rq), *src_rq; ++ int dst_cpu = cpu_of(this_rq); ++ int src_cpu; ++ struct rq_flags rf; ++ int pulled = 0; ++ int cores_round = 1; ++ ++again: ++ for_each_online_cpu(src_cpu) { ++ ++ if (src_cpu == dst_cpu) ++ continue; ++ ++ if (cores_round && !cpus_share_cache(src_cpu, dst_cpu)) ++ continue; ++ ++ src_rq = cpu_rq(src_cpu); ++ ++ if (src_rq->nr_running < 2 ++ || !(READ_ONCE(src_rq->to_migrate_task))) ++ continue; ++ ++ rq_lock_irqsave(src_rq, &rf); ++ update_rq_clock(src_rq); ++ ++ if (src_rq->nr_running < 2 || !(src_rq->to_migrate_task)) ++ goto next; ++ ++ p = src_rq->to_migrate_task; ++ ++ if (can_migrate_task(p, dst_cpu, src_rq)) { ++ pull_from_unlock(this_rq, src_rq, &rf, p, dst_cpu); ++ pulled = 1; ++ goto out; ++ } ++ ++next: ++ rq_unlock(src_rq, &rf); ++ local_irq_restore(rf.flags); ++ } ++ ++ if (cores_round) { ++ // now search for all cpus ++ cores_round = 0; ++ goto again; ++ } ++ ++out: ++ return pulled; ++} ++ ++ ++static int ++try_pull_higher_IS(struct rq *this_rq) ++{ ++ struct rq *max_rq; ++ int dst_cpu = cpu_of(this_rq); ++ ++ max_rq = find_max_IS_rq(this_rq, dst_cpu); ++ ++ if (!max_rq) ++ return 0; ++ ++ if (try_pull_from(max_rq, this_rq)) ++ return 1; ++ ++ return 0; ++} ++ ++static void try_push_any(struct rq *this_rq) ++{ ++ struct task_struct *p = NULL; ++ struct rq *dst_rq; ++ int dst_cpu; ++ int src_cpu = cpu_of(this_rq); ++ int cores_round = 1; ++ ++again: ++ for_each_online_cpu(dst_cpu) { ++ ++ if (dst_cpu == src_cpu) ++ continue; ++ ++ if (cores_round && !cpus_share_cache(src_cpu, dst_cpu)) ++ continue; ++ ++ dst_rq = cpu_rq(dst_cpu); ++ ++ if (dst_rq->nr_running >= this_rq->nr_running - 1) ++ continue; ++ ++ // lock this rq ++ raw_spin_lock(&this_rq->lock); ++ update_rq_clock(this_rq); ++ ++ if (!this_rq->to_migrate_task) { ++ // unlock this rq ++ raw_spin_unlock(&this_rq->lock); ++ return; ++ } ++ ++ p = this_rq->to_migrate_task; ++ ++ if (can_migrate_task(p, dst_cpu, this_rq)) { ++ push_to_unlock(this_rq, dst_rq, p, dst_cpu); ++ return; ++ } ++ ++ // unlock this rq ++ raw_spin_unlock(&this_rq->lock); ++ } ++ ++ if (cores_round) { ++ // now search for all cpus ++ cores_round = 0; ++ goto again; ++ } ++} ++ ++static void try_pull_any(struct rq *this_rq) ++{ ++ struct task_struct *p = NULL; ++ struct rq *src_rq; ++ int dst_cpu = cpu_of(this_rq); ++ int src_cpu; ++ struct rq_flags src_rf; ++ int cores_round = 1; ++ unsigned int this_max_IS = this_rq->max_IS_score; ++ ++again: ++ for_each_online_cpu(src_cpu) { ++ ++ if (src_cpu == dst_cpu) ++ continue; ++ ++ if 
(cores_round && !cpus_share_cache(src_cpu, dst_cpu)) ++ continue; ++ ++ src_rq = cpu_rq(src_cpu); ++ ++ p = READ_ONCE(src_rq->to_migrate_task); ++ if (src_rq->nr_running < 2 || !p ++ || READ_ONCE(src_rq->max_IS_score) >= this_max_IS) ++ continue; ++ ++ rq_lock_irqsave(src_rq, &src_rf); ++ update_rq_clock(src_rq); ++ ++ if (src_rq->nr_running < 2 || !(src_rq->to_migrate_task) ++ || src_rq->max_IS_score >= this_max_IS) ++ goto next; ++ ++ p = src_rq->to_migrate_task; ++ ++ if (can_migrate_task(p, dst_cpu, src_rq)) { ++ pull_from_unlock(this_rq, src_rq, &src_rf, p, dst_cpu); ++ return; ++ } ++ ++next: ++ rq_unlock(src_rq, &src_rf); ++ local_irq_restore(src_rf.flags); ++ } ++ ++ if (cores_round) { ++ // now search for all cpus ++ cores_round = 0; ++ goto again; ++ } ++} ++ ++static inline void ++active_balance(struct rq *rq) ++{ ++ if (rq->nr_running < 2) ++ try_pull_higher_IS(rq); ++ else { ++ try_push_any(rq); ++ try_pull_any(rq); ++ } ++} ++ ++void trigger_load_balance(struct rq *rq) ++{ ++ unsigned long interval; ++ ++#ifdef CONFIG_RDB_INTERVAL ++ if (time_before(jiffies, rq->next_balance)) ++ return; ++#endif ++ ++ if (rq->idle_balance) ++ idle_try_pull_any(&rq->cfs); ++ else { ++ active_balance(rq); ++ ++#ifdef CONFIG_RDB_INTERVAL ++ /* scale ms to jiffies */ ++ interval = msecs_to_jiffies(CONFIG_RDB_INTERVAL); ++ rq->next_balance = jiffies + interval; ++#endif ++ } ++} ++#endif + + static void rq_online_fair(struct rq *rq) + { +@@ -10818,6 +11732,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) + entity_tick(cfs_rq, se, queued); + } + ++#ifdef CONFIG_CACULE_RDB ++ update_IS(rq); ++#endif ++ + if (static_branch_unlikely(&sched_numa_balancing)) + task_tick_numa(rq, curr); + +@@ -10825,11 +11743,28 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) update_overutilized_status(task_rq(curr)); } @@ -1229,7 +2012,7 @@ index e807b743353d..ddb9e65b0381 100644 static void task_fork_fair(struct task_struct *p) { struct cfs_rq *cfs_rq; -@@ -10860,6 +11324,7 @@ static void task_fork_fair(struct task_struct *p) +@@ -10860,6 +11795,7 @@ static void task_fork_fair(struct task_struct *p) se->vruntime -= cfs_rq->min_vruntime; rq_unlock(rq, &rf); } @@ -1237,7 +2020,7 @@ index e807b743353d..ddb9e65b0381 100644 /* * Priority of the task has changed. 
Check to see if we preempt -@@ -10978,6 +11443,8 @@ static void attach_entity_cfs_rq(struct sched_entity *se) +@@ -10978,6 +11914,8 @@ static void attach_entity_cfs_rq(struct sched_entity *se) static void detach_task_cfs_rq(struct task_struct *p) { struct sched_entity *se = &p->se; @@ -1246,7 +2029,7 @@ index e807b743353d..ddb9e65b0381 100644 struct cfs_rq *cfs_rq = cfs_rq_of(se); if (!vruntime_normalized(p)) { -@@ -10988,6 +11455,7 @@ static void detach_task_cfs_rq(struct task_struct *p) +@@ -10988,6 +11926,7 @@ static void detach_task_cfs_rq(struct task_struct *p) place_entity(cfs_rq, se, 0); se->vruntime -= cfs_rq->min_vruntime; } @@ -1254,7 +2037,7 @@ index e807b743353d..ddb9e65b0381 100644 detach_entity_cfs_rq(se); } -@@ -10995,12 +11463,17 @@ static void detach_task_cfs_rq(struct task_struct *p) +@@ -10995,12 +11934,17 @@ static void detach_task_cfs_rq(struct task_struct *p) static void attach_task_cfs_rq(struct task_struct *p) { struct sched_entity *se = &p->se; @@ -1272,7 +2055,7 @@ index e807b743353d..ddb9e65b0381 100644 } static void switched_from_fair(struct rq *rq, struct task_struct *p) -@@ -11056,13 +11529,22 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) +@@ -11056,13 +12000,22 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first) void init_cfs_rq(struct cfs_rq *cfs_rq) { cfs_rq->tasks_timeline = RB_ROOT_CACHED; @@ -1295,11 +2078,33 @@ index e807b743353d..ddb9e65b0381 100644 } #ifdef CONFIG_FAIR_GROUP_SCHED +@@ -11387,7 +12340,9 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m) + __init void init_sched_fair_class(void) + { + #ifdef CONFIG_SMP ++#if !defined(CONFIG_CACULE_RDB) + open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); ++#endif + + #ifdef CONFIG_NO_HZ_COMMON + nohz.next_balance = jiffies; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index a189bec13729..0affe3be7c21 100644 +index 35f7efed75c4..6ab803743b40 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -525,10 +525,13 @@ struct cfs_rq { +@@ -159,6 +159,11 @@ extern void call_trace_sched_update_nr_running(struct rq *rq, int count); + */ + #define RUNTIME_INF ((u64)~0ULL) + ++#ifdef CONFIG_CACULE_SCHED ++#define YIELD_MARK 0x8000000000000000ULL ++#define YIELD_UNMARK 0x7FFFFFFFFFFFFFFFULL ++#endif ++ + static inline int idle_policy(int policy) + { + return policy == SCHED_IDLE; +@@ -525,10 +530,13 @@ struct cfs_rq { unsigned int idle_h_nr_running; /* SCHED_IDLE */ u64 exec_clock; @@ -1313,14 +2118,13 @@ index a189bec13729..0affe3be7c21 100644 struct rb_root_cached tasks_timeline; -@@ -537,9 +540,15 @@ struct cfs_rq { +@@ -537,9 +545,14 @@ struct cfs_rq { * It is set to NULL otherwise (i.e when none are currently running). 
*/ struct sched_entity *curr; +#ifdef CONFIG_CACULE_SCHED + struct cacule_node *head; + struct cacule_node *tail; -+ +#else struct sched_entity *next; struct sched_entity *last; @@ -1329,11 +2133,23 @@ index a189bec13729..0affe3be7c21 100644 #ifdef CONFIG_SCHED_DEBUG unsigned int nr_spread_over; +@@ -943,6 +956,11 @@ struct rq { + struct rt_rq rt; + struct dl_rq dl; + ++#ifdef CONFIG_CACULE_RDB ++ unsigned int max_IS_score; ++ struct task_struct *to_migrate_task; ++#endif ++ + #ifdef CONFIG_FAIR_GROUP_SCHED + /* list of leaf cfs_rq on this CPU: */ + struct list_head leaf_cfs_rq_list; diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index d4a78e08f6d8..e8cdedf74fed 100644 +index d4a78e08f6d8..d85615ec6cb9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c -@@ -1736,6 +1736,29 @@ static struct ctl_table kern_table[] = { +@@ -1736,6 +1736,59 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, @@ -1346,19 +2162,49 @@ index d4a78e08f6d8..e8cdedf74fed 100644 + .proc_handler = proc_dointvec, + }, + { -+ .procname = "sched_interactivity_threshold", -+ .data = &interactivity_threshold, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec, -+ }, -+ { + .procname = "sched_max_lifetime_ms", + .data = &cacule_max_lifetime, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, ++ { ++ .procname = "sched_cache_factor", ++ .data = &cache_factor, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { ++ .procname = "sched_cache_divisor", ++ .data = &cache_divisor, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { ++ .procname = "sched_starve_factor", ++ .data = &starve_factor, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { ++ .procname = "sched_starve_divisor", ++ .data = &starve_divisor, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { ++ .procname = "sched_cacule_yield", ++ .data = &cacule_yield, ++ .maxlen = sizeof (int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = &one_ul, ++ }, +#endif #ifdef CONFIG_SCHEDSTATS {
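The sysctl entries added above expose the new CacULE tunables at runtime. A minimal usage sketch, assuming the usual kernel.* to /proc/sys/kernel/* mapping; the values shown are simply the compile-time defaults declared in kernel/sched/fair.c earlier in this diff, not recommendations.

    # Read the current values:
    sysctl kernel.sched_interactivity_factor        # default 32768
    sysctl kernel.sched_max_lifetime_ms             # default 22000 (ms)

    # Adjust the cache/starvation scoring knobs (root required):
    sysctl -w kernel.sched_cache_factor=13107       # cache score weight
    sysctl -w kernel.sched_cache_divisor=1000000    # cache period scale, ns (1 ms)
    sysctl -w kernel.sched_starve_factor=19660      # starvation score weight
    sysctl -w kernel.sched_starve_divisor=3000000   # starvation scale, ns (3 ms)
    sysctl -w kernel.sched_cacule_yield=1           # 1 = mark yielding tasks, 0 = off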