From: Barry Song <song.bao.hua@hisilicon.com>
For platforms with clusters, such as Kunpeng 920, tasks in the same cluster share the L3 cache tag and therefore see lower latency when synchronizing and accessing shared resources. Based on this, waking a task on a CPU in the same cluster as the waker makes the migration cost smaller. This patch tries to find a wake CPU by scanning the cluster first, before scanning the rest of the LLC.
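For illustration only (not part of the kernel change), the scan order this patch introduces can be modelled in plain C as below. This is a standalone sketch: the 16-bit masks and helper names are hypothetical stand-ins for the kernel's cpumasks and sched_domain_span().

  #include <stdint.h>
  #include <stdio.h>

  /* Bit i set means CPU i is a member of the set (or is idle). */
  typedef uint16_t mask_t;

  /* Return the lowest set bit as a CPU number, or -1 if the set is empty. */
  static int first_cpu(mask_t m)
  {
          int cpu;

          for (cpu = 0; cpu < 16; cpu++)
                  if (m & (1u << cpu))
                          return cpu;
          return -1;
  }

  /* Cluster-first scan: mirrors the order introduced by the patch. */
  static int pick_idle_cpu(mask_t cluster, mask_t llc, mask_t idle)
  {
          int cpu;

          /* 1. Scan the wakee's cluster first: cheapest migration. */
          cpu = first_cpu(cluster & idle);
          if (cpu >= 0)
                  return cpu;

          /*
           * 2. Fall back to the rest of the LLC, excluding the cluster
           *    CPUs that have already been scanned.
           */
          return first_cpu((llc & ~cluster) & idle);
  }

  int main(void)
  {
          mask_t cluster = 0x00f0;  /* CPUs 4-7 form the target's cluster */
          mask_t llc     = 0xffff;  /* CPUs 0-15 share the LLC */
          mask_t idle    = 0x0120;  /* CPUs 5 and 8 are currently idle */

          /* Prints 5: idle and inside the cluster, so preferred over CPU 8. */
          printf("picked CPU %d\n", pick_idle_cpu(cluster, llc, idle));
          return 0;
  }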
Benchmark tests have been done on a 2-socket, 4-NUMA Kunpeng 920 with 8 clusters in each NUMA node. The results from tbench and hackbench are rather positive.
tbench4 (Hmean is throughput in MB/sec; higher is better)
                  5.16-rc1-vanilla       5.16-rc1+patch
Hmean     1        341.78 (   0.00%)      350.10 *   2.43%*
Hmean     2        684.31 (   0.00%)      700.25 *   2.33%*
Hmean     4       1350.03 (   0.00%)     1374.33 *   1.80%*
Hmean     8       2563.33 (   0.00%)     2615.74 *   2.04%*
Hmean     16      4976.31 (   0.00%)     4911.05 *  -1.31%*
Hmean     32      8446.80 (   0.00%)     9076.71 *   7.46%*
Hmean     64      4938.98 (   0.00%)     5890.29 *  19.26%*
Hmean     128     7422.75 (   0.00%)     8941.65 *  20.46%*
Hmean     256     7503.72 (   0.00%)     7609.30 *   1.41%*
Hmean     512     6526.50 (   0.00%)     7616.90 *  16.71%*
hackbench-process-pipes (Amean is runtime in seconds; lower is better, so positive percentages are improvements)
                  5.16-rc1-vanilla       5.16-rc1+patch
Amean     1        0.7233 (   0.00%)      0.6048 *  16.38%*
Amean     4        1.6168 (   0.00%)      0.9831 *  39.19%*
Amean     7        1.7604 (   0.00%)      1.3456 *  23.56%*
Amean     12       2.1637 (   0.00%)      2.0515 *   5.19%*
Amean     21       3.7302 (   0.00%)      3.4755 *   6.83%*
Amean     30       6.8281 (   0.00%)      5.4964 *  19.50%*
Amean     48      11.5442 (   0.00%)      9.2672 *  19.72%*
Amean     79      14.1319 (   0.00%)     12.1617 *  13.94%*
Amean     110     17.2689 (   0.00%)     15.0081 *  13.09%*
Amean     141     20.2057 (   0.00%)     18.4041 *   8.92%*
Amean     172     25.2087 (   0.00%)     21.2069 *  15.87%*
Amean     203     28.4038 (   0.00%)     24.8319 *  12.58%*
Amean     234     32.4690 (   0.00%)     28.2500 *  12.99%*
Amean     256     33.1803 (   0.00%)     30.0114 *   9.55%*
Tested-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
---
 kernel/sched/fair.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6e476f6d9435..f8b094738c03 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6230,6 +6230,34 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 
 #endif /* CONFIG_SCHED_SMT */
 
+#ifdef CONFIG_SCHED_CLUSTER
+static inline int scan_cluster(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target, int *idle_cpu)
+{
+	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+	int i, cpu;
+
+	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+	cpumask_clear_cpu(target, cpus);
+
+	for_each_cpu_wrap(cpu, cpus, target + 1) {
+		if (has_idle_core)
+			i = select_idle_core(p, cpu, cpus, idle_cpu);
+		else
+			i = __select_idle_cpu(cpu, p);
+
+		if ((unsigned int)i < nr_cpumask_bits)
+			return i;
+	}
+
+	return -1;
+}
+#else
+static inline int scan_cluster(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target, int *idle_cpu)
+{
+	return -1;
+}
+#endif
+
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
@@ -6241,14 +6269,25 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 	int i, cpu, idle_cpu = -1, nr = INT_MAX;
 	struct rq *this_rq = this_rq();
 	int this = smp_processor_id();
-	struct sched_domain *this_sd;
+	struct sched_domain *this_sd, *cluster_sd;
 	u64 time = 0;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
 		return -1;
 
+	/* scan cluster before scanning LLC */
+	cluster_sd = rcu_dereference(per_cpu(sd_cluster, target));
+	if (cluster_sd) {
+		i = scan_cluster(p, cluster_sd, has_idle_core, target, &idle_cpu);
+		if ((unsigned int)i < nr_cpumask_bits)
+			return i;
+	}
+
+	/* scan LLC excluding cluster */
 	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+	if (cluster_sd)
+		cpumask_andnot(cpus, cpus, sched_domain_span(cluster_sd));
 
 	if (sched_feat(SIS_PROP) && !has_idle_core) {
 		u64 avg_cost, avg_idle, span_avg;
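A note on the idiom, not part of the patch: the `(unsigned int)i < nr_cpumask_bits` tests above are the scheduler's usual single-comparison validity check. Casting -1 to unsigned yields UINT_MAX, which fails the bound check, so "no CPU found" and out-of-range values are rejected with one compare. A minimal standalone demonstration, with NR_BITS as a hypothetical stand-in for nr_cpumask_bits:

  #include <stdio.h>

  #define NR_BITS 16  /* hypothetical stand-in for nr_cpumask_bits */

  /* One comparison covers both i == -1 and i out of range. */
  static int is_valid_cpu(int i)
  {
          return (unsigned int)i < NR_BITS;
  }

  int main(void)
  {
          printf("%d\n", is_valid_cpu(5));   /* 1: valid CPU number */
          printf("%d\n", is_valid_cpu(-1));  /* 0: -1 wraps to UINT_MAX */
          printf("%d\n", is_valid_cpu(16));  /* 0: beyond the bitmap */
          return 0;
  }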