On 2021/11/11 5:46, Barry Song wrote:
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ff69f245b939..852a048a5f8c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6265,10 +6265,10 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target) { struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
int i, cpu, idle_cpu = -1, nr = INT_MAX;
int i, cpu, scan_from, idle_cpu = -1, nr = INT_MAX;
struct sched_domain *this_sd, *cluster_sd; struct rq *this_rq = this_rq(); int this = smp_processor_id();
struct sched_domain *this_sd; u64 time = 0; this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
@@ -6276,6 +6276,10 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool return -1;
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
cpumask_clear_cpu(target, cpus);
May double test if it is necessary to clear the target as target can be idle after it was scanned.
cluster_sd = rcu_dereference(*this_cpu_ptr(&sd_cluster));
This line is wrong. should be:
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 852a048..0a946ba 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6278,7 +6278,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); cpumask_clear_cpu(target, cpus);
cluster_sd = rcu_dereference(*this_cpu_ptr(&sd_cluster));
cluster_sd = rcu_dereference(per_cpu(sd_cluster, target)); scan_from = cluster_sd ?
cpumask_first(sched_domain_span(cluster_sd)) : target + 1;
testing needs to be done again with this fix.
Since we also need to reconsider which LLC domain we should take?
Currently we take the LLC of this CPU and as tested by @shenyang, there're cases that target and this cpu from different NUMA(=LLC on 920).
scan_from = cluster_sd ? cpumask_first(sched_domain_span(cluster_sd)) : target + 1; if (sched_feat(SIS_PROP) && !has_idle_core) { u64 avg_cost, avg_idle, span_avg;
@@ -6305,7 +6309,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool time = cpu_clock(this); }
for_each_cpu_wrap(cpu, cpus, target + 1) {
for_each_cpu_wrap(cpu, cpus, scan_from) { if (has_idle_core) { i = select_idle_core(p, cpu, cpus, &idle_cpu); if ((unsigned int)i < nr_cpumask_bits)
-- 2.33.0
Thanks barry .