On 2021/11/11 5:46, Barry Song wrote:
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ff69f245b939..852a048a5f8c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6265,10 +6265,10 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target) { struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
int i, cpu, idle_cpu = -1, nr = INT_MAX;
int i, cpu, scan_from, idle_cpu = -1, nr = INT_MAX;
struct sched_domain *this_sd, *cluster_sd; struct rq *this_rq = this_rq(); int this = smp_processor_id();
struct sched_domain *this_sd; u64 time = 0; this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
@@ -6276,6 +6276,10 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool return -1;
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
cpumask_clear_cpu(target, cpus);
We may want to double-check whether it is really necessary to clear the target, since the target can become idle again after it has been scanned.
These two situations made little difference in my previous tests. And, as we discussed, if the nr throttling works we won't scan the whole LLC most of the time.
Well, I'll test both cases with the fix below to see whether there is an obvious difference.
Thanks, Yicong
cluster_sd = rcu_dereference(*this_cpu_ptr(&sd_cluster));
This line is wrong; it should be:
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 852a048..0a946ba 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6278,7 +6278,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); cpumask_clear_cpu(target, cpus);
cluster_sd = rcu_dereference(*this_cpu_ptr(&sd_cluster));
cluster_sd = rcu_dereference(per_cpu(sd_cluster, target)); scan_from = cluster_sd ?
cpumask_first(sched_domain_span(cluster_sd)) : target + 1;
Testing needs to be done again with this fix.
scan_from = cluster_sd ? cpumask_first(sched_domain_span(cluster_sd)) : target + 1; if (sched_feat(SIS_PROP) && !has_idle_core) { u64 avg_cost, avg_idle, span_avg;
@@ -6305,7 +6309,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool time = cpu_clock(this); }
for_each_cpu_wrap(cpu, cpus, target + 1) {
for_each_cpu_wrap(cpu, cpus, scan_from) { if (has_idle_core) { i = select_idle_core(p, cpu, cpus, &idle_cpu); if ((unsigned int)i < nr_cpumask_bits)
-- 2.33.0
Thanks, Barry.