psi: Reduce calls to sched_clock() in psi
We noticed that the cost of psi increases with the depth of the cgroup
hierarchy. In particular, cpu_clock() sticks out, as the kernel calls it
once per cgroup level while traversing up the tree. This patch reduces
the calls to cpu_clock() by reading the clock once per scheduling event
and passing the timestamp down the hierarchy.

perf bench was run on Intel Broadwell with 3 levels of cgroup nesting.

Before the patch:

$ perf bench sched all
 # Running sched/messaging benchmark...
 # 20 sender and receiver processes per group
 # 10 groups == 400 processes run

     Total time: 0.747 [sec]

 # Running sched/pipe benchmark...
 # Executed 1000000 pipe operations between two processes

     Total time: 3.516 [sec]

       3.516689 usecs/op
         284358 ops/sec

After the patch:

$ perf bench sched all
 # Running sched/messaging benchmark...
 # 20 sender and receiver processes per group
 # 10 groups == 400 processes run

     Total time: 0.640 [sec]

 # Running sched/pipe benchmark...
 # Executed 1000000 pipe operations between two processes

     Total time: 3.329 [sec]

       3.329820 usecs/op
         300316 ops/sec

Signed-off-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20210321205156.4186483-1-shakeelb@google.com
commit df77430639
parent 2a2f80ff63

1 changed file with 10 additions and 9 deletions
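The change boils down to hoisting the clock read out of the per-ancestor loop: before the patch, record_times() called cpu_clock() once for every cgroup level visited; after it, the caller reads the clock once per scheduling event and passes the timestamp down. Below is a minimal userspace sketch of that pattern for illustration only, not the kernel code: struct node, fake_clock() and the walk_*() helpers are hypothetical stand-ins for the psi group hierarchy, cpu_clock() and iterate_groups().

#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct node {
	struct node *parent;
	uint64_t state_start;	/* when the current state began */
	uint64_t time_in_state;	/* accumulated time, like groupc->times */
};

/* Hypothetical stand-in for cpu_clock(cpu): monotonic nanoseconds. */
static uint64_t fake_clock(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

/* Before the patch: one clock read per level of the hierarchy. */
static void walk_before(struct node *n)
{
	while (n) {
		uint64_t now = fake_clock();	/* repeated per ancestor */

		n->time_in_state += now - n->state_start;
		n->state_start = now;
		n = n->parent;
	}
}

/* After the patch: read the clock once, pass the timestamp down. */
static void walk_after(struct node *n)
{
	uint64_t now = fake_clock();		/* single read per event */

	while (n) {
		n->time_in_state += now - n->state_start;
		n->state_start = now;
		n = n->parent;
	}
}

int main(void)
{
	/* Three levels, mirroring the 3-level cgroup setup benchmarked above. */
	uint64_t t0 = fake_clock();
	struct node root = { .state_start = t0 };
	struct node mid  = { .parent = &root, .state_start = t0 };
	struct node leaf = { .parent = &mid,  .state_start = t0 };

	walk_before(&leaf);
	walk_after(&leaf);
	printf("leaf accumulated %llu ns\n",
	       (unsigned long long)leaf.time_in_state);
	return 0;
}

With the benchmark's 3 levels of nesting, this turns three clock reads per state change into one, and the saving grows with hierarchy depth.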
kernel/sched/psi.c (+10, -9)

@@ -644,12 +644,10 @@ static void poll_timer_fn(struct timer_list *t)
 	wake_up_interruptible(&group->poll_wait);
 }
 
-static void record_times(struct psi_group_cpu *groupc, int cpu)
+static void record_times(struct psi_group_cpu *groupc, u64 now)
 {
 	u32 delta;
-	u64 now;
 
-	now = cpu_clock(cpu);
 	delta = now - groupc->state_start;
 	groupc->state_start = now;
 
@@ -676,7 +674,7 @@ static void record_times(struct psi_group_cpu *groupc, int cpu)
 }
 
 static void psi_group_change(struct psi_group *group, int cpu,
-			     unsigned int clear, unsigned int set,
+			     unsigned int clear, unsigned int set, u64 now,
 			     bool wake_clock)
 {
 	struct psi_group_cpu *groupc;
@@ -696,7 +694,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
 	 */
 	write_seqcount_begin(&groupc->seq);
 
-	record_times(groupc, cpu);
+	record_times(groupc, now);
 
 	for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
 		if (!(m & (1 << t)))
@@ -788,12 +786,14 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 	struct psi_group *group;
 	bool wake_clock = true;
 	void *iter = NULL;
+	u64 now;
 
 	if (!task->pid)
 		return;
 
 	psi_flags_change(task, clear, set);
 
+	now = cpu_clock(cpu);
 	/*
 	 * Periodic aggregation shuts off if there is a period of no
 	 * task changes, so we wake it back up if necessary. However,
@@ -806,7 +806,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 		wake_clock = false;
 
 	while ((group = iterate_groups(task, &iter)))
-		psi_group_change(group, cpu, clear, set, wake_clock);
+		psi_group_change(group, cpu, clear, set, now, wake_clock);
 }
 
 void psi_task_switch(struct task_struct *prev, struct task_struct *next,
@@ -815,6 +815,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 	struct psi_group *group, *common = NULL;
 	int cpu = task_cpu(prev);
 	void *iter;
+	u64 now = cpu_clock(cpu);
 
 	if (next->pid) {
 		bool identical_state;
@@ -836,7 +837,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 				break;
 			}
 
-			psi_group_change(group, cpu, 0, TSK_ONCPU, true);
+			psi_group_change(group, cpu, 0, TSK_ONCPU, now, true);
 		}
 	}
 
@@ -858,7 +859,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 
 		iter = NULL;
 		while ((group = iterate_groups(prev, &iter)) && group != common)
-			psi_group_change(group, cpu, clear, set, true);
+			psi_group_change(group, cpu, clear, set, now, true);
 
 		/*
 		 * TSK_ONCPU is handled up to the common ancestor. If we're tasked
@@ -867,7 +868,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 		if (sleep) {
 			clear &= ~TSK_ONCPU;
 			for (; group; group = iterate_groups(prev, &iter))
-				psi_group_change(group, cpu, clear, set, true);
+				psi_group_change(group, cpu, clear, set, now, true);
 		}
 	}
 }