sched/psi: Rename existing poll members in preparation
Rename the existing poll members in the PSI implementation to make a clear distinction between the privileged and unprivileged trigger code that will be implemented in the next patch.

Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20230330105418.77061-3-cerasuolodomenico@gmail.com
This commit is contained in:
parent
7fab21fa0d
commit
65457b74aa
2 changed files with 98 additions and 97 deletions
|
@ -175,26 +175,26 @@ struct psi_group {
|
||||||
u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
|
u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
|
||||||
unsigned long avg[NR_PSI_STATES - 1][3];
|
unsigned long avg[NR_PSI_STATES - 1][3];
|
||||||
|
|
||||||
/* Monitor work control */
|
/* Monitor RT polling work control */
|
||||||
struct task_struct __rcu *poll_task;
|
struct task_struct __rcu *rtpoll_task;
|
||||||
struct timer_list poll_timer;
|
struct timer_list rtpoll_timer;
|
||||||
wait_queue_head_t poll_wait;
|
wait_queue_head_t rtpoll_wait;
|
||||||
atomic_t poll_wakeup;
|
atomic_t rtpoll_wakeup;
|
||||||
atomic_t poll_scheduled;
|
atomic_t rtpoll_scheduled;
|
||||||
|
|
||||||
/* Protects data used by the monitor */
|
/* Protects data used by the monitor */
|
||||||
struct mutex trigger_lock;
|
struct mutex rtpoll_trigger_lock;
|
||||||
|
|
||||||
/* Configured polling triggers */
|
/* Configured RT polling triggers */
|
||||||
struct list_head triggers;
|
struct list_head rtpoll_triggers;
|
||||||
u32 nr_triggers[NR_PSI_STATES - 1];
|
u32 rtpoll_nr_triggers[NR_PSI_STATES - 1];
|
||||||
u32 poll_states;
|
u32 rtpoll_states;
|
||||||
u64 poll_min_period;
|
u64 rtpoll_min_period;
|
||||||
|
|
||||||
/* Total stall times at the start of monitor activation */
|
/* Total stall times at the start of RT polling monitor activation */
|
||||||
u64 polling_total[NR_PSI_STATES - 1];
|
u64 rtpoll_total[NR_PSI_STATES - 1];
|
||||||
u64 polling_next_update;
|
u64 rtpoll_next_update;
|
||||||
u64 polling_until;
|
u64 rtpoll_until;
|
||||||
};
|
};
|
||||||
|
|
||||||
#else /* CONFIG_PSI */
|
#else /* CONFIG_PSI */
|
||||||
|
|
|
@ -189,14 +189,14 @@ static void group_init(struct psi_group *group)
|
||||||
INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
|
INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
|
||||||
mutex_init(&group->avgs_lock);
|
mutex_init(&group->avgs_lock);
|
||||||
/* Init trigger-related members */
|
/* Init trigger-related members */
|
||||||
atomic_set(&group->poll_scheduled, 0);
|
atomic_set(&group->rtpoll_scheduled, 0);
|
||||||
mutex_init(&group->trigger_lock);
|
mutex_init(&group->rtpoll_trigger_lock);
|
||||||
INIT_LIST_HEAD(&group->triggers);
|
INIT_LIST_HEAD(&group->rtpoll_triggers);
|
||||||
group->poll_min_period = U32_MAX;
|
group->rtpoll_min_period = U32_MAX;
|
||||||
group->polling_next_update = ULLONG_MAX;
|
group->rtpoll_next_update = ULLONG_MAX;
|
||||||
init_waitqueue_head(&group->poll_wait);
|
init_waitqueue_head(&group->rtpoll_wait);
|
||||||
timer_setup(&group->poll_timer, poll_timer_fn, 0);
|
timer_setup(&group->rtpoll_timer, poll_timer_fn, 0);
|
||||||
rcu_assign_pointer(group->poll_task, NULL);
|
rcu_assign_pointer(group->rtpoll_task, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
void __init psi_init(void)
|
void __init psi_init(void)
|
||||||
|
@ -440,11 +440,11 @@ static u64 update_triggers(struct psi_group *group, u64 now)
|
||||||
* On subsequent updates, calculate growth deltas and let
|
* On subsequent updates, calculate growth deltas and let
|
||||||
* watchers know when their specified thresholds are exceeded.
|
* watchers know when their specified thresholds are exceeded.
|
||||||
*/
|
*/
|
||||||
list_for_each_entry(t, &group->triggers, node) {
|
list_for_each_entry(t, &group->rtpoll_triggers, node) {
|
||||||
u64 growth;
|
u64 growth;
|
||||||
bool new_stall;
|
bool new_stall;
|
||||||
|
|
||||||
new_stall = group->polling_total[t->state] != total[t->state];
|
new_stall = group->rtpoll_total[t->state] != total[t->state];
|
||||||
|
|
||||||
/* Check for stall activity or a previous threshold breach */
|
/* Check for stall activity or a previous threshold breach */
|
||||||
if (!new_stall && !t->pending_event)
|
if (!new_stall && !t->pending_event)
|
||||||
|
@ -486,10 +486,10 @@ static u64 update_triggers(struct psi_group *group, u64 now)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (update_total)
|
if (update_total)
|
||||||
memcpy(group->polling_total, total,
|
memcpy(group->rtpoll_total, total,
|
||||||
sizeof(group->polling_total));
|
sizeof(group->rtpoll_total));
|
||||||
|
|
||||||
return now + group->poll_min_period;
|
return now + group->rtpoll_min_period;
|
||||||
}
|
}
|
||||||
|
|
||||||
static u64 update_averages(struct psi_group *group, u64 now)
|
static u64 update_averages(struct psi_group *group, u64 now)
|
||||||
|
@ -582,53 +582,53 @@ static void init_triggers(struct psi_group *group, u64 now)
|
||||||
{
|
{
|
||||||
struct psi_trigger *t;
|
struct psi_trigger *t;
|
||||||
|
|
||||||
list_for_each_entry(t, &group->triggers, node)
|
list_for_each_entry(t, &group->rtpoll_triggers, node)
|
||||||
window_reset(&t->win, now,
|
window_reset(&t->win, now,
|
||||||
group->total[PSI_POLL][t->state], 0);
|
group->total[PSI_POLL][t->state], 0);
|
||||||
memcpy(group->polling_total, group->total[PSI_POLL],
|
memcpy(group->rtpoll_total, group->total[PSI_POLL],
|
||||||
sizeof(group->polling_total));
|
sizeof(group->rtpoll_total));
|
||||||
group->polling_next_update = now + group->poll_min_period;
|
group->rtpoll_next_update = now + group->rtpoll_min_period;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Schedule polling if it's not already scheduled or forced. */
|
/* Schedule polling if it's not already scheduled or forced. */
|
||||||
static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay,
|
static void psi_schedule_rtpoll_work(struct psi_group *group, unsigned long delay,
|
||||||
bool force)
|
bool force)
|
||||||
{
|
{
|
||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* atomic_xchg should be called even when !force to provide a
|
* atomic_xchg should be called even when !force to provide a
|
||||||
* full memory barrier (see the comment inside psi_poll_work).
|
* full memory barrier (see the comment inside psi_rtpoll_work).
|
||||||
*/
|
*/
|
||||||
if (atomic_xchg(&group->poll_scheduled, 1) && !force)
|
if (atomic_xchg(&group->rtpoll_scheduled, 1) && !force)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
|
||||||
task = rcu_dereference(group->poll_task);
|
task = rcu_dereference(group->rtpoll_task);
|
||||||
/*
|
/*
|
||||||
* kworker might be NULL in case psi_trigger_destroy races with
|
* kworker might be NULL in case psi_trigger_destroy races with
|
||||||
* psi_task_change (hotpath) which can't use locks
|
* psi_task_change (hotpath) which can't use locks
|
||||||
*/
|
*/
|
||||||
if (likely(task))
|
if (likely(task))
|
||||||
mod_timer(&group->poll_timer, jiffies + delay);
|
mod_timer(&group->rtpoll_timer, jiffies + delay);
|
||||||
else
|
else
|
||||||
atomic_set(&group->poll_scheduled, 0);
|
atomic_set(&group->rtpoll_scheduled, 0);
|
||||||
|
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void psi_poll_work(struct psi_group *group)
|
static void psi_rtpoll_work(struct psi_group *group)
|
||||||
{
|
{
|
||||||
bool force_reschedule = false;
|
bool force_reschedule = false;
|
||||||
u32 changed_states;
|
u32 changed_states;
|
||||||
u64 now;
|
u64 now;
|
||||||
|
|
||||||
mutex_lock(&group->trigger_lock);
|
mutex_lock(&group->rtpoll_trigger_lock);
|
||||||
|
|
||||||
now = sched_clock();
|
now = sched_clock();
|
||||||
|
|
||||||
if (now > group->polling_until) {
|
if (now > group->rtpoll_until) {
|
||||||
/*
|
/*
|
||||||
* We are either about to start or might stop polling if no
|
* We are either about to start or might stop polling if no
|
||||||
* state change was recorded. Resetting poll_scheduled leaves
|
* state change was recorded. Resetting poll_scheduled leaves
|
||||||
|
@ -638,7 +638,7 @@ static void psi_poll_work(struct psi_group *group)
|
||||||
* should be negligible and polling_next_update still keeps
|
* should be negligible and polling_next_update still keeps
|
||||||
* updates correctly on schedule.
|
* updates correctly on schedule.
|
||||||
*/
|
*/
|
||||||
atomic_set(&group->poll_scheduled, 0);
|
atomic_set(&group->rtpoll_scheduled, 0);
|
||||||
/*
|
/*
|
||||||
* A task change can race with the poll worker that is supposed to
|
* A task change can race with the poll worker that is supposed to
|
||||||
* report on it. To avoid missing events, ensure ordering between
|
* report on it. To avoid missing events, ensure ordering between
|
||||||
|
@ -667,9 +667,9 @@ static void psi_poll_work(struct psi_group *group)
|
||||||
|
|
||||||
collect_percpu_times(group, PSI_POLL, &changed_states);
|
collect_percpu_times(group, PSI_POLL, &changed_states);
|
||||||
|
|
||||||
if (changed_states & group->poll_states) {
|
if (changed_states & group->rtpoll_states) {
|
||||||
/* Initialize trigger windows when entering polling mode */
|
/* Initialize trigger windows when entering polling mode */
|
||||||
if (now > group->polling_until)
|
if (now > group->rtpoll_until)
|
||||||
init_triggers(group, now);
|
init_triggers(group, now);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -677,50 +677,50 @@ static void psi_poll_work(struct psi_group *group)
|
||||||
* minimum tracking window as long as monitor states are
|
* minimum tracking window as long as monitor states are
|
||||||
* changing.
|
* changing.
|
||||||
*/
|
*/
|
||||||
group->polling_until = now +
|
group->rtpoll_until = now +
|
||||||
group->poll_min_period * UPDATES_PER_WINDOW;
|
group->rtpoll_min_period * UPDATES_PER_WINDOW;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (now > group->polling_until) {
|
if (now > group->rtpoll_until) {
|
||||||
group->polling_next_update = ULLONG_MAX;
|
group->rtpoll_next_update = ULLONG_MAX;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (now >= group->polling_next_update)
|
if (now >= group->rtpoll_next_update)
|
||||||
group->polling_next_update = update_triggers(group, now);
|
group->rtpoll_next_update = update_triggers(group, now);
|
||||||
|
|
||||||
psi_schedule_poll_work(group,
|
psi_schedule_rtpoll_work(group,
|
||||||
nsecs_to_jiffies(group->polling_next_update - now) + 1,
|
nsecs_to_jiffies(group->rtpoll_next_update - now) + 1,
|
||||||
force_reschedule);
|
force_reschedule);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
mutex_unlock(&group->trigger_lock);
|
mutex_unlock(&group->rtpoll_trigger_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int psi_poll_worker(void *data)
|
static int psi_rtpoll_worker(void *data)
|
||||||
{
|
{
|
||||||
struct psi_group *group = (struct psi_group *)data;
|
struct psi_group *group = (struct psi_group *)data;
|
||||||
|
|
||||||
sched_set_fifo_low(current);
|
sched_set_fifo_low(current);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
wait_event_interruptible(group->poll_wait,
|
wait_event_interruptible(group->rtpoll_wait,
|
||||||
atomic_cmpxchg(&group->poll_wakeup, 1, 0) ||
|
atomic_cmpxchg(&group->rtpoll_wakeup, 1, 0) ||
|
||||||
kthread_should_stop());
|
kthread_should_stop());
|
||||||
if (kthread_should_stop())
|
if (kthread_should_stop())
|
||||||
break;
|
break;
|
||||||
|
|
||||||
psi_poll_work(group);
|
psi_rtpoll_work(group);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void poll_timer_fn(struct timer_list *t)
|
static void poll_timer_fn(struct timer_list *t)
|
||||||
{
|
{
|
||||||
struct psi_group *group = from_timer(group, t, poll_timer);
|
struct psi_group *group = from_timer(group, t, rtpoll_timer);
|
||||||
|
|
||||||
atomic_set(&group->poll_wakeup, 1);
|
atomic_set(&group->rtpoll_wakeup, 1);
|
||||||
wake_up_interruptible(&group->poll_wait);
|
wake_up_interruptible(&group->rtpoll_wait);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void record_times(struct psi_group_cpu *groupc, u64 now)
|
static void record_times(struct psi_group_cpu *groupc, u64 now)
|
||||||
|
@ -851,8 +851,8 @@ static void psi_group_change(struct psi_group *group, int cpu,
|
||||||
|
|
||||||
write_seqcount_end(&groupc->seq);
|
write_seqcount_end(&groupc->seq);
|
||||||
|
|
||||||
if (state_mask & group->poll_states)
|
if (state_mask & group->rtpoll_states)
|
||||||
psi_schedule_poll_work(group, 1, false);
|
psi_schedule_rtpoll_work(group, 1, false);
|
||||||
|
|
||||||
if (wake_clock && !delayed_work_pending(&group->avgs_work))
|
if (wake_clock && !delayed_work_pending(&group->avgs_work))
|
||||||
schedule_delayed_work(&group->avgs_work, PSI_FREQ);
|
schedule_delayed_work(&group->avgs_work, PSI_FREQ);
|
||||||
|
@ -1005,8 +1005,8 @@ void psi_account_irqtime(struct task_struct *task, u32 delta)
|
||||||
|
|
||||||
write_seqcount_end(&groupc->seq);
|
write_seqcount_end(&groupc->seq);
|
||||||
|
|
||||||
if (group->poll_states & (1 << PSI_IRQ_FULL))
|
if (group->rtpoll_states & (1 << PSI_IRQ_FULL))
|
||||||
psi_schedule_poll_work(group, 1, false);
|
psi_schedule_rtpoll_work(group, 1, false);
|
||||||
} while ((group = group->parent));
|
} while ((group = group->parent));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1101,7 +1101,7 @@ void psi_cgroup_free(struct cgroup *cgroup)
|
||||||
cancel_delayed_work_sync(&cgroup->psi->avgs_work);
|
cancel_delayed_work_sync(&cgroup->psi->avgs_work);
|
||||||
free_percpu(cgroup->psi->pcpu);
|
free_percpu(cgroup->psi->pcpu);
|
||||||
/* All triggers must be removed by now */
|
/* All triggers must be removed by now */
|
||||||
WARN_ONCE(cgroup->psi->poll_states, "psi: trigger leak\n");
|
WARN_ONCE(cgroup->psi->rtpoll_states, "psi: trigger leak\n");
|
||||||
kfree(cgroup->psi);
|
kfree(cgroup->psi);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1302,29 +1302,29 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
||||||
init_waitqueue_head(&t->event_wait);
|
init_waitqueue_head(&t->event_wait);
|
||||||
t->pending_event = false;
|
t->pending_event = false;
|
||||||
|
|
||||||
mutex_lock(&group->trigger_lock);
|
mutex_lock(&group->rtpoll_trigger_lock);
|
||||||
|
|
||||||
if (!rcu_access_pointer(group->poll_task)) {
|
if (!rcu_access_pointer(group->rtpoll_task)) {
|
||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
|
|
||||||
task = kthread_create(psi_poll_worker, group, "psimon");
|
task = kthread_create(psi_rtpoll_worker, group, "psimon");
|
||||||
if (IS_ERR(task)) {
|
if (IS_ERR(task)) {
|
||||||
kfree(t);
|
kfree(t);
|
||||||
mutex_unlock(&group->trigger_lock);
|
mutex_unlock(&group->rtpoll_trigger_lock);
|
||||||
return ERR_CAST(task);
|
return ERR_CAST(task);
|
||||||
}
|
}
|
||||||
atomic_set(&group->poll_wakeup, 0);
|
atomic_set(&group->rtpoll_wakeup, 0);
|
||||||
wake_up_process(task);
|
wake_up_process(task);
|
||||||
rcu_assign_pointer(group->poll_task, task);
|
rcu_assign_pointer(group->rtpoll_task, task);
|
||||||
}
|
}
|
||||||
|
|
||||||
list_add(&t->node, &group->triggers);
|
list_add(&t->node, &group->rtpoll_triggers);
|
||||||
group->poll_min_period = min(group->poll_min_period,
|
group->rtpoll_min_period = min(group->rtpoll_min_period,
|
||||||
div_u64(t->win.size, UPDATES_PER_WINDOW));
|
div_u64(t->win.size, UPDATES_PER_WINDOW));
|
||||||
group->nr_triggers[t->state]++;
|
group->rtpoll_nr_triggers[t->state]++;
|
||||||
group->poll_states |= (1 << t->state);
|
group->rtpoll_states |= (1 << t->state);
|
||||||
|
|
||||||
mutex_unlock(&group->trigger_lock);
|
mutex_unlock(&group->rtpoll_trigger_lock);
|
||||||
|
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
@ -1349,51 +1349,52 @@ void psi_trigger_destroy(struct psi_trigger *t)
|
||||||
*/
|
*/
|
||||||
wake_up_pollfree(&t->event_wait);
|
wake_up_pollfree(&t->event_wait);
|
||||||
|
|
||||||
mutex_lock(&group->trigger_lock);
|
mutex_lock(&group->rtpoll_trigger_lock);
|
||||||
|
|
||||||
if (!list_empty(&t->node)) {
|
if (!list_empty(&t->node)) {
|
||||||
struct psi_trigger *tmp;
|
struct psi_trigger *tmp;
|
||||||
u64 period = ULLONG_MAX;
|
u64 period = ULLONG_MAX;
|
||||||
|
|
||||||
list_del(&t->node);
|
list_del(&t->node);
|
||||||
group->nr_triggers[t->state]--;
|
group->rtpoll_nr_triggers[t->state]--;
|
||||||
if (!group->nr_triggers[t->state])
|
if (!group->rtpoll_nr_triggers[t->state])
|
||||||
group->poll_states &= ~(1 << t->state);
|
group->rtpoll_states &= ~(1 << t->state);
|
||||||
/* reset min update period for the remaining triggers */
|
/* reset min update period for the remaining triggers */
|
||||||
list_for_each_entry(tmp, &group->triggers, node)
|
list_for_each_entry(tmp, &group->rtpoll_triggers, node)
|
||||||
period = min(period, div_u64(tmp->win.size,
|
period = min(period, div_u64(tmp->win.size,
|
||||||
UPDATES_PER_WINDOW));
|
UPDATES_PER_WINDOW));
|
||||||
group->poll_min_period = period;
|
group->rtpoll_min_period = period;
|
||||||
/* Destroy poll_task when the last trigger is destroyed */
|
/* Destroy rtpoll_task when the last trigger is destroyed */
|
||||||
if (group->poll_states == 0) {
|
if (group->rtpoll_states == 0) {
|
||||||
group->polling_until = 0;
|
group->rtpoll_until = 0;
|
||||||
task_to_destroy = rcu_dereference_protected(
|
task_to_destroy = rcu_dereference_protected(
|
||||||
group->poll_task,
|
group->rtpoll_task,
|
||||||
lockdep_is_held(&group->trigger_lock));
|
lockdep_is_held(&group->rtpoll_trigger_lock));
|
||||||
rcu_assign_pointer(group->poll_task, NULL);
|
rcu_assign_pointer(group->rtpoll_task, NULL);
|
||||||
del_timer(&group->poll_timer);
|
del_timer(&group->rtpoll_timer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_unlock(&group->trigger_lock);
|
mutex_unlock(&group->rtpoll_trigger_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait for psi_schedule_poll_work RCU to complete its read-side
|
* Wait for psi_schedule_rtpoll_work RCU to complete its read-side
|
||||||
* critical section before destroying the trigger and optionally the
|
* critical section before destroying the trigger and optionally the
|
||||||
* poll_task.
|
* rtpoll_task.
|
||||||
*/
|
*/
|
||||||
synchronize_rcu();
|
synchronize_rcu();
|
||||||
/*
|
/*
|
||||||
* Stop kthread 'psimon' after releasing trigger_lock to prevent a
|
* Stop kthread 'psimon' after releasing rtpoll_trigger_lock to prevent
|
||||||
* deadlock while waiting for psi_poll_work to acquire trigger_lock
|
* a deadlock while waiting for psi_rtpoll_work to acquire
|
||||||
|
* rtpoll_trigger_lock
|
||||||
*/
|
*/
|
||||||
if (task_to_destroy) {
|
if (task_to_destroy) {
|
||||||
/*
|
/*
|
||||||
* After the RCU grace period has expired, the worker
|
* After the RCU grace period has expired, the worker
|
||||||
* can no longer be found through group->poll_task.
|
* can no longer be found through group->rtpoll_task.
|
||||||
*/
|
*/
|
||||||
kthread_stop(task_to_destroy);
|
kthread_stop(task_to_destroy);
|
||||||
atomic_set(&group->poll_scheduled, 0);
|
atomic_set(&group->rtpoll_scheduled, 0);
|
||||||
}
|
}
|
||||||
kfree(t);
|
kfree(t);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue