perf: Disallow mis-matched inherited group reads
Because group consistency is non-atomic between parent (filedesc) and children
(inherited) events, it is possible for PERF_FORMAT_GROUP read() to try and sum
non-matching counter groups -- with non-sensical results.

Add group_generation to distinguish the case where a parent group removes and
adds an event and thus has the same number, but a different configuration of
events as inherited groups.

This became a problem when commit fa8c269353 ("perf/core: Invert
perf_read_group() loops") flipped the order of child_list and sibling_list.
Previously it would iterate the group (sibling_list) first, and for each
sibling traverse the child_list. In this order, only the group composition of
the parent is relevant. By flipping the order the group composition of the
child (inherited) events becomes an issue and the mis-match in group
composition becomes evident.

That said; even prior to this commit, while reading of a group that is not
equally inherited was not broken, it still made no sense.

(Ab)use ECHILD as error return to indicate issues with child process group
composition.

Fixes: fa8c269353 ("perf/core: Invert perf_read_group() loops")
Reported-by: Budimir Markovic <markovicbudimir@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231018115654.GK33217@noisy.programming.kicks-ass.net
parent 58720809f5
commit 32671e3799

2 changed files with 34 additions and 6 deletions
include/linux/perf_event.h

@@ -704,6 +704,7 @@ struct perf_event {
 	/* The cumulative AND of all event_caps for events in this group. */
 	int				group_caps;
 
+	unsigned int			group_generation;
 	struct perf_event		*group_leader;
 	/*
 	 * event->pmu will always point to pmu in which this event belongs.

kernel/events/core.c

@@ -1954,6 +1954,7 @@ static void perf_group_attach(struct perf_event *event)
 
 	list_add_tail(&event->sibling_list, &group_leader->sibling_list);
 	group_leader->nr_siblings++;
+	group_leader->group_generation++;
 
 	perf_event__header_size(group_leader);
 
@@ -2144,6 +2145,7 @@ static void perf_group_detach(struct perf_event *event)
 	if (leader != event) {
 		list_del_init(&event->sibling_list);
 		event->group_leader->nr_siblings--;
+		event->group_leader->group_generation++;
 		goto out;
 	}
 
@@ -5440,7 +5442,7 @@ static int __perf_read_group_add(struct perf_event *leader,
 			u64 read_format, u64 *values)
 {
 	struct perf_event_context *ctx = leader->ctx;
-	struct perf_event *sub;
+	struct perf_event *sub, *parent;
 	unsigned long flags;
 	int n = 1; /* skip @nr */
 	int ret;
@@ -5450,6 +5452,33 @@ static int __perf_read_group_add(struct perf_event *leader,
 		return ret;
 
 	raw_spin_lock_irqsave(&ctx->lock, flags);
+	/*
+	 * Verify the grouping between the parent and child (inherited)
+	 * events is still in tact.
+	 *
+	 * Specifically:
+	 *  - leader->ctx->lock pins leader->sibling_list
+	 *  - parent->child_mutex pins parent->child_list
+	 *  - parent->ctx->mutex pins parent->sibling_list
+	 *
+	 * Because parent->ctx != leader->ctx (and child_list nests inside
+	 * ctx->mutex), group destruction is not atomic between children, also
+	 * see perf_event_release_kernel(). Additionally, parent can grow the
+	 * group.
+	 *
+	 * Therefore it is possible to have parent and child groups in a
+	 * different configuration and summing over such a beast makes no sense
+	 * what so ever.
+	 *
+	 * Reject this.
+	 */
+	parent = leader->parent;
+	if (parent &&
+	    (parent->group_generation != leader->group_generation ||
+	     parent->nr_siblings != leader->nr_siblings)) {
+		ret = -ECHILD;
+		goto unlock;
+	}
+
 	/*
 	 * Since we co-schedule groups, {enabled,running} times of siblings
@@ -5483,8 +5512,9 @@ static int __perf_read_group_add(struct perf_event *leader,
 			values[n++] = atomic64_read(&sub->lost_samples);
 	}
 
+unlock:
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
-	return 0;
+	return ret;
 }
 
 static int perf_read_group(struct perf_event *event,
@@ -5503,10 +5533,6 @@ static int perf_read_group(struct perf_event *event,
 
 	values[0] = 1 + leader->nr_siblings;
 
-	/*
-	 * By locking the child_mutex of the leader we effectively
-	 * lock the child list of all siblings.. XXX explain how.
-	 */
 	mutex_lock(&leader->child_mutex);
 
 	ret = __perf_read_group_add(leader, read_format, values);
@@ -13346,6 +13372,7 @@ static int inherit_group(struct perf_event *parent_event,
 		    !perf_get_aux_event(child_ctr, leader))
 			return -EINVAL;
 	}
+	leader->group_generation = parent_event->group_generation;
 	return 0;
 }
 
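To make the failure mode concrete, a hedged sketch of how the parent/child mismatch can
arise; again illustrative and not from the commit, with event choices and the
perf_event_open() wrapper as assumptions. The parent grows its group after fork(), so the
inherited child group keeps the old composition; with this patch, a PERF_FORMAT_GROUP
read() of the leader then reports ECHILD instead of summing mismatched groups.

/*
 * Hedged sketch: provoke the parent/child group mismatch the patch rejects.
 * A leader is created with inherit=1, a child is forked (inheriting a
 * one-event group), then the parent grows its group by one sibling.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <signal.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
			    int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_SW_TASK_CLOCK;
	attr.read_format = PERF_FORMAT_GROUP;
	attr.inherit = 1;

	int leader = perf_event_open(&attr, 0, -1, -1, 0);
	if (leader < 0) {
		perror("perf_event_open");
		return 1;
	}

	pid_t child = fork();
	if (child == 0) {		/* child inherits the one-event group */
		pause();
		_exit(0);
	}

	/* Parent grows its group; the child's inherited group does not follow. */
	attr.config = PERF_COUNT_SW_PAGE_FAULTS;
	int sibling = perf_event_open(&attr, 0, -1, leader, 0);
	(void)sibling;

	uint64_t buf[4];		/* nr + up to 3 values */
	if (read(leader, buf, sizeof(buf)) < 0 && errno == ECHILD)
		printf("group composition diverged between parent and child\n");

	kill(child, SIGKILL);
	waitpid(child, NULL, 0);
	return 0;
}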