Use BPF to collect statistics on softirq events based on perf BPF skeletons. Example usage: # perf kwork top -b Starting trace, Hit <Ctrl+C> to stop and report ^C Total : 135445.704 ms, 8 cpus %Cpu(s): 28.35% id, 0.00% hi, 0.25% si %Cpu0 [|||||||||||||||||||| 69.85%] %Cpu1 [|||||||||||||||||||||| 74.10%] %Cpu2 [||||||||||||||||||||| 71.18%] %Cpu3 [|||||||||||||||||||| 69.61%] %Cpu4 [|||||||||||||||||||||| 74.05%] %Cpu5 [|||||||||||||||||||| 69.33%] %Cpu6 [|||||||||||||||||||| 69.71%] %Cpu7 [|||||||||||||||||||||| 73.77%] PID SPID %CPU RUNTIME COMMMAND ------------------------------------------------------------- 0 0 30.43 5271.005 ms [swapper/5] 0 0 30.17 5226.644 ms [swapper/3] 0 0 30.08 5210.257 ms [swapper/6] 0 0 29.89 5177.177 ms [swapper/0] 0 0 28.51 4938.672 ms [swapper/2] 0 0 25.93 4223.464 ms [swapper/7] 0 0 25.69 4181.411 ms [swapper/4] 0 0 25.63 4173.804 ms [swapper/1] 16665 16265 2.16 360.600 ms sched-messaging 16537 16265 2.05 356.275 ms sched-messaging 16503 16265 2.01 343.063 ms sched-messaging 16424 16265 1.97 336.876 ms sched-messaging 16580 16265 1.94 323.658 ms sched-messaging 16515 16265 1.92 321.616 ms sched-messaging 16659 16265 1.91 325.538 ms sched-messaging 16634 16265 1.88 327.766 ms sched-messaging 16454 16265 1.87 326.843 ms sched-messaging 16382 16265 1.87 322.591 ms sched-messaging 16642 16265 1.86 320.506 ms sched-messaging 16582 16265 1.86 320.164 ms sched-messaging 16315 16265 1.86 326.872 ms sched-messaging 16637 16265 1.85 323.766 ms sched-messaging 16506 16265 1.82 311.688 ms sched-messaging 16512 16265 1.81 304.643 ms sched-messaging 16560 16265 1.80 314.751 ms sched-messaging 16320 16265 1.80 313.405 ms sched-messaging 16442 16265 1.80 314.403 ms sched-messaging 16626 16265 1.78 295.380 ms sched-messaging 16600 16265 1.77 309.444 ms sched-messaging 16550 16265 1.76 301.161 ms sched-messaging 16525 16265 1.75 296.560 ms sched-messaging 16314 16265 1.75 298.338 ms sched-messaging 16595 16265 1.74 304.390 ms sched-messaging 16555 16265 1.74 287.564 ms sched-messaging 16520 16265 1.74 295.734 ms sched-messaging 16507 16265 1.73 293.956 ms sched-messaging 16593 16265 1.72 296.443 ms sched-messaging 16531 16265 1.72 299.950 ms sched-messaging 16281 16265 1.72 301.339 ms sched-messaging <SNIP> Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Yang Jihong <yangjihong1@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Sandipan Das <sandipan.das@amd.com> Link: https://lore.kernel.org/r/20230812084917.169338-17-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
338 lines
6.2 KiB
C
338 lines
6.2 KiB
C
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
|
// Copyright (c) 2022, Huawei
|
|
|
|
#include "vmlinux.h"
|
|
#include <bpf/bpf_helpers.h>
|
|
#include <bpf/bpf_tracing.h>
|
|
#include <bpf/bpf_core_read.h>
|
|
|
|
/*
|
|
* This should be in sync with "util/kwork.h"
|
|
*/
|
|
enum kwork_class_type {
|
|
KWORK_CLASS_IRQ,
|
|
KWORK_CLASS_SOFTIRQ,
|
|
KWORK_CLASS_WORKQUEUE,
|
|
KWORK_CLASS_SCHED,
|
|
KWORK_CLASS_MAX,
|
|
};
|
|
|
|
#define MAX_ENTRIES 102400
|
|
#define MAX_NR_CPUS 2048
|
|
#define PF_KTHREAD 0x00200000
|
|
#define MAX_COMMAND_LEN 16
|
|
|
|
struct time_data {
|
|
__u64 timestamp;
|
|
};
|
|
|
|
struct work_data {
|
|
__u64 runtime;
|
|
};
|
|
|
|
struct task_data {
|
|
__u32 tgid;
|
|
__u32 is_kthread;
|
|
char comm[MAX_COMMAND_LEN];
|
|
};
|
|
|
|
struct work_key {
|
|
__u32 type;
|
|
__u32 pid;
|
|
__u64 task_p;
|
|
};
|
|
|
|
struct task_key {
|
|
__u32 pid;
|
|
__u32 cpu;
|
|
};
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
|
|
__uint(map_flags, BPF_F_NO_PREALLOC);
|
|
__type(key, int);
|
|
__type(value, struct time_data);
|
|
} kwork_top_task_time SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
|
|
__uint(key_size, sizeof(struct work_key));
|
|
__uint(value_size, sizeof(struct time_data));
|
|
__uint(max_entries, MAX_ENTRIES);
|
|
} kwork_top_irq_time SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__uint(key_size, sizeof(struct task_key));
|
|
__uint(value_size, sizeof(struct task_data));
|
|
__uint(max_entries, MAX_ENTRIES);
|
|
} kwork_top_tasks SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
|
|
__uint(key_size, sizeof(struct work_key));
|
|
__uint(value_size, sizeof(struct work_data));
|
|
__uint(max_entries, MAX_ENTRIES);
|
|
} kwork_top_works SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__uint(key_size, sizeof(u32));
|
|
__uint(value_size, sizeof(u8));
|
|
__uint(max_entries, MAX_NR_CPUS);
|
|
} kwork_top_cpu_filter SEC(".maps");
|
|
|
|
int enabled = 0;
|
|
|
|
int has_cpu_filter = 0;
|
|
|
|
__u64 from_timestamp = 0;
|
|
__u64 to_timestamp = 0;
|
|
|
|
static __always_inline int cpu_is_filtered(__u32 cpu)
|
|
{
|
|
__u8 *cpu_val;
|
|
|
|
if (has_cpu_filter) {
|
|
cpu_val = bpf_map_lookup_elem(&kwork_top_cpu_filter, &cpu);
|
|
if (!cpu_val)
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static __always_inline void update_task_info(struct task_struct *task, __u32 cpu)
|
|
{
|
|
struct task_key key = {
|
|
.pid = task->pid,
|
|
.cpu = cpu,
|
|
};
|
|
|
|
if (!bpf_map_lookup_elem(&kwork_top_tasks, &key)) {
|
|
struct task_data data = {
|
|
.tgid = task->tgid,
|
|
.is_kthread = task->flags & PF_KTHREAD ? 1 : 0,
|
|
};
|
|
BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
|
|
|
|
bpf_map_update_elem(&kwork_top_tasks, &key, &data, BPF_ANY);
|
|
}
|
|
}
|
|
|
|
static __always_inline void update_work(struct work_key *key, __u64 delta)
|
|
{
|
|
struct work_data *data;
|
|
|
|
data = bpf_map_lookup_elem(&kwork_top_works, key);
|
|
if (data) {
|
|
data->runtime += delta;
|
|
} else {
|
|
struct work_data new_data = {
|
|
.runtime = delta,
|
|
};
|
|
|
|
bpf_map_update_elem(&kwork_top_works, key, &new_data, BPF_ANY);
|
|
}
|
|
}
|
|
|
|
static void on_sched_out(struct task_struct *task, __u64 ts, __u32 cpu)
|
|
{
|
|
__u64 delta;
|
|
struct time_data *pelem;
|
|
|
|
pelem = bpf_task_storage_get(&kwork_top_task_time, task, NULL, 0);
|
|
if (pelem)
|
|
delta = ts - pelem->timestamp;
|
|
else
|
|
delta = ts - from_timestamp;
|
|
|
|
struct work_key key = {
|
|
.type = KWORK_CLASS_SCHED,
|
|
.pid = task->pid,
|
|
.task_p = (__u64)task,
|
|
};
|
|
|
|
update_work(&key, delta);
|
|
update_task_info(task, cpu);
|
|
}
|
|
|
|
static void on_sched_in(struct task_struct *task, __u64 ts)
|
|
{
|
|
struct time_data *pelem;
|
|
|
|
pelem = bpf_task_storage_get(&kwork_top_task_time, task, NULL,
|
|
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
|
if (pelem)
|
|
pelem->timestamp = ts;
|
|
}
|
|
|
|
SEC("tp_btf/sched_switch")
|
|
int on_switch(u64 *ctx)
|
|
{
|
|
struct task_struct *prev, *next;
|
|
|
|
prev = (struct task_struct *)ctx[1];
|
|
next = (struct task_struct *)ctx[2];
|
|
|
|
if (!enabled)
|
|
return 0;
|
|
|
|
__u32 cpu = bpf_get_smp_processor_id();
|
|
|
|
if (cpu_is_filtered(cpu))
|
|
return 0;
|
|
|
|
__u64 ts = bpf_ktime_get_ns();
|
|
|
|
on_sched_out(prev, ts, cpu);
|
|
on_sched_in(next, ts);
|
|
|
|
return 0;
|
|
}
|
|
|
|
SEC("tp_btf/irq_handler_entry")
|
|
int on_irq_handler_entry(u64 *cxt)
|
|
{
|
|
struct task_struct *task;
|
|
|
|
if (!enabled)
|
|
return 0;
|
|
|
|
__u32 cpu = bpf_get_smp_processor_id();
|
|
|
|
if (cpu_is_filtered(cpu))
|
|
return 0;
|
|
|
|
__u64 ts = bpf_ktime_get_ns();
|
|
|
|
task = (struct task_struct *)bpf_get_current_task();
|
|
if (!task)
|
|
return 0;
|
|
|
|
struct work_key key = {
|
|
.type = KWORK_CLASS_IRQ,
|
|
.pid = BPF_CORE_READ(task, pid),
|
|
.task_p = (__u64)task,
|
|
};
|
|
|
|
struct time_data data = {
|
|
.timestamp = ts,
|
|
};
|
|
|
|
bpf_map_update_elem(&kwork_top_irq_time, &key, &data, BPF_ANY);
|
|
|
|
return 0;
|
|
}
|
|
|
|
SEC("tp_btf/irq_handler_exit")
|
|
int on_irq_handler_exit(u64 *cxt)
|
|
{
|
|
__u64 delta;
|
|
struct task_struct *task;
|
|
struct time_data *pelem;
|
|
|
|
if (!enabled)
|
|
return 0;
|
|
|
|
__u32 cpu = bpf_get_smp_processor_id();
|
|
|
|
if (cpu_is_filtered(cpu))
|
|
return 0;
|
|
|
|
__u64 ts = bpf_ktime_get_ns();
|
|
|
|
task = (struct task_struct *)bpf_get_current_task();
|
|
if (!task)
|
|
return 0;
|
|
|
|
struct work_key key = {
|
|
.type = KWORK_CLASS_IRQ,
|
|
.pid = BPF_CORE_READ(task, pid),
|
|
.task_p = (__u64)task,
|
|
};
|
|
|
|
pelem = bpf_map_lookup_elem(&kwork_top_irq_time, &key);
|
|
if (pelem && pelem->timestamp != 0)
|
|
delta = ts - pelem->timestamp;
|
|
else
|
|
delta = ts - from_timestamp;
|
|
|
|
update_work(&key, delta);
|
|
|
|
return 0;
|
|
}
|
|
|
|
SEC("tp_btf/softirq_entry")
|
|
int on_softirq_entry(u64 *cxt)
|
|
{
|
|
struct task_struct *task;
|
|
|
|
if (!enabled)
|
|
return 0;
|
|
|
|
__u32 cpu = bpf_get_smp_processor_id();
|
|
|
|
if (cpu_is_filtered(cpu))
|
|
return 0;
|
|
|
|
__u64 ts = bpf_ktime_get_ns();
|
|
|
|
task = (struct task_struct *)bpf_get_current_task();
|
|
if (!task)
|
|
return 0;
|
|
|
|
struct work_key key = {
|
|
.type = KWORK_CLASS_SOFTIRQ,
|
|
.pid = BPF_CORE_READ(task, pid),
|
|
.task_p = (__u64)task,
|
|
};
|
|
|
|
struct time_data data = {
|
|
.timestamp = ts,
|
|
};
|
|
|
|
bpf_map_update_elem(&kwork_top_irq_time, &key, &data, BPF_ANY);
|
|
|
|
return 0;
|
|
}
|
|
|
|
SEC("tp_btf/softirq_exit")
|
|
int on_softirq_exit(u64 *cxt)
|
|
{
|
|
__u64 delta;
|
|
struct task_struct *task;
|
|
struct time_data *pelem;
|
|
|
|
if (!enabled)
|
|
return 0;
|
|
|
|
__u32 cpu = bpf_get_smp_processor_id();
|
|
|
|
if (cpu_is_filtered(cpu))
|
|
return 0;
|
|
|
|
__u64 ts = bpf_ktime_get_ns();
|
|
|
|
task = (struct task_struct *)bpf_get_current_task();
|
|
if (!task)
|
|
return 0;
|
|
|
|
struct work_key key = {
|
|
.type = KWORK_CLASS_SOFTIRQ,
|
|
.pid = BPF_CORE_READ(task, pid),
|
|
.task_p = (__u64)task,
|
|
};
|
|
|
|
pelem = bpf_map_lookup_elem(&kwork_top_irq_time, &key);
|
|
if (pelem)
|
|
delta = ts - pelem->timestamp;
|
|
else
|
|
delta = ts - from_timestamp;
|
|
|
|
update_work(&key, delta);
|
|
|
|
return 0;
|
|
}
|
|
|
|
char LICENSE[] SEC("license") = "Dual BSD/GPL";
|