1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/tools/testing/selftests/bpf/prog_tests/send_signal.c
Yonghong Song 7015843afc selftests/bpf: Fix send_signal test with nested CONFIG_PARAVIRT
Alexei reported that send_signal test may fail with nested CONFIG_PARAVIRT
configs. In this particular case, the base VM is AMD with 166 cpus, and I
run selftests with regular qemu on top of that and indeed send_signal test
failed. I also tried with an Intel box with 80 cpus and there is no issue.

The main qemu command line includes:

  -enable-kvm -smp 16 -cpu host

The failure log looks like:

  $ ./test_progs -t send_signal
  [   48.501588] watchdog: BUG: soft lockup - CPU#9 stuck for 26s! [test_progs:2225]
  [   48.503622] Modules linked in: bpf_testmod(O)
  [   48.503622] CPU: 9 PID: 2225 Comm: test_progs Tainted: G           O       6.9.0-08561-g2c1713a8f1c9-dirty #69
  [   48.507629] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
  [   48.511635] RIP: 0010:handle_softirqs+0x71/0x290
  [   48.511635] Code: [...] 10 0a 00 00 00 31 c0 65 66 89 05 d5 f4 fa 7e fb bb ff ff ff ff <49> c7 c2 cb
  [   48.518527] RSP: 0018:ffffc90000310fa0 EFLAGS: 00000246
  [   48.519579] RAX: 0000000000000000 RBX: 00000000ffffffff RCX: 00000000000006e0
  [   48.522526] RDX: 0000000000000006 RSI: ffff88810791ae80 RDI: 0000000000000000
  [   48.523587] RBP: ffffc90000fabc88 R08: 00000005a0af4f7f R09: 0000000000000000
  [   48.525525] R10: 0000000561d2f29c R11: 0000000000006534 R12: 0000000000000280
  [   48.528525] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
  [   48.528525] FS:  00007f2f2885cd00(0000) GS:ffff888237c40000(0000) knlGS:0000000000000000
  [   48.531600] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [   48.535520] CR2: 00007f2f287059f0 CR3: 0000000106a28002 CR4: 00000000003706f0
  [   48.537538] Call Trace:
  [   48.537538]  <IRQ>
  [   48.537538]  ? watchdog_timer_fn+0x1cd/0x250
  [   48.539590]  ? lockup_detector_update_enable+0x50/0x50
  [   48.539590]  ? __hrtimer_run_queues+0xff/0x280
  [   48.542520]  ? hrtimer_interrupt+0x103/0x230
  [   48.544524]  ? __sysvec_apic_timer_interrupt+0x4f/0x140
  [   48.545522]  ? sysvec_apic_timer_interrupt+0x3a/0x90
  [   48.547612]  ? asm_sysvec_apic_timer_interrupt+0x1a/0x20
  [   48.547612]  ? handle_softirqs+0x71/0x290
  [   48.547612]  irq_exit_rcu+0x63/0x80
  [   48.551585]  sysvec_apic_timer_interrupt+0x75/0x90
  [   48.552521]  </IRQ>
  [   48.553529]  <TASK>
  [   48.553529]  asm_sysvec_apic_timer_interrupt+0x1a/0x20
  [   48.555609] RIP: 0010:finish_task_switch.isra.0+0x90/0x260
  [   48.556526] Code: [...] 9f 58 0a 00 00 48 85 db 0f 85 89 01 00 00 4c 89 ff e8 53 d9 bd 00 fb 66 90 <4d> 85 ed 74
  [   48.562524] RSP: 0018:ffffc90000fabd38 EFLAGS: 00000282
  [   48.563589] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff83385620
  [   48.563589] RDX: ffff888237c73ae4 RSI: 0000000000000000 RDI: ffff888237c6fd00
  [   48.568521] RBP: ffffc90000fabd68 R08: 0000000000000000 R09: 0000000000000000
  [   48.569528] R10: 0000000000000001 R11: 0000000000000000 R12: ffff8881009d0000
  [   48.573525] R13: ffff8881024e5400 R14: ffff88810791ae80 R15: ffff888237c6fd00
  [   48.575614]  ? finish_task_switch.isra.0+0x8d/0x260
  [   48.576523]  __schedule+0x364/0xac0
  [   48.577535]  schedule+0x2e/0x110
  [   48.578555]  pipe_read+0x301/0x400
  [   48.579589]  ? destroy_sched_domains_rcu+0x30/0x30
  [   48.579589]  vfs_read+0x2b3/0x2f0
  [   48.579589]  ksys_read+0x8b/0xc0
  [   48.583590]  do_syscall_64+0x3d/0xc0
  [   48.583590]  entry_SYSCALL_64_after_hwframe+0x4b/0x53
  [   48.586525] RIP: 0033:0x7f2f28703fa1
  [   48.587592] Code: [...] 00 00 00 0f 1f 44 00 00 f3 0f 1e fa 80 3d c5 23 14 00 00 74 13 31 c0 0f 05 <48> 3d 00 f0
  [   48.593534] RSP: 002b:00007ffd90f8cf88 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
  [   48.595589] RAX: ffffffffffffffda RBX: 00007ffd90f8d5e8 RCX: 00007f2f28703fa1
  [   48.595589] RDX: 0000000000000001 RSI: 00007ffd90f8cfb0 RDI: 0000000000000006
  [   48.599592] RBP: 00007ffd90f8d2f0 R08: 0000000000000064 R09: 0000000000000000
  [   48.602527] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
  [   48.603589] R13: 00007ffd90f8d608 R14: 00007f2f288d8000 R15: 0000000000f6bdb0
  [   48.605527]  </TASK>

In the test, two processes are communicating through pipe. Further debugging
with strace found that the above splat is triggered as read() syscall could
not receive the data even if the corresponding write() syscall in another
process successfully wrote data into the pipe.

The failed subtest is "send_signal_perf". The corresponding perf event has
sample_period 1 and config PERF_COUNT_SW_CPU_CLOCK. sample_period 1 means every
overflow event will trigger a call to the BPF program. So I suspect this may
overwhelm the system. So I increased the sample_period to 100,000 and the test
passed. The sample_period 10,000 still has the test failed.

In other parts of selftest, e.g., [1], sample_freq is used instead. So I
decided to use sample_freq = 1,000 since the test can pass as well.

  [1] https://lore.kernel.org/bpf/20240604070700.3032142-1-song@kernel.org/

Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20240605201203.2603846-1-yonghong.song@linux.dev
2024-06-06 15:49:13 +02:00

211 lines
5.3 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "test_send_signal_kern.skel.h"
static int sigusr1_received;
static void sigusr1_handler(int signum)
{
sigusr1_received = 1;
}
static void test_send_signal_common(struct perf_event_attr *attr,
bool signal_thread)
{
struct test_send_signal_kern *skel;
int pipe_c2p[2], pipe_p2c[2];
int err = -1, pmu_fd = -1;
char buf[256];
pid_t pid;
if (!ASSERT_OK(pipe(pipe_c2p), "pipe_c2p"))
return;
if (!ASSERT_OK(pipe(pipe_p2c), "pipe_p2c")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
return;
}
pid = fork();
if (!ASSERT_GE(pid, 0, "fork")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
close(pipe_p2c[1]);
return;
}
if (pid == 0) {
int old_prio;
volatile int j = 0;
/* install signal handler and notify parent */
ASSERT_NEQ(signal(SIGUSR1, sigusr1_handler), SIG_ERR, "signal");
close(pipe_c2p[0]); /* close read */
close(pipe_p2c[1]); /* close write */
/* boost with a high priority so we got a higher chance
* that if an interrupt happens, the underlying task
* is this process.
*/
errno = 0;
old_prio = getpriority(PRIO_PROCESS, 0);
ASSERT_OK(errno, "getpriority");
ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority");
/* notify parent signal handler is installed */
ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* make sure parent enabled bpf program to send_signal */
ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
/* wait a little for signal handler */
for (int i = 0; i < 1000000000 && !sigusr1_received; i++) {
j /= i + j + 1;
if (!attr)
/* trigger the nanosleep tracepoint program. */
usleep(1);
}
buf[0] = sigusr1_received ? '2' : '0';
ASSERT_EQ(sigusr1_received, 1, "sigusr1_received");
ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* wait for parent notification and exit */
ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
/* restore the old priority */
ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority");
close(pipe_c2p[1]);
close(pipe_p2c[0]);
exit(0);
}
close(pipe_c2p[1]); /* close write */
close(pipe_p2c[0]); /* close read */
skel = test_send_signal_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
goto skel_open_load_failure;
if (!attr) {
err = test_send_signal_kern__attach(skel);
if (!ASSERT_OK(err, "skel_attach")) {
err = -1;
goto destroy_skel;
}
} else {
pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1 /* cpu */,
-1 /* group id */, 0 /* flags */);
if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) {
err = -1;
goto destroy_skel;
}
skel->links.send_signal_perf =
bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event"))
goto disable_pmu;
}
/* wait until child signal handler installed */
ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read");
/* trigger the bpf send_signal */
skel->bss->signal_thread = signal_thread;
skel->bss->sig = SIGUSR1;
skel->bss->pid = pid;
/* notify child that bpf program can send_signal now */
ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
/* wait for result */
err = read(pipe_c2p[0], buf, 1);
if (!ASSERT_GE(err, 0, "reading pipe"))
goto disable_pmu;
if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) {
err = -1;
goto disable_pmu;
}
ASSERT_EQ(buf[0], '2', "incorrect result");
/* notify child safe to exit */
ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
disable_pmu:
close(pmu_fd);
destroy_skel:
test_send_signal_kern__destroy(skel);
skel_open_load_failure:
close(pipe_c2p[0]);
close(pipe_p2c[1]);
wait(NULL);
}
static void test_send_signal_tracepoint(bool signal_thread)
{
test_send_signal_common(NULL, signal_thread);
}
static void test_send_signal_perf(bool signal_thread)
{
struct perf_event_attr attr = {
.freq = 1,
.sample_freq = 1000,
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
};
test_send_signal_common(&attr, signal_thread);
}
static void test_send_signal_nmi(bool signal_thread)
{
struct perf_event_attr attr = {
.sample_period = 1,
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
};
int pmu_fd;
/* Some setups (e.g. virtual machines) might run with hardware
* perf events disabled. If this is the case, skip this test.
*/
pmu_fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */,
-1 /* cpu */, -1 /* group_fd */, 0 /* flags */);
if (pmu_fd == -1) {
if (errno == ENOENT || errno == EOPNOTSUPP) {
printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n",
__func__);
test__skip();
return;
}
/* Let the test fail with a more informative message */
} else {
close(pmu_fd);
}
test_send_signal_common(&attr, signal_thread);
}
void test_send_signal(void)
{
if (test__start_subtest("send_signal_tracepoint"))
test_send_signal_tracepoint(false);
if (test__start_subtest("send_signal_perf"))
test_send_signal_perf(false);
if (test__start_subtest("send_signal_nmi"))
test_send_signal_nmi(false);
if (test__start_subtest("send_signal_tracepoint_thread"))
test_send_signal_tracepoint(true);
if (test__start_subtest("send_signal_perf_thread"))
test_send_signal_perf(true);
if (test__start_subtest("send_signal_nmi_thread"))
test_send_signal_nmi(true);
}