Show the branch speculation info if provided by the branch recording hardware feature. This can be useful for purposes of code optimization. E.g. $ perf record -j any,u ./test_branch $ perf report --dump-raw-trace Before: [...] 8380958377610 0x40b178 [0x1b0]: PERF_RECORD_SAMPLE(IP, 0x2): 7952/7952: 0x4f851a period: 48973 addr: 0 ... branch stack: nr:16 ..... 0: 00000000004b52fd -> 00000000004f82c0 0 cycles P 0 ..... 1: ffffffff8220137c -> 00000000004b52f0 0 cycles M 0 ..... 2: 000000000041d1c4 -> 00000000004b52f0 0 cycles P 0 ..... 3: 00000000004e7ead -> 000000000041d1b0 0 cycles M 0 ..... 4: 00000000004e7f91 -> 00000000004e7ead 0 cycles P 0 ..... 5: 00000000004e7ea8 -> 00000000004e7f70 0 cycles P 0 ..... 6: 00000000004e7e52 -> 00000000004e7e98 0 cycles M 0 ..... 7: 00000000004e7e1f -> 00000000004e7e40 0 cycles M 0 ..... 8: 00000000004e7f60 -> 00000000004e7df0 0 cycles P 0 ..... 9: 00000000004e7f58 -> 00000000004e7f60 0 cycles M 0 ..... 10: 000000000041d85d -> 00000000004e7f50 0 cycles P 0 ..... 11: 000000000043306a -> 000000000041d840 0 cycles P 0 ..... 12: ffffffff8220137c -> 0000000000433040 0 cycles M 0 ..... 13: 000000000041e4a1 -> 0000000000433040 0 cycles P 0 ..... 14: ffffffff8220137c -> 000000000041e490 0 cycles M 0 ..... 15: 000000000041d89b -> 000000000041e487 0 cycles P 0 ... thread: test_branch:7952 ...... dso: /data/sandipan/test_branch [...] After: [...] 8380958377610 0x40b178 [0x1b0]: PERF_RECORD_SAMPLE(IP, 0x2): 7952/7952: 0x4f851a period: 48973 addr: 0 ... branch stack: nr:16 ..... 0: 00000000004b52fd -> 00000000004f82c0 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 1: ffffffff8220137c -> 00000000004b52f0 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 2: 000000000041d1c4 -> 00000000004b52f0 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 3: 00000000004e7ead -> 000000000041d1b0 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 4: 00000000004e7f91 -> 00000000004e7ead 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 
5: 00000000004e7ea8 -> 00000000004e7f70 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 6: 00000000004e7e52 -> 00000000004e7e98 0 cycles M 0 SPEC_CORRECT_PATH ..... 7: 00000000004e7e1f -> 00000000004e7e40 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 8: 00000000004e7f60 -> 00000000004e7df0 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 9: 00000000004e7f58 -> 00000000004e7f60 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 10: 000000000041d85d -> 00000000004e7f50 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 11: 000000000043306a -> 000000000041d840 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 12: ffffffff8220137c -> 0000000000433040 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 13: 000000000041e4a1 -> 0000000000433040 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 14: ffffffff8220137c -> 000000000041e490 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 15: 000000000041d89b -> 000000000041e487 0 cycles P 0 NON_SPEC_CORRECT_PATH ... thread: test_branch:7952 ...... dso: /data/sandipan/test_branch [...] With the addition of new branch flags, the "brstacksym" field in perf script output now shows speculation information after the branch type. Change the regular expressions accordingly for the test to pass. Since branch speculation information may vary across platforms, the test does not look for specific values. E.g. $ perf test -v 110 Before: 110: Check branch stack sampling : --- start --- test child forked, pid 54154 Testing user branch stack sampling + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$ /tmp/__perf_test.program.AfhUI/perf.script + cleanup + rm -rf /tmp/__perf_test.program.AfhUI test child finished with -1 ---- end ---- Check branch stack sampling: FAILED! 
After: 110: Check branch stack sampling : --- start --- test child forked, pid 43716 Testing user branch stack sampling + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x66/brstack_foo+0x0/P/-/-/0/IND_CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_foo+0x1b/brstack_bar+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x58/brstack_foo+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x5d/brstack_bar+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bar+0x31/brstack_foo+0x20/P/-/-/0/RET/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_foo+0x36/brstack_bench+0x5d/P/-/-/0/RET/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x76/brstack_bench+0x7d/P/-/-/0/COND/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack+0x5a/brstack+0x41/P/-/-/0/UNCOND/NON_SPEC_CORRECT_PATH + set +x Testing branch stack filtering permutation (any_call,CALL|IND_CALL|COND_CALL|SYSCALL|IRQ) Testing branch stack filtering permutation (call,CALL|SYSCALL) Testing branch stack filtering permutation (cond,COND) Testing branch stack filtering permutation (any_ret,RET|COND_RET|SYSRET|ERET) Testing branch stack filtering permutation (call,cond,CALL|SYSCALL|COND) Testing branch stack filtering permutation (any_call,cond,CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND) Testing 
branch stack filtering permutation (cond,any_call,any_ret,COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET) test child finished with 0 ---- end ---- Check branch stack sampling: Ok Signed-off-by: Sandipan Das <sandipan.das@amd.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Ananth Narayan <ananth.narayan@amd.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Madhavan Srinivasan <maddy@linux.ibm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Santosh Shukla <santosh.shukla@amd.com> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Thomas Richter <tmricht@linux.ibm.com> Cc: x86@kernel.org Link: https://lore.kernel.org/r/048d67c9de3cc8e3dbf19aaa7ff718dec91364c5.1675333809.git.sandipan.das@amd.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
76 lines
2.8 KiB
Bash
Executable file
76 lines
2.8 KiB
Bash
Executable file
#!/bin/sh
|
|
# Check branch stack sampling
|
|
|
|
# SPDX-License-Identifier: GPL-2.0
|
|
# German Gomez <german.gomez@arm.com>, 2022
|
|
|
|
# Probe run: record the trivial "true" workload with the branch filter set
# this test depends on (any,save_type,u). If that recording fails, the
# hardware or architecture cannot run the test, so report a skip (exit
# status 2) instead of a failure.
perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 || {
	echo "skip: system doesn't support filter types: any,save_type,u"
	exit 2
}
|
|
|
|
# Scratch directory that holds perf.data and the derived perf.script file.
# Stop right away if it cannot be created: with an empty TMPDIR every
# "$TMPDIR/..." path used later would resolve relative to the filesystem root.
TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) || exit 1
# Command line of the workload that gets profiled. It is word-split where it
# is used, so keep it a plain space-separated string.
TESTPROG="perf test -w brstack"
|
|
|
|
# Remove the scratch directory and everything in it.
# Globals: TMPDIR (read) - directory to delete; nothing is done when empty.
cleanup() {
	# Quote the path so a directory name containing spaces or glob
	# characters is removed as one operand, and use "--" so it can never
	# be parsed as an option.
	if [ -n "$TMPDIR" ]; then
		rm -rf -- "$TMPDIR"
	fi
}
|
|
|
|
# Run cleanup when the script exits or receives SIGTERM/SIGINT. Signal names
# are spelled in upper case: accepting lower case is an optional extension
# that a POSIX /bin/sh is not required to provide.
trap cleanup EXIT TERM INT
|
|
|
|
# Record user-space branch stacks of the workload and check that one entry of
# each expected symbol-to-symbol branch type is present in the output.
#
# Globals:   TMPDIR (read)   - scratch directory for perf.data / perf.script
#            TESTPROG (read) - workload command line (word-split on purpose)
# Outputs:   the first matching entry of every check on stdout, plus a
#            "set -x" trace of each grep on stderr
# Returns:   the function itself does not test the grep results; it relies on
#            "set -e" being active in the caller so that a grep without a
#            match aborts the script.
test_user_branches() {
	echo "Testing user branch stack sampling"

	perf record -o "$TMPDIR/perf.data" --branch-filter any,save_type,u -- ${TESTPROG} > /dev/null 2>&1
	# xargs -n1 puts every whitespace-separated branch entry on its own line
	perf script -i "$TMPDIR/perf.data" --fields brstacksym | xargs -n1 > "$TMPDIR/perf.script"

	# example of branch entries (the field after the branch type carries
	# the speculation info, which differs between platforms, so the
	# patterns accept anything there):
	# 	brstack_foo+0x1b/brstack_bar+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH

	set -x
	grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL/.*$" "$TMPDIR/perf.script"
	grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$" "$TMPDIR/perf.script"
	grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL/.*$" "$TMPDIR/perf.script"
	grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$" "$TMPDIR/perf.script"
	grep -E -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET/.*$" "$TMPDIR/perf.script"
	grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$" "$TMPDIR/perf.script"
	grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$" "$TMPDIR/perf.script"
	grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$" "$TMPDIR/perf.script"
	set +x

	# some branch types are still not being tested:
	# IND COND_CALL COND_RET SYSCALL SYSRET IRQ SERROR NO_TX
}
|
|
|
|
# Record the workload with one branch filter and verify that every sampled
# branch entry has one of the expected branch types.
#
# Globals:   TMPDIR (read)   - scratch directory for perf.data / perf.script
#            TESTPROG (read) - workload command line (word-split on purpose)
# Arguments: $1 - filter, passed as "--branch-filter <arg>,save_type,u"
#            $2 - expected branch types for that filter, written as an
#                 extended-regex alternation, e.g. "CALL|SYSCALL"
# Outputs:   the first entry with an unexpected branch type, if any, on stdout
# Returns:   1 if such an entry exists, 0 otherwise
test_filter() {
	local filter=$1
	local expect=$2

	echo "Testing branch stack filtering permutation ($filter,$expect)"

	perf record -o "$TMPDIR/perf.data" --branch-filter "$filter,save_type,u" -- ${TESTPROG} > /dev/null 2>&1
	# xargs -n1 puts every whitespace-separated branch entry on its own line
	perf script -i "$TMPDIR/perf.data" --fields brstack | xargs -n1 > "$TMPDIR/perf.script"

	# fail if we find any branch type that doesn't match any of the expected ones
	# also consider UNKNOWN branch types (-)
	#
	# An entry looks like <from>/<to>/P/-/-/0/<TYPE>/<speculation info>, so
	# the branch type is the 7th '/'-separated field. The pattern pins it to
	# that position: with a free-floating "[^ ]*/" prefix the "-"
	# alternative would also match the "/-/" flag fields in front of the
	# type and every entry would be accepted whatever its type is.
	if grep -E -vm1 "^([^ /]*/){6}($expect|-|( *))/.*$" "$TMPDIR/perf.script"; then
		return 1
	fi
}
|
|
|
|
# From here on, stop at the first command that fails.
set -e

test_user_branches

# Table of <branch filter> <accepted branch types> pairs; each pair is handed
# to test_filter in the order listed.
set -- \
	"any_call" "CALL|IND_CALL|COND_CALL|SYSCALL|IRQ" \
	"call" "CALL|SYSCALL" \
	"cond" "COND" \
	"any_ret" "RET|COND_RET|SYSRET|ERET" \
	"call,cond" "CALL|SYSCALL|COND" \
	"any_call,cond" "CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND" \
	"cond,any_call,any_ret" "COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET"

while [ "$#" -gt 0 ]; do
	test_filter "$1" "$2"
	shift 2
done
|