1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/tools/perf/tests/shell/stat+json_output.sh
Yicong Yang cbc917a1b0 perf stat: Support per-cluster aggregation
Some platforms have 'cluster' topology and CPUs in the cluster will
share resources like L3 Cache Tag (for HiSilicon Kunpeng SoC) or L2
cache (for Intel Jacobsville). Parsing and building the cluster
topology has been supported since [1].

perf stat has already supported aggregation for other topologies like
die or socket, etc. It'll be useful to aggregate per-cluster to find
problems like L3T bandwidth contention.

This patch adds support for the "--per-cluster" option for per-cluster
aggregation. It also updates the docs and the related test. The output
will look like:

[root@localhost tmp]# perf stat -a -e LLC-load --per-cluster -- sleep 5

 Performance counter stats for 'system wide':

S56-D0-CLS158    4      1,321,521,570      LLC-load
S56-D0-CLS594    4        794,211,453      LLC-load
S56-D0-CLS1030    4             41,623      LLC-load
S56-D0-CLS1466    4             41,646      LLC-load
S56-D0-CLS1902    4             16,863      LLC-load
S56-D0-CLS2338    4             15,721      LLC-load
S56-D0-CLS2774    4             22,671      LLC-load
[...]

On a legacy system without clusters or cluster support, the output will
look like:
[root@localhost perf]# perf stat -a -e cycles --per-cluster -- sleep 1

 Performance counter stats for 'system wide':

S56-D0-CLS0   64         18,011,485      cycles
S7182-D0-CLS0   64         16,548,835      cycles

Note that this patch doesn't mix the cluster information in the outputs
of --per-core to avoid breaking any tools/scripts using it.

Note that perf recently gained "--per-cache" aggregation, but that is not
the same as per-cluster aggregation, although cluster CPUs may share some
cache resources. For example, on my machine all clusters within a die
share the same L3 cache:
$ cat /sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_list
0-31
$ cat /sys/devices/system/cpu/cpu0/topology/cluster_cpus_list
0-3

[1] commit c5e22feffd ("topology: Represent clusters of CPUs within a die")

Tested-by: Jie Zhan <zhanjie9@hisilicon.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Cc: james.clark@arm.com
Cc: 21cnbao@gmail.com
Cc: prime.zeng@hisilicon.com
Cc: Jonathan.Cameron@huawei.com
Cc: fanghao11@huawei.com
Cc: linuxarm@huawei.com
Cc: tim.c.chen@intel.com
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240208024026.2691-1-yangyicong@huawei.com
2024-02-09 14:59:53 -08:00

222 lines
5 KiB
Bash
Executable file

#!/bin/bash
# perf stat JSON output linter
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Checks various perf stat JSON output commands for the
# correct number of fields.
# Abort on the first failing command; the EXIT trap installed below then
# removes the temp file and exits with status 1.
set -e

# Set to 1 by check_for_topology when the socket id read from sysfs is -1.
skip_test=0

shelldir=$(dirname "$0")
# shellcheck source=lib/setup_python.sh
. "${shelldir}"/lib/setup_python.sh

# Path of the JSON lint helper that lives next to this script. "$0" and the
# command substitution are quoted so a checkout path containing spaces is
# not word-split.
pythonchecker="$(dirname "$0")/lib/perf_json_output_lint.py"

# Scratch file that "perf stat -o" writes to and the lint helper reads back.
stat_output=$(mktemp /tmp/__perf_test.stat_output.json.XXXXX)
# Delete the temporary stat output file and reset the EXIT/TERM/INT traps
# to their defaults so that a later exit is not intercepted again.
cleanup()
{
	local sig

	rm -f "$stat_output"
	for sig in EXIT TERM INT
	do
		trap - "$sig"
	done
}
# Trap handler: run cleanup, then terminate the script with status 1.
trap_cleanup()
{
	cleanup
	exit 1
}
# Run trap_cleanup (cleanup + exit 1) whenever the script exits or receives
# TERM/INT. cleanup() resets these traps, so calling it before the final
# "exit" lets the script finish with its own exit status.
trap trap_cleanup EXIT TERM INT
# Return true if perf_event_paranoid is > $1 and not running as root.
# Arguments:
#   $1 - integer threshold compared against the value read from
#        /proc/sys/kernel/perf_event_paranoid
# Returns:
#   0 when "id -u" is not 0 and the paranoid value is greater than $1,
#   non-zero otherwise (including when the comparison itself is invalid).
function ParanoidAndNotRoot()
{
	# Running as root: report false without reading the paranoid level.
	[ "$(id -u)" != 0 ] || return 1
	# "$1" is quoted so the argument is always a single operand of -gt.
	[ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt "$1" ]
}
# Lint the JSON produced by "perf stat -j" with no further options.
# Globals: stat_output (file perf writes to), PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]".
# A failing perf or lint command aborts the script through "set -e".
check_no_args()
{
	echo -n "Checking json output: no args "
	perf stat -j -o "${stat_output}" true
	# The checker path is quoted so a script directory containing spaces
	# is passed as one argument. PYTHON stays unquoted as before
	# (presumably it may hold more than one word - TODO confirm against
	# lib/setup_python.sh).
	$PYTHON "${pythonchecker}" --no-args --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced by a system-wide run ("perf stat -j -a").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]", or in
#          "[Skip] paranoia and not root" when ParanoidAndNotRoot 0 holds.
check_system_wide()
{
	echo -n "Checking json output: system wide "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoia and not root"
		return
	fi
	perf stat -j -a -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --system-wide --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced by a system-wide run without aggregation
# ("perf stat -j -A -a --no-merge").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]", or in
#          "[Skip] paranoia and not root" when ParanoidAndNotRoot 0 holds.
check_system_wide_no_aggr()
{
	echo -n "Checking json output: system wide no aggregation "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoia and not root"
		return
	fi
	perf stat -j -A -a --no-merge -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --system-wide-no-aggr --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced in interval mode ("perf stat -j -I 1000").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]".
check_interval()
{
	echo -n "Checking json output: interval "
	perf stat -j -I 1000 -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --interval --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced for an explicitly selected event
# ("perf stat -j -e cpu-clock").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]".
check_event()
{
	echo -n "Checking json output: event "
	perf stat -j -e cpu-clock -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --event --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced by per-core aggregation
# ("perf stat -j --per-core -a").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]", or in
#          "[Skip] paranoia and not root" when ParanoidAndNotRoot 0 holds.
check_per_core()
{
	echo -n "Checking json output: per core "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoia and not root"
		return
	fi
	perf stat -j --per-core -a -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --per-core --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced by per-thread aggregation
# ("perf stat -j --per-thread -a").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]", or in
#          "[Skip] paranoia and not root" when ParanoidAndNotRoot 0 holds.
check_per_thread()
{
	echo -n "Checking json output: per thread "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoia and not root"
		return
	fi
	perf stat -j --per-thread -a -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --per-thread --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced by per-cache aggregation
# ("perf stat -j --per-cache -a").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]", or in
#          "[Skip] paranoia and not root" when ParanoidAndNotRoot 0 holds.
check_per_cache_instance()
{
	echo -n "Checking json output: per cache_instance "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoia and not root"
		return
	fi
	# Write to the temp file and lint it with --file, like every other
	# check in this script. Piping "perf ... 2>&1" into the checker mixed
	# stderr text into the JSON and, without pipefail, hid a failing perf
	# from "set -e".
	perf stat -j --per-cache -a -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --per-cache --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced by per-cluster aggregation
# ("perf stat -j --per-cluster -a").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]", or in
#          "[Skip] paranoia and not root" when ParanoidAndNotRoot 0 holds.
check_per_cluster()
{
	echo -n "Checking json output: per cluster "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoia and not root"
		return
	fi
	# Write to the temp file and lint it with --file, like every other
	# check in this script. Piping "perf ... 2>&1" into the checker mixed
	# stderr text into the JSON and, without pipefail, hid a failing perf
	# from "set -e".
	perf stat -j --per-cluster -a -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --per-cluster --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced by per-die aggregation
# ("perf stat -j --per-die -a").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]", or in
#          "[Skip] paranoia and not root" when ParanoidAndNotRoot 0 holds.
check_per_die()
{
	echo -n "Checking json output: per die "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoia and not root"
		return
	fi
	perf stat -j --per-die -a -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --per-die --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced by per-node aggregation
# ("perf stat -j --per-node -a").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]", or in
#          "[Skip] paranoia and not root" when ParanoidAndNotRoot 0 holds.
check_per_node()
{
	echo -n "Checking json output: per node "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoia and not root"
		return
	fi
	perf stat -j --per-node -a -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --per-node --file "${stat_output}"
	echo "[Success]"
}
# Lint the JSON produced by per-socket aggregation
# ("perf stat -j --per-socket -a").
# Globals: stat_output, PYTHON, pythonchecker (read).
# Outputs: one progress line on stdout ending in "[Success]", or in
#          "[Skip] paranoia and not root" when ParanoidAndNotRoot 0 holds.
check_per_socket()
{
	echo -n "Checking json output: per socket "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoia and not root"
		return
	fi
	perf stat -j --per-socket -a -o "${stat_output}" true
	# Checker path quoted so a directory name with spaces is not split.
	$PYTHON "${pythonchecker}" --per-socket --file "${stat_output}"
	echo "[Success]"
}
# perf stat's per-socket, per-core, per-die and -A (no_aggr) modes use
# the information found under "/sys/devices/system/cpu/cpu*/topology";
# the socket value, for example, comes from the "physical_package_id"
# file in that directory.
# Reference: cpu__get_topology_int in util/cpumap.c
# If the platform doesn't expose topology information, the values are
# set to -1. For example, on the pSeries platform of powerpc the value
# of "physical_package_id" is restricted and set to -1. The check
# below validates the socket id read from the topology file before
# proceeding further.
FILE_LOC='/sys/devices/system/cpu/cpu*/topology/'
FILE_NAME='physical_package_id'
# Decide whether the topology-dependent checks can run.
# Globals:
#   FILE_LOC, FILE_NAME (read) - glob directory and file name of the
#                                per-CPU topology file to inspect
#   skip_test (written)        - set to 1 when the first matching file
#                                contains -1
# Returns: 0 (nothing is inspected when ParanoidAndNotRoot 0 holds).
check_for_topology()
{
	local socket_file socket_id

	if ParanoidAndNotRoot 0
	then
		return 0
	fi

	# FILE_LOC carries a glob ("cpu*"), so the expansion is deliberately
	# left unquoted. Letting the shell expand it replaces parsing "ls".
	# shellcheck disable=SC2231
	for socket_file in ${FILE_LOC}/${FILE_NAME}
	do
		# An unmatched glob stays as the literal pattern: no topology
		# file exists, so there is nothing to validate.
		[ -e "${socket_file}" ] || return 0

		socket_id=$(cat "${socket_file}")
		# Quoted so an empty file is a plain mismatch rather than a
		# test(1) syntax error.
		if [ "${socket_id}" = "-1" ]
		then
			skip_test=1
		fi

		# Only the first match is inspected.
		return 0
	done
	return 0
}
# Probe the topology first; it sets skip_test when the socket id is -1.
check_for_topology

# Checks that run regardless of skip_test.
check_no_args
check_system_wide
check_interval
check_event
check_per_thread
check_per_node

# Checks that are only run when the topology probe did not set skip_test.
if [ "${skip_test}" -ne 1 ]
then
	check_system_wide_no_aggr
	check_per_core
	check_per_cache_instance
	check_per_cluster
	check_per_die
	check_per_socket
else
	# The message names every check of the branch above, including
	# per_cache_instance and per_cluster.
	echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_cache_instance, per_cluster, per_die and per_socket since socket id exposed via topology is invalid"
fi

# cleanup resets the EXIT trap, so the script ends with status 0 here
# instead of the 1 forced by trap_cleanup.
cleanup
exit 0