The default config is computed during creation of the PMU and may do things like scanning sysfs, when the PMU may just be used as part of scanning. Change default_config to perf_event_attr_init_default, a callback that is used when a default config needs initializing. This avoids holding onto the memory for a perf_event_attr and copying. On a tigerlake laptop running the pmu-scan benchmark: Before: Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 100 times Average core PMU scanning took: 28.780 usec (+- 0.503 usec) Average PMU scanning took: 283.480 usec (+- 18.471 usec) Number of openat syscalls: 30,227 After: Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 100 times Average core PMU scanning took: 27.880 usec (+- 0.169 usec) Average PMU scanning took: 245.260 usec (+- 15.758 usec) Number of openat syscalls: 28,914 Over 3 runs it is a nearly 12% reduction in execution time and a 4.3% reduction in openat calls. Signed-off-by: Ian Rogers <irogers@google.com> Reviewed-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: James Clark <james.clark@arm.com> Cc: Suzuki K Poulose <suzuki.poulose@arm.com> Cc: Yang Jihong <yangjihong1@huawei.com> Cc: Will Deacon <will@kernel.org> Cc: Leo Yan <leo.yan@linaro.org> Cc: Mike Leach <mike.leach@linaro.org> Cc: Jing Zhang <renyu.zj@linux.alibaba.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Thomas Richter <tmricht@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: John Garry <john.g.garry@oracle.com> Cc: linux-arm-kernel@lists.infradead.org Cc: coresight@lists.linaro.org Link: https://lore.kernel.org/r/20231012175645.1849503-8-irogers@google.com Signed-off-by: Namhyung Kim <namhyung@kernel.org>
43 lines
1 KiB
C
43 lines
1 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <sys/types.h>
|
|
#include <dirent.h>
|
|
#include <fcntl.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/zalloc.h>
|
|
#include <api/fs/fs.h>
|
|
#include <errno.h>
|
|
|
|
#include "../../../util/intel-pt.h"
|
|
#include "../../../util/intel-bts.h"
|
|
#include "../../../util/pmu.h"
|
|
#include "../../../util/fncache.h"
|
|
#include "../../../util/pmus.h"
|
|
#include "env.h"
|
|
|
|
/*
 * Apply x86-specific settings to a PMU.
 *
 * When built with HAVE_AUXTRACE_SUPPORT, a PMU whose name equals
 * INTEL_PT_PMU_NAME or INTEL_BTS_PMU_NAME is marked as auxtrace and
 * selectable. The Intel PT PMU additionally gets
 * intel_pt_pmu_default_config installed as its
 * perf_event_attr_init_default callback.
 *
 * Without HAVE_AUXTRACE_SUPPORT the body is empty; @pmu is only
 * referenced inside that conditional section.
 */
void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
{
#ifdef HAVE_AUXTRACE_SUPPORT
	const bool is_intel_pt = strcmp(pmu->name, INTEL_PT_PMU_NAME) == 0;
	const bool is_intel_bts = strcmp(pmu->name, INTEL_BTS_PMU_NAME) == 0;

	/* Both PMUs receive the same auxtrace/selectable flags. */
	if (is_intel_pt || is_intel_bts) {
		pmu->auxtrace = true;
		pmu->selectable = true;
	}

	/* Only Intel PT installs a default-config callback. */
	if (is_intel_pt)
		pmu->perf_event_attr_init_default = intel_pt_pmu_default_config;
#endif
}
|
|
|
|
/*
 * Return the number of PMUs used for perf mem/c2c.
 *
 * AMD uses the IBS OP PMU rather than a core PMU for perf mem/c2c, so
 * the count is 1 when x86__is_amd_cpu() reports an AMD CPU. Otherwise
 * (Intel uses core PMUs for perf mem/c2c) the core PMU count is
 * returned.
 */
int perf_pmus__num_mem_pmus(void)
{
	return x86__is_amd_cpu() ? 1 : perf_pmus__num_core_pmus();
}
|