KVM: x86/pmu: Reprogram PEBS event to emulate guest PEBS counter
When a guest counter is configured as a PEBS counter through IA32_PEBS_ENABLE, a guest PEBS event will be reprogrammed by configuring a non-zero precision level in the perf_event_attr. The guest PEBS overflow PMI bit will be set in the guest GLOBAL_STATUS MSR when the PEBS facility generates a PEBS overflow PMI based on the guest IA32_DS_AREA MSR. Even with the same counter index and the same event code and mask, guest PEBS events will not be reused for non-PEBS events. Originally-by: Andi Kleen <ak@linux.intel.com> Co-developed-by: Kan Liang <kan.liang@linux.intel.com> Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Signed-off-by: Like Xu <likexu@tencent.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Message-Id: <20220411101946.20262-9-likexu@tencent.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
c59a1f106f
commit
79f3e3b583
1 changed files with 33 additions and 3 deletions
|
@ -86,15 +86,22 @@ static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
|
||||||
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
|
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
|
||||||
{
|
{
|
||||||
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||||
|
bool skip_pmi = false;
|
||||||
|
|
||||||
/* Ignore counters that have been reprogrammed already. */
|
/* Ignore counters that have been reprogrammed already. */
|
||||||
if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
|
if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
|
if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
|
||||||
|
/* Indicate PEBS overflow PMI to guest. */
|
||||||
|
skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
|
||||||
|
(unsigned long *)&pmu->global_status);
|
||||||
|
} else {
|
||||||
|
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
|
||||||
|
}
|
||||||
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
|
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
|
||||||
|
|
||||||
if (!pmc->intr)
|
if (!pmc->intr || skip_pmi)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -124,6 +131,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||||
u64 config, bool exclude_user,
|
u64 config, bool exclude_user,
|
||||||
bool exclude_kernel, bool intr)
|
bool exclude_kernel, bool intr)
|
||||||
{
|
{
|
||||||
|
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
|
||||||
struct perf_event *event;
|
struct perf_event *event;
|
||||||
struct perf_event_attr attr = {
|
struct perf_event_attr attr = {
|
||||||
.type = type,
|
.type = type,
|
||||||
|
@ -135,6 +143,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||||
.exclude_kernel = exclude_kernel,
|
.exclude_kernel = exclude_kernel,
|
||||||
.config = config,
|
.config = config,
|
||||||
};
|
};
|
||||||
|
bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);
|
||||||
|
|
||||||
if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
|
if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
|
||||||
return;
|
return;
|
||||||
|
@ -150,6 +159,23 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||||
*/
|
*/
|
||||||
attr.sample_period = 0;
|
attr.sample_period = 0;
|
||||||
}
|
}
|
||||||
|
if (pebs) {
|
||||||
|
/*
|
||||||
|
* The non-zero precision level of guest event makes the ordinary
|
||||||
|
* guest event become a guest PEBS event and triggers the host
|
||||||
|
* PEBS PMI handler to determine whether the PEBS overflow PMI
|
||||||
|
* comes from the host counters or the guest.
|
||||||
|
*
|
||||||
|
* For most PEBS hardware events, the difference in the software
|
||||||
|
* precision levels of guest and host PEBS events will not affect
|
||||||
|
* the accuracy of the PEBS profiling result, because the "event IP"
|
||||||
|
* in the PEBS record is calibrated on the guest side.
|
||||||
|
*
|
||||||
|
* On Icelake everything is fine. Other hardware (GLC+, TNT+) that
|
||||||
|
* could possibly care here is unsupported and needs changes.
|
||||||
|
*/
|
||||||
|
attr.precise_ip = 1;
|
||||||
|
}
|
||||||
|
|
||||||
event = perf_event_create_kernel_counter(&attr, -1, current,
|
event = perf_event_create_kernel_counter(&attr, -1, current,
|
||||||
kvm_perf_overflow, pmc);
|
kvm_perf_overflow, pmc);
|
||||||
|
@ -163,7 +189,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
||||||
pmc_to_pmu(pmc)->event_count++;
|
pmc_to_pmu(pmc)->event_count++;
|
||||||
clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
|
clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
|
||||||
pmc->is_paused = false;
|
pmc->is_paused = false;
|
||||||
pmc->intr = intr;
|
pmc->intr = intr || pebs;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void pmc_pause_counter(struct kvm_pmc *pmc)
|
static void pmc_pause_counter(struct kvm_pmc *pmc)
|
||||||
|
@ -189,6 +215,10 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
|
||||||
get_sample_period(pmc, pmc->counter)))
|
get_sample_period(pmc, pmc->counter)))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) &&
|
||||||
|
pmc->perf_event->attr.precise_ip)
|
||||||
|
return false;
|
||||||
|
|
||||||
/* reuse perf_event to serve as pmc_reprogram_counter() does*/
|
/* reuse perf_event to serve as pmc_reprogram_counter() does*/
|
||||||
perf_event_enable(pmc->perf_event);
|
perf_event_enable(pmc->perf_event);
|
||||||
pmc->is_paused = false;
|
pmc->is_paused = false;
|
||||||
|
|
Loading…
Add table
Reference in a new issue