KVM x86 misc changes for 6.11
- Add a global struct to consolidate tracking of host values, e.g. EFER, and
  move "shadow_phys_bits" into the structure as "maxphyaddr".

- Add KVM_CAP_X86_APIC_BUS_CYCLES_NS to allow configuring the effective APIC
  bus frequency, because TDX.

- Print the name of the APICv/AVIC inhibits in the relevant tracepoint.

- Clean up KVM's handling of vendor specific emulation to consistently act on
  "compatible with Intel/AMD", versus checking for a specific vendor.

- Misc cleanups

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmaRub0ACgkQOlYIJqCj
N/2LMxAArGzhcWZ6Qdo2aMRaMIPtSBJHmbEgEuHvHMumgsTZQzDcn9cxDi/hNSrc
l8ODOwAM2qNcq95YfwjU7F0ae3E+HRzGvKcBnmZWuQeCDp2HhVEoCphFu1sHst+t
XEJTL02b6OgyJUEU3h40mYk12eiq2S4FCnFYXPCqijwwuL6Y5KQvvTqek3c2/SDn
c+VneutYGax/S0GiiCkYh4wrwWh9g7qm0IX70ycBwJbW5qBFKgyglvHxvL8JLJC9
Nkkw/p2657wcOdraH+fOBuRy2dMwE5fv++1tOjWwB5WAAhSOJPZh0BGYvgA2yfN7
OE+k7APKUQd9Xxtud8H3LrTPoyMA4hz2sdDFyqrrWK9yjpBY7zXNyN50Fxi7VVsm
T8nTIiKAGyRbjotY+m7krXQPXjfZYhVqrJ/jtxESOZLZ93q2gSWU2p/ZXpUPVHnH
+YOBAI1owP3wepaYlrthtI4LQx9lF422dnmeSflztfKFGabRbQZxg3uHMCCxIaGc
lJ6CD546+D45f/uBXRDMqk//qFTqXhKUbDk9sutmU/C2oWufMwW0R8kOyItGPyvk
9PP1vd8vSsIHj+tpwg+i04jBqYDaAcPBOcTZaHm9SYYP+1e11Uu5Vjep37JL1bkA
xJWxnDZOCGcfKQi2jkh51HJ/dOAHXY1GQKMfyAoPQOSonYHvGVY=
=Cf2R
-----END PGP SIGNATURE-----

Merge tag 'kvm-x86-misc-6.11' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 6.11

- Add a global struct to consolidate tracking of host values, e.g. EFER, and
  move "shadow_phys_bits" into the structure as "maxphyaddr".

- Add KVM_CAP_X86_APIC_BUS_CYCLES_NS to allow configuring the effective APIC
  bus frequency, because TDX.

- Print the name of the APICv/AVIC inhibits in the relevant tracepoint.

- Clean up KVM's handling of vendor specific emulation to consistently act on
  "compatible with Intel/AMD", versus checking for a specific vendor.

- Misc cleanups
commit 5dcc1e7614
31 changed files with 503 additions and 209 deletions
@@ -6483,9 +6483,12 @@ More architecture-specific flags detailing state of the VCPU that may
 affect the device's behavior. Current defined flags::

   /* x86, set if the VCPU is in system management mode */
   #define KVM_RUN_X86_SMM (1 << 0)
   /* x86, set if bus lock detected in VM */
-  #define KVM_RUN_BUS_LOCK (1 << 1)
+  #define KVM_RUN_X86_BUS_LOCK (1 << 1)
+  /* x86, set if the VCPU is executing a nested (L2) guest */
+  #define KVM_RUN_X86_GUEST_MODE (1 << 2)
+
   /* arm64, set for KVM_EXIT_DEBUG */
   #define KVM_DEBUG_ARCH_HSR_HIGH_VALID (1 << 0)
@@ -7831,29 +7834,31 @@ Valid bits in args[0] are::
  #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0)
  #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1)

-Enabling this capability on a VM provides userspace with a way to select
-a policy to handle the bus locks detected in guest. Userspace can obtain
-the supported modes from the result of KVM_CHECK_EXTENSION and define it
-through the KVM_ENABLE_CAP.
+Enabling this capability on a VM provides userspace with a way to select a
+policy to handle the bus locks detected in guest. Userspace can obtain the
+supported modes from the result of KVM_CHECK_EXTENSION and define it through
+the KVM_ENABLE_CAP. The supported modes are mutually-exclusive.

-KVM_BUS_LOCK_DETECTION_OFF and KVM_BUS_LOCK_DETECTION_EXIT are supported
-currently and mutually exclusive with each other. More bits can be added in
-the future.
+This capability allows userspace to force VM exits on bus locks detected in the
+guest, irrespective whether or not the host has enabled split-lock detection
+(which triggers an #AC exception that KVM intercepts). This capability is
+intended to mitigate attacks where a malicious/buggy guest can exploit bus
+locks to degrade the performance of the whole system.

-With KVM_BUS_LOCK_DETECTION_OFF set, bus locks in guest will not cause vm exits
-so that no additional actions are needed. This is the default mode.
+If KVM_BUS_LOCK_DETECTION_OFF is set, KVM doesn't force guest bus locks to VM
+exit, although the host kernel's split-lock #AC detection still applies, if
+enabled.

-With KVM_BUS_LOCK_DETECTION_EXIT set, vm exits happen when bus lock detected
-in VM. KVM just exits to userspace when handling them. Userspace can enforce
-its own throttling or other policy based mitigations.
+If KVM_BUS_LOCK_DETECTION_EXIT is set, KVM enables a CPU feature that ensures
+bus locks in the guest trigger a VM exit, and KVM exits to userspace for all
+such VM exits, e.g. to allow userspace to throttle the offending guest and/or
+apply some other policy-based mitigation. When exiting to userspace, KVM sets
+KVM_RUN_X86_BUS_LOCK in vcpu-run->flags, and conditionally sets the exit_reason
+to KVM_EXIT_X86_BUS_LOCK.

-This capability is aimed to address the thread that VM can exploit bus locks to
-degree the performance of the whole system. Once the userspace enable this
-capability and select the KVM_BUS_LOCK_DETECTION_EXIT mode, KVM will set the
-KVM_RUN_BUS_LOCK flag in vcpu-run->flags field and exit to userspace. Concerning
-the bus lock vm exit can be preempted by a higher priority VM exit, the exit
-notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
-KVM_RUN_BUS_LOCK flag is used to distinguish between them.
+Note! Detected bus locks may be coincident with other exits to userspace, i.e.
+KVM_RUN_X86_BUS_LOCK should be checked regardless of the primary exit reason if
+userspace wants to take action on all detected bus locks.

 7.23 KVM_CAP_PPC_DAWR1
 ----------------------
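A minimal userspace sketch of the flow described above (illustrative only, not
part of this series; it assumes "vm_fd" is an existing VM file descriptor and
only uses the documented KVM ioctls and flags)::

  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int enable_bus_lock_exits(int vm_fd)
  {
          struct kvm_enable_cap cap = {
                  .cap = KVM_CAP_X86_BUS_LOCK_EXIT,
                  .args[0] = KVM_BUS_LOCK_DETECTION_EXIT,
          };
          /* KVM_CHECK_EXTENSION reports the supported (mutually exclusive) modes. */
          int modes = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_BUS_LOCK_EXIT);

          if (modes < 0 || !(modes & KVM_BUS_LOCK_DETECTION_EXIT))
                  return -1;
          return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
  }

  static void handle_exit(struct kvm_run *run)
  {
          /* A bus lock can piggyback on any exit, so check the flag first. */
          if (run->flags & KVM_RUN_X86_BUS_LOCK)
                  fprintf(stderr, "bus lock detected in guest, apply throttling policy\n");

          if (run->exit_reason == KVM_EXIT_X86_BUS_LOCK)
                  return;         /* nothing else to handle for this exit */
          /* ... dispatch other exit reasons as usual ... */
  }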
@@ -8137,6 +8142,37 @@ error/annotated fault.

 See KVM_EXIT_MEMORY_FAULT for more information.

+7.35 KVM_CAP_X86_APIC_BUS_CYCLES_NS
+-----------------------------------
+
+:Architectures: x86
+:Target: VM
+:Parameters: args[0] is the desired APIC bus clock rate, in nanoseconds
+:Returns: 0 on success, -EINVAL if args[0] contains an invalid value for the
+          frequency or if any vCPUs have been created, -ENXIO if a virtual
+          local APIC has not been created using KVM_CREATE_IRQCHIP.
+
+This capability sets the VM's APIC bus clock frequency, used by KVM's in-kernel
+virtual APIC when emulating APIC timers. KVM's default value can be retrieved
+by KVM_CHECK_EXTENSION.
+
+Note: Userspace is responsible for correctly configuring CPUID 0x15, a.k.a. the
+core crystal clock frequency, if a non-zero CPUID 0x15 is exposed to the guest.
+
+7.36 KVM_CAP_X86_GUEST_MODE
+------------------------------
+
+:Architectures: x86
+:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP.
+
+The presence of this capability indicates that KVM_RUN will update the
+KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the
+vCPU was executing nested guest code when it exited.
+
+KVM exits with the register state of either the L1 or L2 guest
+depending on which executed at the time of an exit. Userspace must
+take care to differentiate between these cases.
+
 8. Other capabilities.
 ======================
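An illustrative userspace sketch for the two new capabilities (not from the
patch; "vm_fd" and "kvm_fd" are assumed to exist, and 40 ns is just an example
value corresponding to a 25 MHz APIC bus)::

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int configure_apic_bus(int vm_fd)
  {
          struct kvm_enable_cap cap = {
                  .cap = KVM_CAP_X86_APIC_BUS_CYCLES_NS,
                  .args[0] = 40,  /* 1e9 / 40 ns = 25 MHz APIC bus */
          };

          /* The in-kernel local APIC must already exist (else -ENXIO)... */
          if (ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0))
                  return -1;
          /* ...and this must be done before the first KVM_CREATE_VCPU (else -EINVAL). */
          return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
  }

  static int guest_mode_flag_supported(int kvm_fd)
  {
          /* Informational capability: KVM_RUN updates KVM_RUN_X86_GUEST_MODE. */
          return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_GUEST_MODE) > 0;
  }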
@@ -1208,7 +1208,7 @@ enum kvm_apicv_inhibit {
	 * APIC acceleration is disabled by a module parameter
	 * and/or not supported in hardware.
	 */
-	APICV_INHIBIT_REASON_DISABLE,
+	APICV_INHIBIT_REASON_DISABLED,

	/*
	 * APIC acceleration is inhibited because AutoEOI feature is

@@ -1278,8 +1278,27 @@ enum kvm_apicv_inhibit {
	 * mapping between logical ID and vCPU.
	 */
	APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
+
+	NR_APICV_INHIBIT_REASONS,
 };

+#define __APICV_INHIBIT_REASON(reason) \
+	{ BIT(APICV_INHIBIT_REASON_##reason), #reason }
+
+#define APICV_INHIBIT_REASONS \
+	__APICV_INHIBIT_REASON(DISABLED), \
+	__APICV_INHIBIT_REASON(HYPERV), \
+	__APICV_INHIBIT_REASON(ABSENT), \
+	__APICV_INHIBIT_REASON(BLOCKIRQ), \
+	__APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED), \
+	__APICV_INHIBIT_REASON(APIC_ID_MODIFIED), \
+	__APICV_INHIBIT_REASON(APIC_BASE_MODIFIED), \
+	__APICV_INHIBIT_REASON(NESTED), \
+	__APICV_INHIBIT_REASON(IRQWIN), \
+	__APICV_INHIBIT_REASON(PIT_REINJ), \
+	__APICV_INHIBIT_REASON(SEV), \
+	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)
+
 struct kvm_arch {
	unsigned long n_used_mmu_pages;
	unsigned long n_requested_mmu_pages;
@@ -1365,6 +1384,7 @@ struct kvm_arch {

	u32 default_tsc_khz;
	bool user_set_tsc;
+	u64 apic_bus_cycle_ns;

	seqcount_raw_spinlock_t pvclock_sc;
	bool use_master_clock;

@@ -1709,7 +1729,6 @@ struct kvm_x86_ops {
	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-	bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
	const unsigned long required_apicv_inhibits;
	bool allow_apicv_in_x2apic_without_x2apic_virtualization;
	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);

@@ -1855,7 +1874,6 @@ struct kvm_arch_async_pf {
 };

 extern u32 __read_mostly kvm_nr_uret_msrs;
-extern u64 __read_mostly host_efer;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern bool __read_mostly enable_apicv;
 extern struct kvm_x86_ops kvm_x86_ops;
@@ -106,6 +106,7 @@ struct kvm_ioapic_state {

 #define KVM_RUN_X86_SMM (1 << 0)
 #define KVM_RUN_X86_BUS_LOCK (1 << 1)
+#define KVM_RUN_X86_GUEST_MODE (1 << 2)

 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {

@@ -335,6 +335,18 @@ static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
 #endif
 }

+static bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *entry;
+
+	entry = kvm_find_cpuid_entry(vcpu, 0);
+	if (!entry)
+		return false;
+
+	return is_guest_vendor_amd(entry->ebx, entry->ecx, entry->edx) ||
+	       is_guest_vendor_hygon(entry->ebx, entry->ecx, entry->edx);
+}
+
 static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
	struct kvm_lapic *apic = vcpu->arch.apic;
@@ -102,24 +102,6 @@ static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu,
	*reg &= ~__feature_bit(x86_feature);
 }

-static inline bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 0);
-	return best &&
-	       (is_guest_vendor_amd(best->ebx, best->ecx, best->edx) ||
-	        is_guest_vendor_hygon(best->ebx, best->ecx, best->edx));
-}
-
-static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 0);
-	return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
-}
-
 static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
 {
	return vcpu->arch.is_amd_compatible;
@@ -2354,50 +2354,6 @@ setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
	ss->avl = 0;
 }

-static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
-{
-	u32 eax, ebx, ecx, edx;
-
-	eax = ecx = 0;
-	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
-	return is_guest_vendor_intel(ebx, ecx, edx);
-}
-
-static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
-{
-	const struct x86_emulate_ops *ops = ctxt->ops;
-	u32 eax, ebx, ecx, edx;
-
-	/*
-	 * syscall should always be enabled in longmode - so only become
-	 * vendor specific (cpuid) if other modes are active...
-	 */
-	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		return true;
-
-	eax = 0x00000000;
-	ecx = 0x00000000;
-	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
-	/*
-	 * remark: Intel CPUs only support "syscall" in 64bit longmode. Also a
-	 * 64bit guest with a 32bit compat-app running will #UD !! While this
-	 * behaviour can be fixed (by emulating) into AMD response - CPUs of
-	 * AMD can't behave like Intel.
-	 */
-	if (is_guest_vendor_intel(ebx, ecx, edx))
-		return false;
-
-	if (is_guest_vendor_amd(ebx, ecx, edx) ||
-	    is_guest_vendor_hygon(ebx, ecx, edx))
-		return true;
-
-	/*
-	 * default: (not Intel, not AMD, not Hygon), apply Intel's
-	 * stricter rules...
-	 */
-	return false;
-}
-
 static int em_syscall(struct x86_emulate_ctxt *ctxt)
 {
	const struct x86_emulate_ops *ops = ctxt->ops;

@@ -2411,7 +2367,15 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_ud(ctxt);

-	if (!(em_syscall_is_enabled(ctxt)))
+	/*
+	 * Intel compatible CPUs only support SYSCALL in 64-bit mode, whereas
+	 * AMD allows SYSCALL in any flavor of protected mode. Note, it's
+	 * infeasible to emulate Intel behavior when running on AMD hardware,
+	 * as SYSCALL won't fault in the "wrong" mode, i.e. there is no #UD
+	 * for KVM to trap-and-emulate, unlike emulating AMD on Intel.
+	 */
+	if (ctxt->mode != X86EMUL_MODE_PROT64 &&
+	    ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
		return emulate_ud(ctxt);

	ops->get_msr(ctxt, MSR_EFER, &efer);

@@ -2471,11 +2435,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
		return emulate_gp(ctxt, 0);

	/*
-	 * Not recognized on AMD in compat mode (but is recognized in legacy
-	 * mode).
+	 * Intel's architecture allows SYSENTER in compatibility mode, but AMD
+	 * does not. Note, AMD does allow SYSENTER in legacy protected mode.
	 */
-	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
-	    && !vendor_intel(ctxt))
+	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) &&
+	    !ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
		return emulate_ud(ctxt);

	/* sysenter/sysexit have not been tested in 64bit mode. */

@@ -2647,7 +2611,14 @@ static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
	 * manner when ECX is zero due to REP-string optimizations.
	 */
 #ifdef CONFIG_X86_64
-	if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
+	u32 eax, ebx, ecx, edx;
+
+	if (ctxt->ad_bytes != 4)
+		return;
+
+	eax = ecx = 0;
+	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
+	if (!is_guest_vendor_intel(ebx, ecx, edx))
		return;

	*reg_write(ctxt, VCPU_REGS_RCX) = 0;
@@ -1737,7 +1737,8 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
		break;
	case HV_X64_MSR_APIC_FREQUENCY:
-		data = APIC_BUS_FREQUENCY;
+		data = div64_u64(1000000000ULL,
+				 vcpu->kvm->arch.apic_bus_cycle_ns);
		break;
	default:
		kvm_pr_unimpl_rdmsr(vcpu, msr);

@@ -223,6 +223,7 @@ struct x86_emulate_ops {
	bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
	bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
	bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
+	bool (*guest_cpuid_is_intel_compatible)(struct x86_emulate_ctxt *ctxt);

	void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
@@ -1557,7 +1557,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
		remaining = 0;

	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
-	return div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->divide_count));
+	return div64_u64(ns, (apic->vcpu->kvm->arch.apic_bus_cycle_ns *
+			      apic->divide_count));
 }

 static void __report_tpr_access(struct kvm_lapic *apic, bool write)

@@ -1973,7 +1974,8 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)

 static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
 {
-	return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
+	return (u64)tmict * apic->vcpu->kvm->arch.apic_bus_cycle_ns *
+	       (u64)apic->divide_count;
 }

 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)

@@ -16,8 +16,7 @@
 #define APIC_DEST_NOSHORT 0x0
 #define APIC_DEST_MASK 0x800

-#define APIC_BUS_CYCLE_NS 1
-#define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS)
+#define APIC_BUS_CYCLE_NS_DEFAULT 1

 #define APIC_BROADCAST 0xFF
 #define X2APIC_BROADCAST 0xFFFFFFFFul
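To make the conversion above concrete (illustrative numbers, not from the
patch): with the default apic_bus_cycle_ns of APIC_BUS_CYCLE_NS_DEFAULT (1 ns),
HV_X64_MSR_APIC_FREQUENCY reads back 1000000000 / 1 = 1 GHz, exactly what the
removed hard-coded APIC_BUS_FREQUENCY reported. If userspace instead configures
40 ns per bus cycle, the same MSR reads back 25,000,000 Hz, and tmict_to_ns()
scales accordingly: an initial count of 1,000 with a divide value of 16 yields
1000 * 40 * 16 = 640,000 ns (0.64 ms) until the APIC timer fires.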
@@ -57,12 +57,6 @@ static __always_inline u64 rsvd_bits(int s, int e)
	return ((2ULL << (e - s)) - 1) << s;
 }

-/*
- * The number of non-reserved physical address bits irrespective of features
- * that repurpose legal bits, e.g. MKTME.
- */
-extern u8 __read_mostly shadow_phys_bits;
-
 static inline gfn_t kvm_mmu_max_gfn(void)
 {
	/*

@@ -76,30 +70,11 @@ static inline gfn_t kvm_mmu_max_gfn(void)
	 * than hardware's real MAXPHYADDR. Using the host MAXPHYADDR
	 * disallows such SPTEs entirely and simplifies the TDP MMU.
	 */
-	int max_gpa_bits = likely(tdp_enabled) ? shadow_phys_bits : 52;
+	int max_gpa_bits = likely(tdp_enabled) ? kvm_host.maxphyaddr : 52;

	return (1ULL << (max_gpa_bits - PAGE_SHIFT)) - 1;
 }

-static inline u8 kvm_get_shadow_phys_bits(void)
-{
-	/*
-	 * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
-	 * in CPU detection code, but the processor treats those reduced bits as
-	 * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
-	 * the physical address bits reported by CPUID.
-	 */
-	if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
-		return cpuid_eax(0x80000008) & 0xff;
-
-	/*
-	 * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
-	 * custom CPUID. Proceed with whatever the kernel found since these features
-	 * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
-	 */
-	return boot_cpu_data.x86_phys_bits;
-}
-
 u8 kvm_mmu_get_max_tdp_level(void);

 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);

@@ -5109,7 +5109,7 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,

 static inline u64 reserved_hpa_bits(void)
 {
-	return rsvd_bits(shadow_phys_bits, 63);
+	return rsvd_bits(kvm_host.maxphyaddr, 63);
 }

 /*
@@ -43,7 +43,25 @@ u64 __read_mostly shadow_acc_track_mask;
 u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
 u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;

-u8 __read_mostly shadow_phys_bits;
+static u8 __init kvm_get_host_maxphyaddr(void)
+{
+	/*
+	 * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
+	 * in CPU detection code, but the processor treats those reduced bits as
+	 * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
+	 * the physical address bits reported by CPUID, i.e. the raw MAXPHYADDR,
+	 * when reasoning about CPU behavior with respect to MAXPHYADDR.
+	 */
+	if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
+		return cpuid_eax(0x80000008) & 0xff;
+
+	/*
+	 * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
+	 * custom CPUID. Proceed with whatever the kernel found since these features
+	 * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
+	 */
+	return boot_cpu_data.x86_phys_bits;
+}

 void __init kvm_mmu_spte_module_init(void)
 {

@@ -55,6 +73,8 @@ void __init kvm_mmu_spte_module_init(void)
	 * will change when the vendor module is (re)loaded.
	 */
	allow_mmio_caching = enable_mmio_caching;
+
+	kvm_host.maxphyaddr = kvm_get_host_maxphyaddr();
 }

 static u64 generation_mmio_spte_mask(u64 gen)

@@ -441,8 +461,6 @@ void kvm_mmu_reset_all_pte_masks(void)
	u8 low_phys_bits;
	u64 mask;

-	shadow_phys_bits = kvm_get_shadow_phys_bits();
-
	/*
	 * If the CPU has 46 or less physical address bits, then set an
	 * appropriate mask to guard against L1TF attacks. Otherwise, it is

@@ -494,7 +512,7 @@ void kvm_mmu_reset_all_pte_masks(void)
	 * 52-bit physical addresses then there are no reserved PA bits in the
	 * PTEs and so the reserved PA approach must be disabled.
	 */
-	if (shadow_phys_bits < 52)
+	if (kvm_host.maxphyaddr < 52)
		mask = BIT_ULL(51) | PT_PRESENT_MASK;
	else
		mask = 0;
@@ -194,7 +194,7 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
	attr.sample_period = get_sample_period(pmc, pmc->counter);

	if ((attr.config & HSW_IN_TX_CHECKPOINTED) &&
-	    guest_cpuid_is_intel(pmc->vcpu)) {
+	    (boot_cpu_has(X86_FEATURE_RTM) || boot_cpu_has(X86_FEATURE_HLE))) {
		/*
		 * HSW_IN_TX_CHECKPOINTED is not supported with nonzero
		 * period. Just clear the sample period so at least

@@ -4401,9 +4401,9 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are
	 * isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
	 * by common SVM code).
	 */
-	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	hostsa->xcr0 = kvm_host.xcr0;
	hostsa->pkru = read_pkru();
-	hostsa->xss = host_xss;
+	hostsa->xss = kvm_host.xss;

	/*
	 * If DebugSwap is enabled, debug registers are loaded but NOT saved by
@@ -53,6 +53,7 @@
 #include "svm_onhyperv.h"

 MODULE_AUTHOR("Qumranet");
+MODULE_DESCRIPTION("KVM support for SVM (AMD-V) extensions");
 MODULE_LICENSE("GPL");

 #ifdef MODULE

@@ -1202,7 +1203,7 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
	struct vcpu_svm *svm = to_svm(vcpu);

-	if (guest_cpuid_is_intel(vcpu)) {
+	if (guest_cpuid_is_intel_compatible(vcpu)) {
		/*
		 * We must intercept SYSENTER_EIP and SYSENTER_ESP
		 * accesses because the processor only stores 32 bits.

@@ -2890,12 +2891,12 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
		break;
	case MSR_IA32_SYSENTER_EIP:
		msr_info->data = (u32)svm->vmcb01.ptr->save.sysenter_eip;
-		if (guest_cpuid_is_intel(vcpu))
+		if (guest_cpuid_is_intel_compatible(vcpu))
			msr_info->data |= (u64)svm->sysenter_eip_hi << 32;
		break;
	case MSR_IA32_SYSENTER_ESP:
		msr_info->data = svm->vmcb01.ptr->save.sysenter_esp;
-		if (guest_cpuid_is_intel(vcpu))
+		if (guest_cpuid_is_intel_compatible(vcpu))
			msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
		break;
	case MSR_TSC_AUX:

@@ -3122,11 +3123,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
		 * 32 bit part of these msrs to support Intel's
		 * implementation of SYSENTER/SYSEXIT.
		 */
-		svm->sysenter_eip_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
+		svm->sysenter_eip_hi = guest_cpuid_is_intel_compatible(vcpu) ? (data >> 32) : 0;
		break;
	case MSR_IA32_SYSENTER_ESP:
		svm->vmcb01.ptr->save.sysenter_esp = (u32)data;
-		svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
+		svm->sysenter_esp_hi = guest_cpuid_is_intel_compatible(vcpu) ? (data >> 32) : 0;
		break;
	case MSR_TSC_AUX:
		/*

@@ -4387,11 +4388,11 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);

	/*
-	 * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
+	 * Intercept VMLOAD if the vCPU model is Intel in order to emulate that
	 * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
	 * SVM on Intel is bonkers and extremely unlikely to work).
	 */
-	if (!guest_cpuid_is_intel(vcpu))
+	if (!guest_cpuid_is_intel_compatible(vcpu))
		kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);

	kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
@@ -668,7 +668,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
 /* avic.c */
 #define AVIC_REQUIRED_APICV_INHIBITS \
 ( \
-	BIT(APICV_INHIBIT_REASON_DISABLE) | \
+	BIT(APICV_INHIBIT_REASON_DISABLED) | \
	BIT(APICV_INHIBIT_REASON_ABSENT) | \
	BIT(APICV_INHIBIT_REASON_HYPERV) | \
	BIT(APICV_INHIBIT_REASON_NESTED) | \

@@ -1375,6 +1375,10 @@ TRACE_EVENT(kvm_hv_stimer_cleanup,
		  __entry->vcpu_id, __entry->timer_index)
 );

+#define kvm_print_apicv_inhibit_reasons(inhibits) \
+	(inhibits), (inhibits) ? " " : "", \
+	(inhibits) ? __print_flags(inhibits, "|", APICV_INHIBIT_REASONS) : ""
+
 TRACE_EVENT(kvm_apicv_inhibit_changed,
	TP_PROTO(int reason, bool set, unsigned long inhibits),
	TP_ARGS(reason, set, inhibits),

@@ -1391,9 +1395,10 @@ TRACE_EVENT(kvm_apicv_inhibit_changed,
		__entry->inhibits = inhibits;
	),

-	TP_printk("%s reason=%u, inhibits=0x%lx",
+	TP_printk("%s reason=%u, inhibits=0x%lx%s%s",
		  __entry->set ? "set" : "cleared",
-		  __entry->reason, __entry->inhibits)
+		  __entry->reason,
+		  kvm_print_apicv_inhibit_reasons(__entry->inhibits))
 );

 TRACE_EVENT(kvm_apicv_accept_irq,
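With the extra %s%s conversions and __print_flags(), the tracepoint now decodes
the inhibit mask by name. A purely hypothetical trace line (the numeric reason
and mask values depend on the enum layout) might render as
"kvm_apicv_inhibit_changed: set reason=3, inhibits=0x9 DISABLED|BLOCKIRQ",
whereas previously only the raw hex mask was printed; when the mask is zero the
helper emits empty strings, so nothing is appended.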
@@ -8,7 +8,7 @@
 #include "posted_intr.h"

 #define VMX_REQUIRED_APICV_INHIBITS \
-	(BIT(APICV_INHIBIT_REASON_DISABLE)| \
+	(BIT(APICV_INHIBIT_REASON_DISABLED) | \
	 BIT(APICV_INHIBIT_REASON_ABSENT) | \
	 BIT(APICV_INHIBIT_REASON_HYPERV) | \
	 BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \

@@ -2425,7 +2425,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
	if (cpu_has_load_ia32_efer()) {
		if (guest_efer & EFER_LMA)
			exec_control |= VM_ENTRY_IA32E_MODE;
-		if (guest_efer != host_efer)
+		if (guest_efer != kvm_host.efer)
			exec_control |= VM_ENTRY_LOAD_IA32_EFER;
	}
	vm_entry_controls_set(vmx, exec_control);

@@ -2438,7 +2438,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
	 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
	 */
	exec_control = __vm_exit_controls_get(vmcs01);
-	if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
+	if (cpu_has_load_ia32_efer() && guest_efer != kvm_host.efer)
		exec_control |= VM_EXIT_LOAD_IA32_EFER;
	else
		exec_control &= ~VM_EXIT_LOAD_IA32_EFER;

@@ -4665,7 +4665,7 @@ static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
		return vmcs_read64(GUEST_IA32_EFER);

	if (cpu_has_load_ia32_efer())
-		return host_efer;
+		return kvm_host.efer;

	for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
		if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)

@@ -4676,7 +4676,7 @@ static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
	if (efer_msr)
		return efer_msr->data;

-	return host_efer;
+	return kvm_host.efer;
 }

 static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
@@ -74,6 +74,7 @@
 #include "posted_intr.h"

 MODULE_AUTHOR("Qumranet");
+MODULE_DESCRIPTION("KVM support for VMX (Intel VT-x) extensions");
 MODULE_LICENSE("GPL");

 #ifdef MODULE

@@ -259,7 +260,7 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
		return 0;
	}

-	if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+	if (kvm_host.arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
		l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
		return 0;
	}

@@ -404,7 +405,7 @@ static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
	 * and VM-Exit.
	 */
	vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
-				(host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+				(kvm_host.arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
				!boot_cpu_has_bug(X86_BUG_MDS) &&
				!boot_cpu_has_bug(X86_BUG_TAA);

@@ -1123,12 +1124,12 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
	 * atomically, since it's faster than switching it manually.
	 */
	if (cpu_has_load_ia32_efer() ||
-	    (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
+	    (enable_ept && ((vmx->vcpu.arch.efer ^ kvm_host.efer) & EFER_NX))) {
		if (!(guest_efer & EFER_LMA))
			guest_efer &= ~EFER_LME;
-		if (guest_efer != host_efer)
+		if (guest_efer != kvm_host.efer)
			add_atomic_switch_msr(vmx, MSR_EFER,
-					      guest_efer, host_efer, false);
+					      guest_efer, kvm_host.efer, false);
		else
			clear_atomic_switch_msr(vmx, MSR_EFER);
		return false;

@@ -1141,7 +1142,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
		clear_atomic_switch_msr(vmx, MSR_EFER);

	guest_efer &= ~ignore_bits;
-	guest_efer |= host_efer & ignore_bits;
+	guest_efer |= kvm_host.efer & ignore_bits;

	vmx->guest_uret_msrs[i].data = guest_efer;
	vmx->guest_uret_msrs[i].mask = ~ignore_bits;

@@ -4392,7 +4393,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
	}

	if (cpu_has_load_ia32_efer())
-		vmcs_write64(HOST_IA32_EFER, host_efer);
+		vmcs_write64(HOST_IA32_EFER, kvm_host.efer);
 }

 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)

@@ -8394,18 +8395,16 @@ static void __init vmx_setup_me_spte_mask(void)
	u64 me_mask = 0;

	/*
-	 * kvm_get_shadow_phys_bits() returns shadow_phys_bits. Use
-	 * the former to avoid exposing shadow_phys_bits.
-	 *
	 * On pre-MKTME system, boot_cpu_data.x86_phys_bits equals to
-	 * shadow_phys_bits. On MKTME and/or TDX capable systems,
+	 * kvm_host.maxphyaddr. On MKTME and/or TDX capable systems,
	 * boot_cpu_data.x86_phys_bits holds the actual physical address
-	 * w/o the KeyID bits, and shadow_phys_bits equals to MAXPHYADDR
-	 * reported by CPUID. Those bits between are KeyID bits.
+	 * w/o the KeyID bits, and kvm_host.maxphyaddr equals to
+	 * MAXPHYADDR reported by CPUID. Those bits between are KeyID bits.
	 */
-	if (boot_cpu_data.x86_phys_bits != kvm_get_shadow_phys_bits())
+	if (boot_cpu_data.x86_phys_bits != kvm_host.maxphyaddr)
		me_mask = rsvd_bits(boot_cpu_data.x86_phys_bits,
-				    kvm_get_shadow_phys_bits() - 1);
+				    kvm_host.maxphyaddr - 1);

	/*
	 * Unlike SME, host kernel doesn't support setting up any
	 * MKTME KeyID on Intel platforms. No memory encryption
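A concrete (illustrative) case of the me_mask computation above: on an
MKTME-capable host where CPUID reports MAXPHYADDR = 52 but the kernel has
shrunk boot_cpu_data.x86_phys_bits to 46 to carve out KeyID bits,
kvm_host.maxphyaddr is 52, so me_mask = rsvd_bits(46, 51), i.e. bits 51:46 are
treated as KeyID bits; on a pre-MKTME host the two values match and me_mask
stays 0.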
@@ -727,7 +727,7 @@ static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
		return true;

	return allow_smaller_maxphyaddr &&
-	       cpuid_maxphyaddr(vcpu) < kvm_get_shadow_phys_bits();
+	       cpuid_maxphyaddr(vcpu) < kvm_host.maxphyaddr;
 }

 static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)

@@ -46,7 +46,6 @@ bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu);
 void vmx_migrate_timers(struct kvm_vcpu *vcpu);
 void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
 void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu);
-bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason);
 void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
 void vmx_hwapic_isr_update(int max_isr);
 bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu);

@@ -111,7 +110,6 @@ u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
 u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
 void vmx_write_tsc_offset(struct kvm_vcpu *vcpu);
 void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu);
-void vmx_request_immediate_exit(struct kvm_vcpu *vcpu);
 void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
 #ifdef CONFIG_X86_64
 int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
@@ -100,6 +100,9 @@
 struct kvm_caps kvm_caps __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_caps);

+struct kvm_host_values kvm_host __read_mostly;
+EXPORT_SYMBOL_GPL(kvm_host);
+
 #define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))

 #define emul_to_vcpu(ctxt) \

@@ -220,21 +223,12 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
				| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
				| XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)

-u64 __read_mostly host_efer;
-EXPORT_SYMBOL_GPL(host_efer);
-
 bool __read_mostly allow_smaller_maxphyaddr = 0;
 EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);

 bool __read_mostly enable_apicv = true;
 EXPORT_SYMBOL_GPL(enable_apicv);

-u64 __read_mostly host_xss;
-EXPORT_SYMBOL_GPL(host_xss);
-
-u64 __read_mostly host_arch_capabilities;
-EXPORT_SYMBOL_GPL(host_arch_capabilities);
-
 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, mmu_shadow_zapped),

@@ -308,8 +302,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
		sizeof(kvm_vcpu_stats_desc),
 };

-u64 __read_mostly host_xcr0;
-
 static struct kmem_cache *x86_emulator_cache;

 /*
@@ -1016,11 +1008,11 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)

	if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {

-		if (vcpu->arch.xcr0 != host_xcr0)
+		if (vcpu->arch.xcr0 != kvm_host.xcr0)
			xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);

		if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
-		    vcpu->arch.ia32_xss != host_xss)
+		    vcpu->arch.ia32_xss != kvm_host.xss)
			wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
	}

@@ -1047,12 +1039,12 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)

	if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {

-		if (vcpu->arch.xcr0 != host_xcr0)
-			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
+		if (vcpu->arch.xcr0 != kvm_host.xcr0)
+			xsetbv(XCR_XFEATURE_ENABLED_MASK, kvm_host.xcr0);

		if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
-		    vcpu->arch.ia32_xss != host_xss)
-			wrmsrl(MSR_IA32_XSS, host_xss);
+		    vcpu->arch.ia32_xss != kvm_host.xss)
+			wrmsrl(MSR_IA32_XSS, kvm_host.xss);
	}

 }

@@ -1619,7 +1611,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)

 static u64 kvm_get_arch_capabilities(void)
 {
-	u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
+	u64 data = kvm_host.arch_capabilities & KVM_SUPPORTED_ARCH_CAP;

	/*
	 * If nx_huge_pages is enabled, KVM's shadow paging will ensure that

@@ -1877,11 +1869,11 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
		 * incomplete and conflicting architectural behavior. Current
		 * AMD CPUs completely ignore bits 63:32, i.e. they aren't
		 * reserved and always read as zeros. Enforce Intel's reserved
-		 * bits check if and only if the guest CPU is Intel, and clear
-		 * the bits in all other cases. This ensures cross-vendor
-		 * migration will provide consistent behavior for the guest.
+		 * bits check if the guest CPU is Intel compatible, otherwise
+		 * clear the bits. This ensures cross-vendor migration will
+		 * provide consistent behavior for the guest.
		 */
-		if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
+		if (guest_cpuid_is_intel_compatible(vcpu) && (data >> 32) != 0)
			return 1;

		data = (u32)data;
@@ -4703,11 +4695,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
	case KVM_CAP_IRQFD_RESAMPLE:
	case KVM_CAP_MEMORY_FAULT_INFO:
+	case KVM_CAP_X86_GUEST_MODE:
		r = 1;
		break;
	case KVM_CAP_PRE_FAULT_MEMORY:
		r = tdp_enabled;
		break;
+	case KVM_CAP_X86_APIC_BUS_CYCLES_NS:
+		r = APIC_BUS_CYCLE_NS_DEFAULT;
+		break;
	case KVM_CAP_EXIT_HYPERCALL:
		r = KVM_EXIT_HYPERCALL_VALID_MASK;
		break;

@@ -5891,8 +5887,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
		r = -EINVAL;
		if (!lapic_in_kernel(vcpu))
			goto out;
-		u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
-				GFP_KERNEL_ACCOUNT);
+		u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);

		r = -ENOMEM;
		if (!u.lapic)

@@ -6085,7 +6080,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
		if (vcpu->arch.guest_fpu.uabi_size > sizeof(struct kvm_xsave))
			break;

-		u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
+		u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
		r = -ENOMEM;
		if (!u.xsave)
			break;

@@ -6116,7 +6111,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
	case KVM_GET_XSAVE2: {
		int size = vcpu->arch.guest_fpu.uabi_size;

-		u.xsave = kzalloc(size, GFP_KERNEL_ACCOUNT);
+		u.xsave = kzalloc(size, GFP_KERNEL);
		r = -ENOMEM;
		if (!u.xsave)
			break;

@@ -6134,7 +6129,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
	}

	case KVM_GET_XCRS: {
-		u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
+		u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
		r = -ENOMEM;
		if (!u.xcrs)
			break;
@@ -6756,6 +6751,30 @@ split_irqchip_unlock:
		}
		mutex_unlock(&kvm->lock);
		break;
+	case KVM_CAP_X86_APIC_BUS_CYCLES_NS: {
+		u64 bus_cycle_ns = cap->args[0];
+		u64 unused;
+
+		/*
+		 * Guard against overflow in tmict_to_ns(). 128 is the highest
+		 * divide value that can be programmed in APIC_TDCR.
+		 */
+		r = -EINVAL;
+		if (!bus_cycle_ns ||
+		    check_mul_overflow((u64)U32_MAX * 128, bus_cycle_ns, &unused))
+			break;
+
+		r = 0;
+		mutex_lock(&kvm->lock);
+		if (!irqchip_in_kernel(kvm))
+			r = -ENXIO;
+		else if (kvm->created_vcpus)
+			r = -EINVAL;
+		else
+			kvm->arch.apic_bus_cycle_ns = bus_cycle_ns;
+		mutex_unlock(&kvm->lock);
+		break;
+	}
	default:
		r = -EINVAL;
		break;
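The overflow guard bounds the accepted value: the worst case in tmict_to_ns()
is a full 32-bit initial count (U32_MAX) multiplied by the maximum APIC_TDCR
divide value of 128 and by bus_cycle_ns, so bus_cycle_ns may be at most roughly
2^64 / (2^32 * 128) = 2^25, i.e. about 33.5 million nanoseconds (~33 ms per
APIC bus cycle); anything larger, or zero, makes the ioctl return -EINVAL.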
@@ -8535,6 +8554,11 @@ static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt)
	return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID);
 }

+static bool emulator_guest_cpuid_is_intel_compatible(struct x86_emulate_ctxt *ctxt)
+{
+	return guest_cpuid_is_intel_compatible(emul_to_vcpu(ctxt));
+}
+
 static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
 {
	return kvm_register_read_raw(emul_to_vcpu(ctxt), reg);

@@ -8633,6 +8657,7 @@ static const struct x86_emulate_ops emulate_ops = {
	.guest_has_movbe = emulator_guest_has_movbe,
	.guest_has_fxsr = emulator_guest_has_fxsr,
	.guest_has_rdpid = emulator_guest_has_rdpid,
+	.guest_cpuid_is_intel_compatible = emulator_guest_cpuid_is_intel_compatible,
	.set_nmi_mask = emulator_set_nmi_mask,
	.is_smm = emulator_is_smm,
	.is_guest_mode = emulator_is_guest_mode,

@@ -9014,19 +9039,17 @@ EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);

 static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu)
 {
-	u32 shadow;
-
	if (kvm_get_rflags(vcpu) & X86_EFLAGS_RF)
		return true;

	/*
-	 * Intel CPUs inhibit code #DBs when MOV/POP SS blocking is active,
-	 * but AMD CPUs do not. MOV/POP SS blocking is rare, check that first
-	 * to avoid the relatively expensive CPUID lookup.
+	 * Intel compatible CPUs inhibit code #DBs when MOV/POP SS blocking is
+	 * active, but AMD compatible CPUs do not.
	 */
-	shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
-	return (shadow & KVM_X86_SHADOW_INT_MOV_SS) &&
-	       guest_cpuid_is_intel(vcpu);
+	if (!guest_cpuid_is_intel_compatible(vcpu))
+		return false;
+
+	return static_call(kvm_x86_get_interrupt_shadow)(vcpu) & KVM_X86_SHADOW_INT_MOV_SS;
 }

 static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu,
@@ -9786,19 +9809,19 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 	kvm_caps.supported_mce_cap = MCG_CTL_P | MCG_SER_P;
 
 	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
-		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-		kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
+		kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+		kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0;
 	}
 
-	rdmsrl_safe(MSR_EFER, &host_efer);
+	rdmsrl_safe(MSR_EFER, &kvm_host.efer);
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		rdmsrl(MSR_IA32_XSS, host_xss);
+		rdmsrl(MSR_IA32_XSS, kvm_host.xss);
 
 	kvm_init_pmu_capability(ops->pmu_ops);
 
 	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities);
+		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, kvm_host.arch_capabilities);
 
 	r = ops->hardware_setup();
 	if (r != 0)
@@ -10023,6 +10046,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_apicv_activated);
 static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
 				       enum kvm_apicv_inhibit reason, bool set)
 {
+	const struct trace_print_flags apicv_inhibits[] = { APICV_INHIBIT_REASONS };
+
+	BUILD_BUG_ON(ARRAY_SIZE(apicv_inhibits) != NR_APICV_INHIBIT_REASONS);
+
 	if (set)
 		__set_bit(reason, inhibits);
 	else
@@ -10034,7 +10061,7 @@ static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
 static void kvm_apicv_init(struct kvm *kvm)
 {
 	enum kvm_apicv_inhibit reason = enable_apicv ? APICV_INHIBIT_REASON_ABSENT :
-						       APICV_INHIBIT_REASON_DISABLE;
+						       APICV_INHIBIT_REASON_DISABLED;
 
 	set_or_clear_apicv_inhibit(&kvm->arch.apicv_inhibit_reasons, reason, true);
 
@@ -10255,6 +10282,8 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 
 	if (is_smm(vcpu))
 		kvm_run->flags |= KVM_RUN_X86_SMM;
+	if (is_guest_mode(vcpu))
+		kvm_run->flags |= KVM_RUN_X86_GUEST_MODE;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
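On the userspace side (not part of this commit), the new flag can be read straight out of the vCPU's mmap()'d kvm_run structure after KVM_RUN returns; a hedged sketch, with the helper name invented purely for illustration:

    #include <stdbool.h>
    #include <linux/kvm.h>

    /* run is assumed to point at the vCPU's mmap()'d struct kvm_run. */
    static bool vcpu_was_in_l2(const struct kvm_run *run)
    {
            /* Set by post_kvm_run_save() when the vCPU was executing an L2 guest. */
            return run->flags & KVM_RUN_X86_GUEST_MODE;
    }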
@@ -12629,6 +12658,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
 	kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
+	kvm->arch.apic_bus_cycle_ns = APIC_BUS_CYCLE_NS_DEFAULT;
 	kvm->arch.guest_can_read_msr_platform_info = true;
 	kvm->arch.enable_pmu = enable_pmu;
 
@@ -33,6 +33,20 @@ struct kvm_caps {
 	u64 supported_perf_cap;
 };
 
+struct kvm_host_values {
+	/*
+	 * The host's raw MAXPHYADDR, i.e. the number of non-reserved physical
+	 * address bits irrespective of features that repurpose legal bits,
+	 * e.g. MKTME.
+	 */
+	u8 maxphyaddr;
+
+	u64 efer;
+	u64 xcr0;
+	u64 xss;
+	u64 arch_capabilities;
+};
+
 void kvm_spurious_fault(void);
 
 #define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check) \
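A hypothetical consumer of the new global, just to illustrate the intent of the consolidation (the helper below is made up; real code simply reads kvm_host.<field> directly, as the x86.c conversions above do):

    /* Mask a physical address down to the host's reported MAXPHYADDR. */
    static inline u64 clamp_to_host_maxphyaddr(u64 pa)
    {
            return pa & (BIT_ULL(kvm_host.maxphyaddr) - 1);
    }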
@@ -325,11 +339,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 			    int emulation_type, void *insn, int insn_len);
 fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
 
-extern u64 host_xcr0;
-extern u64 host_xss;
-extern u64 host_arch_capabilities;
-
 extern struct kvm_caps kvm_caps;
+extern struct kvm_host_values kvm_host;
 
 extern bool enable_pmu;
 
@@ -931,6 +931,8 @@ struct kvm_enable_cap {
 #define KVM_CAP_GUEST_MEMFD 234
 #define KVM_CAP_VM_TYPES 235
 #define KVM_CAP_PRE_FAULT_MEMORY 236
+#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
+#define KVM_CAP_X86_GUEST_MODE 238
 
 struct kvm_irq_routing_irqchip {
 	__u32 irqchip;
@@ -112,6 +112,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
+TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test
 TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
 TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test
@@ -60,6 +60,14 @@
 #define APIC_VECTOR_MASK 0x000FF
 #define APIC_ICR2 0x310
 #define SET_APIC_DEST_FIELD(x) ((x) << 24)
+#define APIC_LVTT 0x320
+#define APIC_LVT_TIMER_ONESHOT (0 << 17)
+#define APIC_LVT_TIMER_PERIODIC (1 << 17)
+#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
+#define APIC_LVT_MASKED (1 << 16)
+#define APIC_TMICT 0x380
+#define APIC_TMCCT 0x390
+#define APIC_TDCR 0x3E0
 
 void apic_disable(void);
 void xapic_enable(void);
@@ -23,6 +23,7 @@
 
 extern bool host_cpu_is_intel;
 extern bool host_cpu_is_amd;
+extern uint64_t guest_tsc_khz;
 
 /* Forced emulation prefix, used to invoke the emulator unconditionally. */
 #define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
@@ -816,6 +817,23 @@ static inline void cpu_relax(void)
 	asm volatile("rep; nop" ::: "memory");
 }
 
+static inline void udelay(unsigned long usec)
+{
+	uint64_t start, now, cycles;
+
+	GUEST_ASSERT(guest_tsc_khz);
+	cycles = guest_tsc_khz / 1000 * usec;
+
+	/*
+	 * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
+	 * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
+	 */
+	start = rdtsc();
+	do {
+		now = rdtsc();
+	} while (now - start < cycles);
+}
+
 #define ud2() \
 	__asm__ __volatile__( \
 		"ud2\n" \
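To make the TSC arithmetic in udelay() concrete: with a hypothetical guest_tsc_khz of 2,000,000 (a 2GHz TSC), udelay(100 * 1000) spins for 2,000,000 / 1000 * 100,000 = 200,000,000 TSC cycles, i.e. roughly 100ms of wall-clock time. A guest-side usage sketch built on the helpers added above (the demo function itself is illustrative, not part of the series):

    /* Hypothetical guest code: spin for 100ms and sanity check the TSC delta. */
    static void guest_delay_demo(void)
    {
            uint64_t start = rdtsc();

            udelay(100 * 1000);     /* 100ms */

            GUEST_ASSERT(rdtsc() - start >= guest_tsc_khz / 1000 * 100 * 1000);
    }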
@@ -25,6 +25,7 @@ vm_vaddr_t exception_handlers;
 bool host_cpu_is_amd;
 bool host_cpu_is_intel;
 bool is_forced_emulation_enabled;
+uint64_t guest_tsc_khz;
 
 static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
 {
@@ -616,6 +617,11 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vm_post_create(struct kvm_vm *vm)
 {
+	int r;
+
+	TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ),
+		    "Require KVM_GET_TSC_KHZ to provide udelay() to guest.");
+
 	vm_create_irqchip(vm);
 	vm_init_descriptor_tables(vm);
 
@@ -628,6 +634,11 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
 
 		vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
 	}
 
+	r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL);
+	TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency.");
+	guest_tsc_khz = r;
+	sync_global_to_guest(vm, guest_tsc_khz);
 }
 
 void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c (new file, 194 lines)
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * Verify KVM correctly emulates the APIC bus frequency when the VMM configures
+ * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. Start the APIC timer by
+ * programming TMICT (timer initial count) to the largest value possible (so
+ * that the timer will not expire during the test). Then, after an arbitrary
+ * amount of time has elapsed, verify TMCCT (timer current count) is within 1%
+ * of the expected value based on the time elapsed, the APIC bus frequency, and
+ * the programmed TDCR (timer divide configuration register).
+ */
+
+#include "apic.h"
+#include "test_util.h"
+
+/*
+ * Possible TDCR values with matching divide count. Used to modify APIC
+ * timer frequency.
+ */
+static const struct {
+	const uint32_t tdcr;
+	const uint32_t divide_count;
+} tdcrs[] = {
+	{0x0, 2},
+	{0x1, 4},
+	{0x2, 8},
+	{0x3, 16},
+	{0x8, 32},
+	{0x9, 64},
+	{0xa, 128},
+	{0xb, 1},
+};
+
+static bool is_x2apic;
+
+static void apic_enable(void)
+{
+	if (is_x2apic)
+		x2apic_enable();
+	else
+		xapic_enable();
+}
+
+static uint32_t apic_read_reg(unsigned int reg)
+{
+	return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
+}
+
+static void apic_write_reg(unsigned int reg, uint32_t val)
+{
+	if (is_x2apic)
+		x2apic_write_reg(reg, val);
+	else
+		xapic_write_reg(reg, val);
+}
+
+static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
+{
+	uint64_t tsc_hz = guest_tsc_khz * 1000;
+	const uint32_t tmict = ~0u;
+	uint64_t tsc0, tsc1, freq;
+	uint32_t tmcct;
+	int i;
+
+	apic_enable();
+
+	/*
+	 * Setup one-shot timer. The vector does not matter because the
+	 * interrupt should not fire.
+	 */
+	apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED);
+
+	for (i = 0; i < ARRAY_SIZE(tdcrs); i++) {
+		apic_write_reg(APIC_TDCR, tdcrs[i].tdcr);
+		apic_write_reg(APIC_TMICT, tmict);
+
+		tsc0 = rdtsc();
+		udelay(delay_ms * 1000);
+		tmcct = apic_read_reg(APIC_TMCCT);
+		tsc1 = rdtsc();
+
+		/*
+		 * Stop the timer _after_ reading the current, final count, as
+		 * writing the initial counter also modifies the current count.
+		 */
+		apic_write_reg(APIC_TMICT, 0);
+
+		freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0);
+		/* Check if measured frequency is within 5% of configured frequency. */
+		__GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100,
+			       "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu",
+			       freq, apic_hz * 95 / 100, apic_hz * 105 / 100,
+			       apic_hz, tdcrs[i].divide_count, tsc_hz);
+	}
+
+	GUEST_DONE();
+}
+
+static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
+{
+	bool done = false;
+	struct ucall uc;
+
+	while (!done) {
+		vcpu_run(vcpu);
+
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_DONE:
+			done = true;
+			break;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			break;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+			break;
+		}
+	}
+}
+
+static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
+				    bool x2apic)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	int ret;
+
+	is_x2apic = x2apic;
+
+	vm = vm_create(1);
+
+	sync_global_to_guest(vm, is_x2apic);
+
+	vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+		      NSEC_PER_SEC / apic_hz);
+
+	vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
+	vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
+
+	ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+			      NSEC_PER_SEC / apic_hz);
+	TEST_ASSERT(ret < 0 && errno == EINVAL,
+		    "Setting of APIC bus frequency after vCPU is created should fail.");
+
+	if (!is_x2apic)
+		virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+	test_apic_bus_clock(vcpu);
+	kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+	puts("");
+	printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name);
+	puts("");
+	printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n");
+	printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n");
+	puts("");
+}
+
+int main(int argc, char *argv[])
+{
+	/*
+	 * Arbitrarily default to 25MHz for the APIC bus frequency, which is
+	 * different enough from the default 1GHz to be interesting.
+	 */
+	uint64_t apic_hz = 25 * 1000 * 1000;
+	uint64_t delay_ms = 100;
+	int opt;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
+
+	while ((opt = getopt(argc, argv, "d:f:h")) != -1) {
+		switch (opt) {
+		case 'f':
+			apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000;
+			break;
+		case 'd':
+			delay_ms = atoi_positive("Delay in milliseconds", optarg);
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			exit(KSFT_SKIP);
+		}
+	}
+
+	run_apic_bus_clock_test(apic_hz, delay_ms, false);
+	run_apic_bus_clock_test(apic_hz, delay_ms, true);
+}
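To sanity check the measurement loop in apic_guest_code() above, here is a worked example using the test's defaults (the numbers are illustrative, not taken from a real run):

    /*
     * apic_hz      = 25,000,000  (25MHz, the test default)
     * divide_count = 2           (TDCR = 0x0, the first entry in tdcrs[])
     * delay        = 100ms       (measured against the TSC via udelay())
     *
     * Expected timer ticks over the delay:
     *   apic_hz / divide_count * 0.1s = 25,000,000 / 2 * 0.1 = 1,250,000
     *
     * so tmict - tmcct should be about 1,250,000.  Plugging that back into
     *
     *   freq = (tmict - tmcct) * divide_count * tsc_hz / (tsc1 - tsc0)
     *
     * with tsc1 - tsc0 covering the same 100ms recovers ~25,000,000 Hz, which
     * must land inside the 5% window the guest asserts on.
     */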