1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/arch/x86/boot/compressed/sev.c
Nikunj A Dadhania 8c29f01654 x86/sev: Add SEV-SNP guest feature negotiation support
The hypervisor can enable various new features (SEV_FEATURES[1:63]) and start a
SNP guest. Some of these features need guest side implementation. If any of
these features are enabled without it, the behavior of the SNP guest will be
undefined.  It may fail booting in a non-obvious way making it difficult to
debug.

Instead of allowing the guest to continue and have it fail randomly later,
detect this early and fail gracefully.

The SEV_STATUS MSR indicates features which the hypervisor has enabled.  While
booting, SNP guests should ascertain that all the enabled features have guest
side implementation. In case a feature is not implemented in the guest, the
guest terminates booting with a GHCB protocol Non-Automatic Exit (NAE) termination
request event, see "SEV-ES Guest-Hypervisor Communication Block Standardization"
document (currently at https://developer.amd.com/wp-content/resources/56421.pdf),
section "Termination Request".

Populate SW_EXITINFO2 with the mask of unsupported features so that the
hypervisor can easily report them to the user.

More details in the AMD64 APM Vol 2, Section "SEV_STATUS MSR".

  [ bp:
    - Massage.
    - Move snp_check_features() call to C code.
    Note: the CC:stable@ aspect here is to be able to protect older, stable
    kernels when running on newer hypervisors. Or not "running" but fail
    reliably and in a well-defined manner instead of randomly. ]

Fixes: cbd3d4f7c4 ("x86/sev: Check SEV-SNP features support")
Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: <stable@kernel.org>
Link: https://lore.kernel.org/r/20230118061943.534309-1-nikunj@amd.com
2023-01-19 17:29:58 +01:00

515 lines
13 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* AMD Encrypted Register State Support
*
* Author: Joerg Roedel <jroedel@suse.de>
*/
/*
* misc.h needs to be first because it knows how to include the other kernel
* headers in the pre-decompression code in a way that does not break
* compilation.
*/
#include "misc.h"
#include <asm/pgtable_types.h>
#include <asm/sev.h>
#include <asm/trapnr.h>
#include <asm/trap_pf.h>
#include <asm/msr-index.h>
#include <asm/fpu/xcr.h>
#include <asm/ptrace.h>
#include <asm/svm.h>
#include <asm/cpuid.h>
#include "error.h"
#include "../msr.h"
/*
 * Storage for the GHCB used by this boot stage, aligned to PAGE_SIZE.
 * Its address is what gets passed to set_page_decrypted(),
 * set_page_encrypted() and set_page_non_present() below.
 */
struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
/*
 * Set to &boot_ghcb_page by early_setup_ghcb(). It is zero-initialized,
 * so a NULL value is used below as the "GHCB not set up yet" test.
 */
struct ghcb *boot_ghcb;
/*
 * Local copy of this helper: insn-eval.c can't be used in
 * pre-decompression code.
 *
 * Returns true if any prefix byte of @insn is 0xf2 or 0xf3, false
 * otherwise.
 */
static bool insn_has_rep_prefix(struct insn *insn)
{
	insn_byte_t prefix;
	int idx;

	/* Make sure the prefix bytes have been decoded before walking them. */
	insn_get_prefixes(insn);

	for_each_insn_prefix(insn, idx, prefix) {
		switch (prefix) {
		case 0xf2:
		case 0xf3:
			return true;
		default:
			break;
		}
	}

	return false;
}
/*
 * Stand-in for insn_get_seg_base(): early boot code is 64-bit only and
 * doesn't use segments, so the base is always reported as 0 and both
 * arguments are ignored.
 */
static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
{
	const unsigned long flat_base = 0UL;

	return flat_base;
}
static inline u64 sev_es_rd_ghcb_msr(void)
{
struct msr m;
boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m);
return m.q;
}
/* Write the 64-bit value @val to MSR_AMD64_SEV_ES_GHCB via boot_wrmsr(). */
static inline void sev_es_wr_ghcb_msr(u64 val)
{
	struct msr ghcb_msr;

	ghcb_msr.q = val;

	boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &ghcb_msr);
}
/*
 * Decode the instruction at ctxt->regs->ip into ctxt->insn.
 *
 * MAX_INSN_SIZE bytes are first copied from the instruction pointer into a
 * local buffer, which is then decoded in 64-bit mode.
 *
 * Returns ES_OK on success or ES_DECODE_FAILED if insn_decode() reports an
 * error.
 */
static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
	char insn_bytes[MAX_INSN_SIZE];

	memcpy(insn_bytes, (unsigned char *)ctxt->regs->ip, sizeof(insn_bytes));

	if (insn_decode(&ctxt->insn, insn_bytes, MAX_INSN_SIZE, INSN_MODE_64) < 0)
		return ES_DECODE_FAILED;

	return ES_OK;
}
/*
 * Copy @size bytes from @buf to @dst with a plain memcpy().
 * @ctxt is not used. Always returns ES_OK.
 */
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   void *dst, char *buf, size_t size)
{
	memcpy(dst, buf, size);

	return ES_OK;
}
/*
 * Copy @size bytes from @src into @buf with a plain memcpy().
 * @ctxt is not used. Always returns ES_OK.
 */
static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  void *src, char *buf, size_t size)
{
	memcpy(buf, src, size);

	return ES_OK;
}
/*
 * The .c files included below are compiled as part of this file. Before
 * pulling them in, redefine __init to expand to nothing and __pa() to a
 * plain cast to unsigned long.
 */
#undef __init
#undef __pa
#define __init
#define __pa(x) ((unsigned long)(x))
/* NOTE(review): presumably tested by the included sources to select boot-stage code paths - confirm. */
#define __BOOT_COMPRESSED
/* Basic instruction decoding support needed */
#include "../../lib/inat.c"
#include "../../lib/insn.c"
/* Include code for early handlers */
#include "../../kernel/sev-shared.c"
/* Report whether MSR_AMD64_SEV_SNP_ENABLED is set in the cached sev_status. */
static inline bool sev_snp_enabled(void)
{
	return (sev_status & MSR_AMD64_SEV_SNP_ENABLED) != 0;
}
/*
 * Change the state of the 4K page at @paddr to @op using the GHCB MSR
 * protocol. Does nothing unless SNP is enabled.
 *
 * Any failure (PVALIDATE error or an unexpected/non-zero PSC response)
 * terminates the guest through sev_es_terminate().
 */
static void __page_state_change(unsigned long paddr, enum psc_op op)
{
	u64 resp;

	if (!sev_snp_enabled())
		return;

	/*
	 * Going private -> shared: rescind the validation of the page first,
	 * before the state change is requested in the RMP table.
	 */
	if (op == SNP_PAGE_STATE_SHARED) {
		if (pvalidate(paddr, RMP_PG_SIZE_4K, 0))
			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
	}

	/* Request the RMP page state change with a VMGEXIT. */
	sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
	VMGEXIT();

	/* Fetch the response and make sure it is a successful PSC response. */
	resp = sev_es_rd_ghcb_msr();
	if ((GHCB_RESP_CODE(resp) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(resp))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);

	/*
	 * Going shared -> private: the RMP entry has been updated, so validate
	 * the page to make it consistent with that entry.
	 */
	if (op == SNP_PAGE_STATE_PRIVATE) {
		if (pvalidate(paddr, RMP_PG_SIZE_4K, 1))
			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
	}
}
void snp_set_page_private(unsigned long paddr)
{
__page_state_change(paddr, SNP_PAGE_STATE_PRIVATE);
}
void snp_set_page_shared(unsigned long paddr)
{
__page_state_change(paddr, SNP_PAGE_STATE_SHARED);
}
/*
 * Prepare boot_ghcb_page for use and publish it through boot_ghcb.
 *
 * Returns false if the page could not be mapped decrypted, true otherwise.
 */
static bool early_setup_ghcb(void)
{
	unsigned long ghcb_addr = (unsigned long)&boot_ghcb_page;

	if (set_page_decrypted(ghcb_addr))
		return false;

	/* The page is mapped decrypted from here on; start from a clean slate. */
	memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page));
	boot_ghcb = &boot_ghcb_page;

	/* The instruction decoder needs its lookup tables initialized. */
	inat_init_tables();

	/* An SNP guest must register the GPA of its GHCB. */
	if (sev_snp_enabled())
		snp_register_ghcb_early(__pa(&boot_ghcb_page));

	return true;
}
/*
 * Tear down the boot GHCB, if one was set up: check the SEV-ES CPU
 * features, then map the GHCB page encrypted again and finally unmap it.
 * Each failing step is reported through error().
 */
void sev_es_shutdown_ghcb(void)
{
	unsigned long ghcb_addr = (unsigned long)&boot_ghcb_page;

	/* Nothing to undo when the GHCB was never set up. */
	if (!boot_ghcb)
		return;

	if (!sev_es_check_cpu_features())
		error("SEV-ES CPU Features missing.");

	/*
	 * The GHCB page has to be flushed from the cache and mapped encrypted
	 * again; otherwise the running kernel will see strange cache effects
	 * when it tries to use that page.
	 */
	if (set_page_encrypted(ghcb_addr))
		error("Can't map GHCB page encrypted");

	/*
	 * With the page encrypted and flushed, mark it non-present so that
	 * bugs where a #VC exception triggers after this point are caught.
	 */
	if (set_page_non_present(ghcb_addr))
		error("Can't unmap GHCB page");
}
/*
 * Issue a termination request through the GHCB page @ghcb and never return.
 *
 * @set and @reason are combined into SW_EXITINFO1 with
 * SVM_VMGEXIT_TERM_REASON(); @exit_info_2 is passed on as SW_EXITINFO2.
 * After the VMGEXIT the CPU is parked in an endless hlt loop.
 */
static void __noreturn sev_es_ghcb_terminate(struct ghcb *ghcb, unsigned int set,
					     unsigned int reason, u64 exit_info_2)
{
	u64 term_reason = SVM_VMGEXIT_TERM_REASON(set, reason);

	vc_ghcb_invalidate(ghcb);

	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_TERM_REQUEST);
	ghcb_set_sw_exit_info_1(ghcb, term_reason);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	for (;;)
		asm volatile("hlt\n" : : : "memory");
}
bool sev_es_check_ghcb_fault(unsigned long address)
{
/* Check whether the fault was on the GHCB page */
return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page);
}
void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
{
struct es_em_ctxt ctxt;
enum es_result result;
if (!boot_ghcb && !early_setup_ghcb())
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
vc_ghcb_invalidate(boot_ghcb);
result = vc_init_em_ctxt(&ctxt, regs, exit_code);
if (result != ES_OK)
goto finish;
switch (exit_code) {
case SVM_EXIT_RDTSC:
case SVM_EXIT_RDTSCP:
result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code);
break;
case SVM_EXIT_IOIO:
result = vc_handle_ioio(boot_ghcb, &ctxt);
break;
case SVM_EXIT_CPUID:
result = vc_handle_cpuid(boot_ghcb, &ctxt);
break;
default:
result = ES_UNSUPPORTED;
break;
}
finish:
if (result == ES_OK)
vc_finish_insn(&ctxt);
else if (result != ES_RETRY)
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
/*
 * Terminate the guest with GHCB_TERM_NOT_VMPL0 unless it runs at VMPL0.
 *
 * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically
 * higher) privilege level. Here, clear the VMPL1 permission mask of the
 * GHCB page. If the guest is not running at VMPL0, this will fail.
 *
 * If the guest is running at VMPL0, it will succeed. Even if that operation
 * modifies permission bits, it is still ok to do so currently because Linux
 * SNP guests are supported only on VMPL0 so VMPL1 or higher permission masks
 * changing is a don't-care.
 */
static void enforce_vmpl0(void)
{
	u64 attrs;

	/* Target VMPL1 with its permission mask cleared, as described above. */
	attrs = 1;
	if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
}
/*
 * SNP_FEATURES_IMPL_REQ is the mask of SNP features that need a guest-side
 * implementation for the guest to function properly. If any of these
 * features is enabled by the hypervisor but lacks a guest-side
 * implementation, the behavior of the guest is undefined. The guest could
 * fail in a non-obvious way, making it difficult to debug.
 *
 * The behavior of reserved feature bits is unknown, so to be on the safe
 * side they are added to the required features mask as well.
 */
#define SNP_FEATURES_IMPL_REQ (MSR_AMD64_SNP_VTOM | \
MSR_AMD64_SNP_REFLECT_VC | \
MSR_AMD64_SNP_RESTRICTED_INJ | \
MSR_AMD64_SNP_ALT_INJ | \
MSR_AMD64_SNP_DEBUG_SWAP | \
MSR_AMD64_SNP_VMPL_SSS | \
MSR_AMD64_SNP_SECURE_TSC | \
MSR_AMD64_SNP_VMGEXIT_PARAM | \
MSR_AMD64_SNP_VMSA_REG_PROTECTION | \
MSR_AMD64_SNP_RESERVED_BIT13 | \
MSR_AMD64_SNP_RESERVED_BIT15 | \
MSR_AMD64_SNP_RESERVED_MASK)
/*
 * SNP_FEATURES_PRESENT is the mask of SNP features that are implemented
 * by the guest kernel. Whenever a new feature is implemented in the guest
 * kernel, the corresponding bit should be added to this mask. It is
 * currently empty.
 */
#define SNP_FEATURES_PRESENT (0)
/*
 * Terminate the boot if the hypervisor has enabled an SNP feature that
 * lacks a guest-side implementation. Does nothing for non-SNP guests.
 *
 * The mask of unsupported features is handed over through EXIT_INFO_2 of
 * the GHCB protocol so that it can be reported as part of the guest boot
 * failure. If ghcb_version is below 2, or the boot GHCB cannot be set up,
 * sev_es_terminate() is used instead, which carries no mask.
 */
void snp_check_features(void)
{
	u64 missing;

	if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	missing = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
	if (!missing)
		return;

	if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);

	sev_es_ghcb_terminate(boot_ghcb, SEV_TERM_SET_GEN,
			      GHCB_SNP_UNSUPPORTED, missing);
}
void sev_enable(struct boot_params *bp)
{
unsigned int eax, ebx, ecx, edx;
struct msr m;
bool snp;
/*
* bp->cc_blob_address should only be set by boot/compressed kernel.
* Initialize it to 0 to ensure that uninitialized values from
* buggy bootloaders aren't propagated.
*/
if (bp)
bp->cc_blob_address = 0;
/*
* Setup/preliminary detection of SNP. This will be sanity-checked
* against CPUID/MSR values later.
*/
snp = snp_init(bp);
/* Check for the SME/SEV support leaf */
eax = 0x80000000;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
if (eax < 0x8000001f)
return;
/*
* Check for the SME/SEV feature:
* CPUID Fn8000_001F[EAX]
* - Bit 0 - Secure Memory Encryption support
* - Bit 1 - Secure Encrypted Virtualization support
* CPUID Fn8000_001F[EBX]
* - Bits 5:0 - Pagetable bit position used to indicate encryption
*/
eax = 0x8000001f;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
/* Check whether SEV is supported */
if (!(eax & BIT(1))) {
if (snp)
error("SEV-SNP support indicated by CC blob, but not CPUID.");
return;
}
/* Set the SME mask if this is an SEV guest. */
boot_rdmsr(MSR_AMD64_SEV, &m);
sev_status = m.q;
if (!(sev_status & MSR_AMD64_SEV_ENABLED))
return;
/* Negotiate the GHCB protocol version. */
if (sev_status & MSR_AMD64_SEV_ES_ENABLED) {
if (!sev_es_negotiate_protocol())
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_PROT_UNSUPPORTED);
}
/*
* SNP is supported in v2 of the GHCB spec which mandates support for HV
* features.
*/
if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
if (!(get_hv_features() & GHCB_HV_FT_SNP))
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
enforce_vmpl0();
}
if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
sme_me_mask = BIT_ULL(ebx & 0x3f);
}
/*
 * Look up the Confidential Computing blob in the EFI config table.
 *
 * Returns NULL if the config table cannot be obtained; otherwise returns
 * whatever efi_find_vendor_table() yields for EFI_CC_BLOB_GUID.
 */
static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp)
{
	unsigned long table_pa;
	unsigned int table_len;
	int err;

	err = efi_get_conf_table(bp, &table_pa, &table_len);
	if (err != 0)
		return NULL;

	return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, table_pa,
								table_len,
								EFI_CC_BLOB_GUID);
}
/*
 * Initial set up of SNP relies on information provided by the
 * Confidential Computing blob, which firmware/bootloader can hand to the
 * boot kernel in two ways:
 *
 * - via an entry in the EFI config table
 * - via a setup_data structure, as defined by the Linux Boot Protocol
 *
 * Both are tried in that order. Returns NULL when neither provides a blob.
 * A blob whose magic does not match CC_BLOB_SEV_HDR_MAGIC terminates the
 * guest.
 */
static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
{
	struct cc_blob_sev_info *blob;

	blob = find_cc_blob_efi(bp);
	if (!blob)
		blob = find_cc_blob_setup_data(bp);

	if (!blob)
		return NULL;

	if (blob->magic != CC_BLOB_SEV_HDR_MAGIC)
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);

	return blob;
}
/*
* Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks
* will verify the SNP CPUID/MSR bits.
*/
bool snp_init(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
if (!bp)
return false;
cc_info = find_cc_blob(bp);
if (!cc_info)
return false;
/*
* If a SNP-specific Confidential Computing blob is present, then
* firmware/bootloader have indicated SNP support. Verifying this
* involves CPUID checks which will be more reliable if the SNP
* CPUID table is used. See comments over snp_setup_cpuid_table() for
* more details.
*/
setup_cpuid_table(cc_info);
/*
* Pass run-time kernel a pointer to CC info via boot_params so EFI
* config table doesn't need to be searched again during early startup
* phase.
*/
bp->cc_blob_address = (u32)(unsigned long)cc_info;
return true;
}
/*
 * Add the identity mappings needed by SNP guests, then verify the C-bit
 * for the page table at @top_level_pgt.
 */
void sev_prep_identity_maps(unsigned long top_level_pgt)
{
	/*
	 * The uncompressed kernel uses the Confidential Computing blob very
	 * early to find the in-memory CPUID table for handling CPUID
	 * instructions. Make sure both the blob and the table it points to
	 * are identity-mapped so they can be accessed after switchover.
	 */
	if (sev_snp_enabled()) {
		unsigned long blob_pa = boot_params->cc_blob_address;
		struct cc_blob_sev_info *blob = (struct cc_blob_sev_info *)blob_pa;

		/* Map the blob header itself before reading any of its fields. */
		kernel_add_identity_map(blob_pa, blob_pa + sizeof(*blob));

		kernel_add_identity_map(blob->cpuid_phys,
					blob->cpuid_phys + blob->cpuid_len);
	}

	sev_verify_cbit(top_level_pgt);
}