This is the Intel version of a confidential computing solution called Trust Domain Extensions (TDX). This series adds support to run the kernel as part of a TDX guest. It provides similar guest protections to AMD's SEV-SNP like guest memory and register state encryption, memory integrity protection and a lot more. Design-wise, it differs from AMD's solution considerably: it uses a software module which runs in a special CPU mode called Secure Arbitration Mode (SEAM). As the name suggests, this module serves as sort of an arbiter which the confidential guest calls for services it needs during its lifetime. Just like AMD's SNP set, this series reworks and streamlines certain parts of x86 arch code so that this feature can be properly accommodated. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmKLbisACgkQEsHwGGHe VUqZLg/7B55iygCwzz0W/KLcXL2cISatUpzGbFs1XTbE9DMz06BPkOsEjF2k8ckv kfZjgqhSx3GvUI80gK0Tn2M2DfIj3nKuNSXd1pfextP7AxEf68FFJsQz1Ju7bHpT pZaG+g8IK4+mnEHEKTCO9ANg/Zw8yqJLdtsCaCNE9SUGUfQ6m/ujTEfsambXDHNm khyCAgpIGSOt51/4apoR9ebyrNCaeVbDawpIPjTy+iyFRc/WyaLFV9CQ8klw4gbw r/90x2JYxvAf0/z/ifT9Wa+TnYiQ0d4VjFbfr0iJ4GcPn5L3EIoIKPE8vPGMpoSX fLSzoNmAOT3ja57ytUUQ3o0edoRUIPEdixOebf9qWvE/aj7W37YRzrlJ8Ej/x9Jy HcI4WZF6Dr1bh6FnI/xX2eVZRzLOL4j9gNyPCwIbvgr1NjDqQnxU7nhxVMmQhJrs IdiEcP5WYerLKfka/uF//QfWUg5mDBgFa1/3xK57Z3j0iKWmgjaPpR0SWlOKjj8G tr0gGN9ejikZTqXKGsHn8fv/R3bjXvbVD8z0IEcx+MIrRmZPnX2QBlg7UA1AXV5n HoVwPFdH1QAtjZq1MRcL4hTOjz3FkS68rg7ZH0f2GWJAzWmEGytBIhECRnN/PFFq VwRB4dCCt0bzqRxkiH5lzdgR+xqRe61juQQsMzg+Flv/trpXDqM= =ac9K -----END PGP SIGNATURE----- Merge tag 'x86_tdx_for_v5.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull Intel TDX support from Borislav Petkov: "Intel Trust Domain Extensions (TDX) support. This is the Intel version of a confidential computing solution called Trust Domain Extensions (TDX). This series adds support to run the kernel as part of a TDX guest. 
It provides similar guest protections to AMD's SEV-SNP like guest memory and register state encryption, memory integrity protection and a lot more. Design-wise, it differs from AMD's solution considerably: it uses a software module which runs in a special CPU mode called Secure Arbitration Mode (SEAM). As the name suggests, this module serves as sort of an arbiter which the confidential guest calls for services it needs during its lifetime. Just like AMD's SNP set, this series reworks and streamlines certain parts of x86 arch code so that this feature can be properly accommodated" * tag 'x86_tdx_for_v5.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (34 commits) x86/tdx: Fix RETs in TDX asm x86/tdx: Annotate a noreturn function x86/mm: Fix spacing within memory encryption features message x86/kaslr: Fix build warning in KASLR code in boot stub Documentation/x86: Document TDX kernel architecture ACPICA: Avoid cache flush inside virtual machines x86/tdx/ioapic: Add shared bit for IOAPIC base address x86/mm: Make DMA memory shared for TD guest x86/mm/cpa: Add support for TDX shared memory x86/tdx: Make pages shared in ioremap() x86/topology: Disable CPU online/offline control for TDX guests x86/boot: Avoid #VE during boot for TDX platforms x86/boot: Set CR0.NE early and keep it set during the boot x86/acpi/x86/boot: Add multiprocessor wake-up support x86/boot: Add a trampoline for booting APs via firmware handoff x86/tdx: Wire up KVM hypercalls x86/tdx: Port I/O: Add early boot support x86/tdx: Port I/O: Add runtime hypercalls x86/boot: Port I/O: Add decompression-time support for TDX x86/boot: Port I/O: Allow to hook up alternative helpers ...
472 lines
12 KiB
C
472 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* misc.c
|
|
*
|
|
* This is a collection of several routines used to extract the kernel
|
|
* which includes KASLR relocation, decompression, ELF parsing, and
|
|
* relocation processing. Additionally included are the screen and serial
|
|
* output functions and related debugging support functions.
|
|
*
|
|
* malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
|
|
* puts by Nick Holloway 1993, better puts by Martin Mares 1995
|
|
* High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
|
|
*/
|
|
|
|
#include "misc.h"
|
|
#include "error.h"
|
|
#include "pgtable.h"
|
|
#include "../string.h"
|
|
#include "../voffset.h"
|
|
#include <asm/bootparam_utils.h>
|
|
|
|
/*
|
|
* WARNING!!
|
|
* This code is compiled with -fPIC and it is relocated dynamically at
|
|
* run time, but no relocation processing is performed. This means that
|
|
* it is not safe to place pointers in static structures.
|
|
*/
|
|
|
|
/* Macros used by the included decompressor code below. */
|
|
#define STATIC static
|
|
/* Define an externally visible malloc()/free(). */
|
|
#define MALLOC_VISIBLE
|
|
#include <linux/decompress/mm.h>
|
|
|
|
/*
|
|
* Provide definitions of memzero and memmove as some of the decompressors will
|
|
* try to define their own functions if these are not defined as macros.
|
|
*/
|
|
#define memzero(s, n) memset((s), 0, (n))
|
|
#ifndef memmove
|
|
#define memmove memmove
|
|
/* Functions used by the included decompressor code below. */
|
|
void *memmove(void *dest, const void *src, size_t n);
|
|
#endif
|
|
|
|
/*
|
|
* This is set up by the setup-routine at boot-time
|
|
*/
|
|
/* Boot parameters pointer; extract_kernel() assigns it from its rmode argument. */
struct boot_params *boot_params;
|
|

|
/*
 * Port I/O operations used by this stub.
 * NOTE(review): presumably populated by init_default_io_ops() and possibly
 * overridden by early_tdx_detect() -- confirm against their definitions.
 */
struct port_io_ops pio_ops;
|
|

|
/* Heap bounds; extract_kernel() sets these to heap and heap + BOOT_HEAP_SIZE. */
memptr free_mem_ptr;
|
|
memptr free_mem_end_ptr;
|
|

|
/*
 * Text screen buffer and the I/O port base used for cursor updates.
 * Both are selected in extract_kernel() from screen_info.orig_video_mode.
 */
static char *vidmem;
|
|
static int vidport;
|
|

|
/*
 * Text screen dimensions, loaded in extract_kernel() from
 * screen_info.orig_video_lines and screen_info.orig_video_cols.
 * These might be accessed before .bss is cleared, so use .data instead.
 */
|
|
static int lines __section(".data");
|
|
static int cols __section(".data");
|
|
|
|
#ifdef CONFIG_KERNEL_GZIP
|
|
#include "../../../../lib/decompress_inflate.c"
|
|
#endif
|
|
|
|
#ifdef CONFIG_KERNEL_BZIP2
|
|
#include "../../../../lib/decompress_bunzip2.c"
|
|
#endif
|
|
|
|
#ifdef CONFIG_KERNEL_LZMA
|
|
#include "../../../../lib/decompress_unlzma.c"
|
|
#endif
|
|
|
|
#ifdef CONFIG_KERNEL_XZ
|
|
#include "../../../../lib/decompress_unxz.c"
|
|
#endif
|
|
|
|
#ifdef CONFIG_KERNEL_LZO
|
|
#include "../../../../lib/decompress_unlzo.c"
|
|
#endif
|
|
|
|
#ifdef CONFIG_KERNEL_LZ4
|
|
#include "../../../../lib/decompress_unlz4.c"
|
|
#endif
|
|
|
|
#ifdef CONFIG_KERNEL_ZSTD
|
|
#include "../../../../lib/decompress_unzstd.c"
|
|
#endif
|
|
/*
|
|
* NOTE: When adding a new decompressor, please update the analysis in
|
|
* ../header.S.
|
|
*/
|
|
|
|
static void scroll(void)
|
|
{
|
|
int i;
|
|
|
|
memmove(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
|
|
for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2)
|
|
vidmem[i] = ' ';
|
|
}
|
|
|
|
#define XMTRDY 0x20
|
|
|
|
#define TXR 0 /* Transmit register (WRITE) */
|
|
#define LSR 5 /* Line Status */
|
|
static void serial_putchar(int ch)
|
|
{
|
|
unsigned timeout = 0xffff;
|
|
|
|
while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
|
|
cpu_relax();
|
|
|
|
outb(ch, early_serial_base + TXR);
|
|
}
|
|
|
|
void __putstr(const char *s)
|
|
{
|
|
int x, y, pos;
|
|
char c;
|
|
|
|
if (early_serial_base) {
|
|
const char *str = s;
|
|
while (*str) {
|
|
if (*str == '\n')
|
|
serial_putchar('\r');
|
|
serial_putchar(*str++);
|
|
}
|
|
}
|
|
|
|
if (lines == 0 || cols == 0)
|
|
return;
|
|
|
|
x = boot_params->screen_info.orig_x;
|
|
y = boot_params->screen_info.orig_y;
|
|
|
|
while ((c = *s++) != '\0') {
|
|
if (c == '\n') {
|
|
x = 0;
|
|
if (++y >= lines) {
|
|
scroll();
|
|
y--;
|
|
}
|
|
} else {
|
|
vidmem[(x + cols * y) * 2] = c;
|
|
if (++x >= cols) {
|
|
x = 0;
|
|
if (++y >= lines) {
|
|
scroll();
|
|
y--;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
boot_params->screen_info.orig_x = x;
|
|
boot_params->screen_info.orig_y = y;
|
|
|
|
pos = (x + cols * y) * 2; /* Update cursor position */
|
|
outb(14, vidport);
|
|
outb(0xff & (pos >> 9), vidport+1);
|
|
outb(15, vidport);
|
|
outb(0xff & (pos >> 1), vidport+1);
|
|
}
|
|
|
|
/*
 * Print @value as lower-case hexadecimal through __putstr().
 *
 * Every nibble of the unsigned long is printed, most significant first,
 * so the output is zero-padded to the full width of the type and carries
 * no "0x" prefix.
 */
void __puthex(unsigned long value)
{
	char digit_str[2] = "0";
	int shift = sizeof(value) * 8;

	while (shift > 0) {
		unsigned long nibble;

		shift -= 4;
		nibble = (value >> shift) & 0xf;

		digit_str[0] = (nibble < 0xA) ? '0' + nibble
					      : 'a' + (nibble - 0xA);

		__putstr(digit_str);
	}
}
|
|
|
|
#ifdef CONFIG_X86_NEED_RELOCS
/*
 * Apply the relocation tables stored at the end of the kernel image.
 *
 * @output:     start of the image in memory
 * @output_len: length of the image including the trailing tables
 * @virt_addr:  virtual address of the kernel; only consulted on 64-bit
 *
 * Returns early when the computed delta is zero. Calls error() if a table
 * entry resolves to an address outside [output, output + size of the
 * image up to __bss_start].
 */
static void handle_relocations(void *output, unsigned long output_len,
			       unsigned long virt_addr)
{
	const unsigned long image_start = (unsigned long)output;
	const unsigned long image_end = image_start + (VO___bss_start - VO__text);
	unsigned long delta, map, target;
	int *entry;

	/*
	 * Difference between the address vmlinux was linked to load at
	 * and the address it was actually loaded at.
	 */
	delta = image_start - LOAD_PHYSICAL_ADDR;

	/*
	 * Table entries hold final kernel virtual addresses, but we are
	 * running in the self map. Adding "map" to an entry turns it into
	 * an address usable here; this is the load delta with the kernel
	 * base address subtracted out.
	 */
	map = delta - __START_KERNEL_map;

	/*
	 * 32-bit always relocates by the load delta. On 64-bit the value
	 * patched in depends only on the chosen virtual address, i.e. on
	 * whether KASLR moved it away from the default offset.
	 */
	if (IS_ENABLED(CONFIG_X86_64))
		delta = virt_addr - LOAD_PHYSICAL_ADDR;

	if (!delta) {
		debug_putstr("No relocation needed... ");
		return;
	}
	debug_putstr("Performing relocations... ");

	/*
	 * Three zero-terminated tables sit at the end of the image. Each
	 * entry is a kernel address stored as a 32-bit value that is sign
	 * extended to 64 bits. Layout, in memory order:
	 *
	 *	kernel bits...
	 *	0 - zero terminator for 64 bit relocations
	 *	64 bit relocation repeated
	 *	0 - zero terminator for inverse 32 bit relocations
	 *	32 bit inverse relocation repeated
	 *	0 - zero terminator for 32 bit relocations
	 *	32 bit relocation repeated
	 *
	 * The tables are therefore walked backwards from the last entry:
	 * 32-bit first, then inverse 32-bit, then 64-bit.
	 */
	entry = output + output_len - sizeof(*entry);
	while (*entry) {
		target = (unsigned long)(long)*entry + map;
		if (target < image_start || target > image_end)
			error("32-bit relocation outside of kernel!\n");

		*(uint32_t *)target += delta;
		entry--;
	}
#ifdef CONFIG_X86_64
	/* entry rests on a terminator here; step over it before each pass. */
	for (entry--; *entry; entry--) {
		target = (unsigned long)(long)*entry + map;
		if (target < image_start || target > image_end)
			error("inverse 32-bit relocation outside of kernel!\n");

		*(int32_t *)target -= delta;
	}
	for (entry--; *entry; entry--) {
		target = (unsigned long)(long)*entry + map;
		if (target < image_start || target > image_end)
			error("64-bit relocation outside of kernel!\n");

		*(uint64_t *)target += delta;
	}
#endif
}
#else
/* Relocation support is compiled out: nothing to do. */
static inline void handle_relocations(void *output, unsigned long output_len,
				      unsigned long virt_addr)
{ }
#endif
|
|
|
|
static void parse_elf(void *output)
|
|
{
|
|
#ifdef CONFIG_X86_64
|
|
Elf64_Ehdr ehdr;
|
|
Elf64_Phdr *phdrs, *phdr;
|
|
#else
|
|
Elf32_Ehdr ehdr;
|
|
Elf32_Phdr *phdrs, *phdr;
|
|
#endif
|
|
void *dest;
|
|
int i;
|
|
|
|
memcpy(&ehdr, output, sizeof(ehdr));
|
|
if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
|
|
ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
|
|
ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
|
|
ehdr.e_ident[EI_MAG3] != ELFMAG3) {
|
|
error("Kernel is not a valid ELF file");
|
|
return;
|
|
}
|
|
|
|
debug_putstr("Parsing ELF... ");
|
|
|
|
phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
|
|
if (!phdrs)
|
|
error("Failed to allocate space for phdrs");
|
|
|
|
memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
|
|
|
|
for (i = 0; i < ehdr.e_phnum; i++) {
|
|
phdr = &phdrs[i];
|
|
|
|
switch (phdr->p_type) {
|
|
case PT_LOAD:
|
|
#ifdef CONFIG_X86_64
|
|
if ((phdr->p_align % 0x200000) != 0)
|
|
error("Alignment of LOAD segment isn't multiple of 2MB");
|
|
#endif
|
|
#ifdef CONFIG_RELOCATABLE
|
|
dest = output;
|
|
dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
|
|
#else
|
|
dest = (void *)(phdr->p_paddr);
|
|
#endif
|
|
memmove(dest, output + phdr->p_offset, phdr->p_filesz);
|
|
break;
|
|
default: /* Ignore other PT_* */ break;
|
|
}
|
|
}
|
|
|
|
free(phdrs);
|
|
}
|
|
|
|
/*
|
|
* The compressed kernel image (ZO), has been moved so that its position
|
|
* is against the end of the buffer used to hold the uncompressed kernel
|
|
* image (VO) and the execution environment (.bss, .brk), which makes sure
|
|
* there is room to do the in-place decompression. (See header.S for the
|
|
* calculations.)
|
|
*
|
|
* |-----compressed kernel image------|
|
|
* V V
|
|
* 0 extract_offset +INIT_SIZE
|
|
* |-----------|---------------|-------------------------|--------|
|
|
* | | | |
|
|
* VO__text startup_32 of ZO VO__end ZO__end
|
|
* ^ ^
|
|
* |-------uncompressed kernel image---------|
|
|
*
|
|
*/
|
|
/*
 * Decompress the kernel, lay out its ELF segments and apply relocations.
 *
 * @rmode:      boot parameters pointer, stored in the global boot_params
 * @heap:       start of the BOOT_HEAP_SIZE bytes used by malloc()
 * @input_data: compressed kernel image
 * @input_len:  length of the compressed image
 * @output:     destination for the decompressed kernel
 * @output_len: length of the decompressed data
 *
 * Pointers to @output and to the virtual address are handed to
 * choose_random_location(), so both may differ afterwards. Returns the
 * final value of @output. The order of the setup calls below matters;
 * see the individual comments.
 */
asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
					  unsigned char *input_data,
					  unsigned long input_len,
					  unsigned char *output,
					  unsigned long output_len)
{
	const unsigned long kernel_total_size = VO__end - VO__text;
	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
	unsigned long needed_size;

	/* Retain x86 boot parameters pointer passed from startup_32/64. */
	boot_params = rmode;

	/* Clear flags intended for solely in-kernel use. */
	boot_params->hdr.loadflags &= ~KASLR_FLAG;

	sanitize_boot_params(boot_params);

	/* Video mode 7 uses its own text buffer and port base. */
	if (boot_params->screen_info.orig_video_mode != 7) {
		vidmem = (char *) 0xb8000;
		vidport = 0x3d4;
	} else {
		vidmem = (char *) 0xb0000;
		vidport = 0x3b4;
	}

	lines = boot_params->screen_info.orig_video_lines;
	cols = boot_params->screen_info.orig_video_cols;

	init_default_io_ops();

	/*
	 * Detect TDX guest environment.
	 *
	 * This must precede console_init() so that paravirtualized port
	 * I/O operations are in place if they are needed.
	 */
	early_tdx_detect();

	console_init();

	/*
	 * Save RSDP address for later use. Done after console_init() so
	 * that early debugging output from the RSDP parsing code can be
	 * collected.
	 */
	boot_params->acpi_rsdp_addr = get_rsdp_addr();

	debug_putstr("early console in extract_kernel\n");

	/* Heap */
	free_mem_ptr = heap;
	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;

	/*
	 * The memory hole needed for the kernel is the larger of either
	 * the entire decompressed kernel plus relocation table, or the
	 * entire decompressed kernel plus .bss and .brk sections.
	 *
	 * On X86_64, the memory is mapped with PMD pages. Round the
	 * size up so that the full extent of PMD pages mapped is
	 * included in the check against the valid memory table
	 * entries. This ensures the full mapped area is usable RAM
	 * and doesn't include any reserved areas.
	 */
	needed_size = max(output_len, kernel_total_size);
	if (IS_ENABLED(CONFIG_X86_64))
		needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);

	/* Report initial kernel position details. */
	debug_putaddr(input_data);
	debug_putaddr(input_len);
	debug_putaddr(output);
	debug_putaddr(output_len);
	debug_putaddr(kernel_total_size);
	debug_putaddr(needed_size);

#ifdef CONFIG_X86_64
	/* Report address of 32-bit trampoline */
	debug_putaddr(trampoline_32bit);
#endif

	choose_random_location((unsigned long)input_data, input_len,
			       (unsigned long *)&output,
			       needed_size,
			       &virt_addr);

	/* Validate memory location choices. */
	if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
		error("Destination physical address inappropriately aligned");
	if (virt_addr & (MIN_KERNEL_ALIGN - 1))
		error("Destination virtual address inappropriately aligned");
#ifdef CONFIG_X86_64
	if (heap > 0x3fffffffffffUL)
		error("Destination address too large");
	if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
		error("Destination virtual address is beyond the kernel mapping area");
#else
	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
		error("Destination address too large");
#endif
#ifndef CONFIG_RELOCATABLE
	if (virt_addr != LOAD_PHYSICAL_ADDR)
		error("Destination virtual address changed when not relocatable");
#endif

	debug_putstr("\nDecompressing Linux... ");
	__decompress(input_data, input_len, NULL, NULL, output, output_len,
		     NULL, error);
	parse_elf(output);
	handle_relocations(output, output_len, virt_addr);
	debug_putstr("done.\nBooting the kernel.\n");

	/* Disable exception handling before booting the kernel */
	cleanup_exception_handling();

	return output;
}
|
|
|
|
/*
 * Report a detected buffer overflow through error().
 *
 * @name is not used; the message is the same for every caller.
 * NOTE(review): presumably called by the fortified string helpers, which
 * are not visible in this file -- confirm.
 */
void fortify_panic(const char *name)
|
|
{
|
|
error("detected buffer overflow");
|
|
}
|