linux/arch/x86/mm/mem_encrypt.c
Ashish Kalra 400fea4b96 x86/sev: Add callback to apply RMP table fixups for kexec
Handle cases where the RMP table placement in the BIOS is not 2M aligned
and the kexec-ed kernel could try to allocate from within that chunk
which then causes a fatal RMP fault.

The kexec failure is illustrated below:

  SEV-SNP: RMP table physical range [0x0000007ffe800000 - 0x000000807f0fffff]
  BIOS-provided physical RAM map:
  BIOS-e820: [mem 0x0000000000000000-0x000000000008efff] usable
  BIOS-e820: [mem 0x000000000008f000-0x000000000008ffff] ACPI NVS
  ...
  BIOS-e820: [mem 0x0000004080000000-0x0000007ffe7fffff] usable
  BIOS-e820: [mem 0x0000007ffe800000-0x000000807f0fffff] reserved
  BIOS-e820: [mem 0x000000807f100000-0x000000807f1fefff] usable

As seen here in the e820 memory map, the end of the RMP table range is not
aligned to 2MB, and the memory following it within the same 2MB chunk is
not reserved but is usable as RAM.

Subsequently, kexec -s (KEXEC_FILE_LOAD syscall) loads its purgatory
code and boot_param, command line and other setup data into this RAM
region as seen in the kexec logs below, which leads to a fatal RMP fault
during kexec boot.

  Loaded purgatory at 0x807f1fa000
  Loaded boot_param, command line and misc at 0x807f1f8000 bufsz=0x1350 memsz=0x2000
  Loaded 64bit kernel at 0x7ffae00000 bufsz=0xd06200 memsz=0x3894000
  Loaded initrd at 0x7ff6c89000 bufsz=0x4176014 memsz=0x4176014
  E820 memmap:
  0000000000000000-000000000008efff (1)
  000000000008f000-000000000008ffff (4)
  0000000000090000-000000000009ffff (1)
  ...
  0000004080000000-0000007ffe7fffff (1)
  0000007ffe800000-000000807f0fffff (2)
  000000807f100000-000000807f1fefff (1)
  000000807f1ff000-000000807fffffff (2)
  nr_segments = 4
  segment[0]: buf=0x00000000e626d1a2 bufsz=0x4000 mem=0x807f1fa000 memsz=0x5000
  segment[1]: buf=0x0000000029c67bd6 bufsz=0x1350 mem=0x807f1f8000 memsz=0x2000
  segment[2]: buf=0x0000000045c60183 bufsz=0xd06200 mem=0x7ffae00000 memsz=0x3894000
  segment[3]: buf=0x000000006e54f08d bufsz=0x4176014 mem=0x7ff6c89000 memsz=0x4177000
  kexec_file_load: type:0, start:0x807f1fa150 head:0x1184d0002 flags:0x0

Check whether the start and end of the RMP table's physical range in the
e820 tables are aligned to 2MB and, if they are not, mark this range as
reserved in all three e820 tables.

  [ bp: Massage. ]

Fixes: c3b86e61b7 ("x86/cpufeatures: Enable/unmask SEV-SNP CPU feature")
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/df6e995ff88565262c2c7c69964883ff8aa6fc30.1714090302.git.ashish.kalra@amd.com
2024-04-29 11:21:09 +02:00

139 lines
3.5 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Memory Encryption Support Common Code
*
* Copyright (C) 2016 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
*/
#include <linux/dma-direct.h>
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>
#include <linux/cc_platform.h>
#include <linux/mem_encrypt.h>
#include <linux/virtio_anchor.h>
#include <asm/sev.h>
/*
 * Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED
 *
 * Returns true when DMA for @dev has to target unencrypted addresses:
 *  - whenever CC_ATTR_GUEST_MEM_ENCRYPT is active, or
 *  - when CC_ATTR_HOST_MEM_ENCRYPT is active and the device's DMA limit
 *    does not reach past the encryption mask bit.
 * Returns false in every other case.
 */
bool force_dma_unencrypted(struct device *dev)
{
	u64 enc_limit, dev_limit;

	/* For SEV, all DMA must be to unencrypted addresses. */
	if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		return true;

	/* Neither SEV nor SME: nothing forces unencrypted DMA. */
	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		return false;

	/*
	 * For SME, all DMA must be to unencrypted addresses if the
	 * device does not support DMA to addresses that include the
	 * encryption mask. Compare the mask covering all bits below the
	 * lowest set bit of sme_me_mask against the device limit, taken
	 * from coherent_dma_mask and bus_dma_limit via min_not_zero().
	 */
	enc_limit = DMA_BIT_MASK(__ffs64(sme_me_mask));
	dev_limit = min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);

	return dev_limit <= enc_limit;
}
/*
 * Finish the "Memory Encryption Features active" line for an AMD platform
 * by appending the active feature names, then, unless SME is active, call
 * sev_show_status().
 */
static void print_amd_mem_encrypt_features(void)
{
	pr_cont("AMD");

	/*
	 * Secure Memory Encryption: mutually exclusive with any of the SEV
	 * features below, so terminate the line and stop here.
	 */
	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
		pr_cont(" SME\n");
		return;
	}

	/* Secure Encrypted Virtualization */
	if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		pr_cont(" SEV");

	/* Encrypted Register State */
	if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		pr_cont(" SEV-ES");

	/* Secure Nested Paging */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		pr_cont(" SEV-SNP");

	pr_cont("\n");

	sev_show_status();
}

/*
 * Log a single line naming the active memory encryption features,
 * selected by the confidential-computing vendor in cc_vendor.
 */
static void print_mem_encrypt_feature_info(void)
{
	pr_info("Memory Encryption Features active: ");

	switch (cc_vendor) {
	case CC_VENDOR_AMD:
		print_amd_mem_encrypt_features();
		break;
	case CC_VENDOR_INTEL:
		pr_cont("Intel TDX\n");
		break;
	default:
		pr_cont("Unknown\n");
		break;
	}
}
/* Architecture __weak replacement functions */
/*
 * When memory encryption is active, have SWIOTLB update the attributes of
 * its DMA buffers and then report the active encryption features.
 * Does nothing when CC_ATTR_MEM_ENCRYPT is not set.
 */
void __init mem_encrypt_init(void)
{
	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
		/* Call into SWIOTLB to update the SWIOTLB DMA buffers */
		swiotlb_update_mem_attributes();

		print_mem_encrypt_feature_info();
	}
}
/*
 * Early memory-encryption setup:
 *  - on an SEV-SNP host, apply the RMP table fixups to the e820 tables;
 *  - in an encrypted guest, size SWIOTLB from the amount of memory and
 *    require restricted memory access for virtio.
 */
void __init mem_encrypt_setup_arch(void)
{
	phys_addr_t ram_bytes = memblock_phys_mem_size();
	unsigned long bounce_bytes;

	/*
	 * Do RMP table fixups after the e820 tables have been setup by
	 * e820__memory_setup().
	 */
	if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
		snp_fixup_e820_tables();

	/* Everything below applies to encrypted guests only. */
	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		return;

	/*
	 * For SEV and TDX, all DMA has to occur via shared/unencrypted pages.
	 * The kernel uses SWIOTLB to make this happen without changing device
	 * drivers. However, depending on the workload being run, the
	 * default 64MB of SWIOTLB may not be enough and SWIOTLB may
	 * run out of buffers for DMA, resulting in I/O errors and/or
	 * performance degradation, especially with high I/O workloads.
	 *
	 * Adjust the default size of SWIOTLB using a percentage of guest
	 * memory for SWIOTLB buffers. Also, as the SWIOTLB bounce buffer
	 * memory is allocated from low memory, ensure that the adjusted size
	 * is within the limits of low available memory.
	 *
	 * The percentage of guest memory used here for SWIOTLB buffers
	 * is more of an approximation of the static adjustment, which is
	 * 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., the 6%.
	 */
	bounce_bytes = ram_bytes * 6 / 100;

	/* Keep the result between the SWIOTLB default size and 1G. */
	swiotlb_adjust_size(clamp_val(bounce_bytes, IO_TLB_DEFAULT_SIZE, SZ_1G));

	/* Set restricted memory access for virtio. */
	virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
}