mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-23 20:24:12 +08:00
X86 updates:
- Make RESERVE_BRK() work again with older binutils. The recent 'simplification' broke that. - Make early #VE handling increment RIP when successful. - Make the #VE code consistent vs. the RIP adjustments and add comments. - Handle load_unaligned_zeropad() across page boundaries correctly in #VE when the second page is shared. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmKvIG4THHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoaqCD/9NAUyHTjKDqdWuMD/ITU8ymDr+Ix8z vUlysdXbxJg6MvT12ZbhJUFTKAsXskGAAnXz/EtZ8zTQQVzTjis/HooJh4XLeuO4 NLh9KV9FvH7w69e6Jg31MGkOUJU3BV+WYUx1f34zbQ8FHftxUwu+M47UYExPYKDR VIbNeQIpqoBfjTSPVGXlWl/panuZG6RV+PRcvxV3yeRRA8zyCB/WTmNkoDjbw4fl YCWwJF7/m4iT3LtoaFXWVGFzSRZoGHbhSdgEOZGIZ7sjvydoaQo402JuhW3WLI2m oXLVZ+2wOPGBKp3WQ1t3mpfScBvCiN3SW4pSPDQ+E8fT/RQiRMb29c9S6ANdm3nT 27fYMJOq+xxex5gOYzdgLz7O99M08uOn2bxJwB+IBIr5jEFH9b4EffeEWsfdZBsi 1AzkXCi+Ib0ZYAndxUP068m+4iW0LtuApm0fg6LhtdDmBGquj+88OZOUK7Z/kW/N IkjgCeqFgmdNb/+Z3XrdYobaAl6J4toIqA4A+O8yL6gJfn9PnaMGsYtA8c5yQchD kFoTu5pCALY2KjZkKFRMuEbMH2oj3sjjb7f6mYAHxec6jikIx2c5HswA4sLmzHAN GG2MDUH12bWoLfeA4IRwTRz/vh8IeZNq5ZzdCnS6KHUNk5OJRGLtRphKy8z+pOYx +i9ThZFBV8pBzg== =sRtG -----END PGP SIGNATURE----- Merge tag 'x86-urgent-2022-06-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 fixes from Thomas Gleixner: - Make RESERVE_BRK() work again with older binutils. The recent 'simplification' broke that. - Make early #VE handling increment RIP when successful. - Make the #VE code consistent vs. the RIP adjustments and add comments. - Handle load_unaligned_zeropad() across page boundaries correctly in #VE when the second page is shared. * tag 'x86-urgent-2022-06-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/tdx: Handle load_unaligned_zeropad() page-cross to a shared page x86/tdx: Clarify RIP adjustments in #VE handler x86/tdx: Fix early #VE handling x86/mm: Fix RESERVE_BRK() for older binutils
This commit is contained in:
commit
05c6ca8512
@ -124,6 +124,51 @@ static u64 get_cc_mask(void)
|
||||
return BIT_ULL(gpa_width - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* The TDX module spec states that #VE may be injected for a limited set of
|
||||
* reasons:
|
||||
*
|
||||
* - Emulation of the architectural #VE injection on EPT violation;
|
||||
*
|
||||
* - As a result of guest TD execution of a disallowed instruction,
|
||||
* a disallowed MSR access, or CPUID virtualization;
|
||||
*
|
||||
* - A notification to the guest TD about anomalous behavior;
|
||||
*
|
||||
* The last one is opt-in and is not used by the kernel.
|
||||
*
|
||||
* The Intel Software Developer's Manual describes cases when instruction
|
||||
* length field can be used in section "Information for VM Exits Due to
|
||||
* Instruction Execution".
|
||||
*
|
||||
* For TDX, it ultimately means GET_VEINFO provides reliable instruction length
|
||||
* information if #VE occurred due to instruction execution, but not for EPT
|
||||
* violations.
|
||||
*/
|
||||
static int ve_instr_len(struct ve_info *ve)
|
||||
{
|
||||
/* Choose the instruction length to hand back based on the #VE exit reason */
switch (ve->exit_reason) {
|
||||
case EXIT_REASON_HLT:
|
||||
case EXIT_REASON_MSR_READ:
|
||||
case EXIT_REASON_MSR_WRITE:
|
||||
case EXIT_REASON_CPUID:
|
||||
case EXIT_REASON_IO_INSTRUCTION:
|
||||
/* It is safe to use ve->instr_len for #VE due to instructions */
|
||||
return ve->instr_len;
|
||||
case EXIT_REASON_EPT_VIOLATION:
|
||||
/*
|
||||
* For EPT violations, ve->instr_len is not defined. For those,
|
||||
* the kernel must decode instructions manually and should not
|
||||
* be using this function.
|
||||
*/
|
||||
WARN_ONCE(1, "ve->instr_len is not defined for EPT violations");
|
||||
return 0;
|
||||
default:
|
||||
/* Unknown exit reason: warn once, then fall back to the reported length */
WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason);
|
||||
return ve->instr_len;
|
||||
}
|
||||
}
|
||||
|
||||
static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
|
||||
{
|
||||
struct tdx_hypercall_args args = {
|
||||
@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
|
||||
return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
|
||||
}
|
||||
|
||||
static bool handle_halt(void)
|
||||
static int handle_halt(struct ve_info *ve)
|
||||
{
|
||||
/*
|
||||
* Since non safe halt is mainly used in CPU offlining
|
||||
@ -158,9 +203,9 @@ static bool handle_halt(void)
|
||||
const bool do_sti = false;
|
||||
|
||||
if (__halt(irq_disabled, do_sti))
|
||||
return false;
|
||||
return -EIO;
|
||||
|
||||
return true;
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
void __cpuidle tdx_safe_halt(void)
|
||||
@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void)
|
||||
WARN_ONCE(1, "HLT instruction emulation failed\n");
|
||||
}
|
||||
|
||||
static bool read_msr(struct pt_regs *regs)
|
||||
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
struct tdx_hypercall_args args = {
|
||||
.r10 = TDX_HYPERCALL_STANDARD,
|
||||
@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs)
|
||||
* (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
|
||||
*/
|
||||
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
|
||||
return false;
|
||||
return -EIO;
|
||||
|
||||
regs->ax = lower_32_bits(args.r11);
|
||||
regs->dx = upper_32_bits(args.r11);
|
||||
return true;
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
static bool write_msr(struct pt_regs *regs)
|
||||
static int write_msr(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
struct tdx_hypercall_args args = {
|
||||
.r10 = TDX_HYPERCALL_STANDARD,
|
||||
@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs)
|
||||
* can be found in TDX Guest-Host-Communication Interface
|
||||
* (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
|
||||
*/
|
||||
return !__tdx_hypercall(&args, 0);
|
||||
if (__tdx_hypercall(&args, 0))
|
||||
return -EIO;
|
||||
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
static bool handle_cpuid(struct pt_regs *regs)
|
||||
static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
struct tdx_hypercall_args args = {
|
||||
.r10 = TDX_HYPERCALL_STANDARD,
|
||||
@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs)
|
||||
*/
|
||||
if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
|
||||
regs->ax = regs->bx = regs->cx = regs->dx = 0;
|
||||
return true;
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs)
|
||||
* (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
|
||||
*/
|
||||
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
|
||||
return false;
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
|
||||
@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs)
|
||||
regs->cx = args.r14;
|
||||
regs->dx = args.r15;
|
||||
|
||||
return true;
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
static bool mmio_read(int size, unsigned long addr, unsigned long *val)
|
||||
@ -283,10 +331,10 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
|
||||
EPT_WRITE, addr, val);
|
||||
}
|
||||
|
||||
static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
|
||||
static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
unsigned long *reg, val, vaddr;
|
||||
char buffer[MAX_INSN_SIZE];
|
||||
unsigned long *reg, val;
|
||||
struct insn insn = {};
|
||||
enum mmio_type mmio;
|
||||
int size, extend_size;
|
||||
@ -294,34 +342,49 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
|
||||
|
||||
/* Only in-kernel MMIO is supported */
|
||||
if (WARN_ON_ONCE(user_mode(regs)))
|
||||
return false;
|
||||
return -EFAULT;
|
||||
|
||||
if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
|
||||
return false;
|
||||
return -EFAULT;
|
||||
|
||||
if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
|
||||
return false;
|
||||
return -EINVAL;
|
||||
|
||||
mmio = insn_decode_mmio(&insn, &size);
|
||||
if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
|
||||
return false;
|
||||
return -EINVAL;
|
||||
|
||||
if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
|
||||
reg = insn_get_modrm_reg_ptr(&insn, regs);
|
||||
if (!reg)
|
||||
return false;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ve->instr_len = insn.length;
|
||||
/*
|
||||
* Reject EPT violation #VEs that split pages.
|
||||
*
|
||||
* MMIO accesses are supposed to be naturally aligned and therefore
|
||||
* never cross page boundaries. Seeing split page accesses indicates
|
||||
* a bug or a load_unaligned_zeropad() that stepped into an MMIO page.
|
||||
*
|
||||
* load_unaligned_zeropad() will recover using exception fixups.
|
||||
*/
|
||||
vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);
|
||||
if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE)
|
||||
return -EFAULT;
|
||||
|
||||
/* Handle writes first */
|
||||
switch (mmio) {
|
||||
case MMIO_WRITE:
|
||||
memcpy(&val, reg, size);
|
||||
return mmio_write(size, ve->gpa, val);
|
||||
if (!mmio_write(size, ve->gpa, val))
|
||||
return -EIO;
|
||||
return insn.length;
|
||||
case MMIO_WRITE_IMM:
|
||||
val = insn.immediate.value;
|
||||
return mmio_write(size, ve->gpa, val);
|
||||
if (!mmio_write(size, ve->gpa, val))
|
||||
return -EIO;
|
||||
return insn.length;
|
||||
case MMIO_READ:
|
||||
case MMIO_READ_ZERO_EXTEND:
|
||||
case MMIO_READ_SIGN_EXTEND:
|
||||
@ -334,15 +397,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
|
||||
* decoded or handled properly. It was likely not using io.h
|
||||
* helpers or accessed MMIO accidentally.
|
||||
*/
|
||||
return false;
|
||||
return -EINVAL;
|
||||
default:
|
||||
WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
|
||||
return false;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Handle reads */
|
||||
if (!mmio_read(size, ve->gpa, &val))
|
||||
return false;
|
||||
return -EIO;
|
||||
|
||||
switch (mmio) {
|
||||
case MMIO_READ:
|
||||
@ -364,13 +427,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
|
||||
default:
|
||||
/* All other cases have to be covered by the first switch() */
|
||||
WARN_ON_ONCE(1);
|
||||
return false;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (extend_size)
|
||||
memset(reg, extend_val, extend_size);
|
||||
memcpy(reg, &val, size);
|
||||
return true;
|
||||
return insn.length;
|
||||
}
|
||||
|
||||
static bool handle_in(struct pt_regs *regs, int size, int port)
|
||||
@ -421,13 +484,14 @@ static bool handle_out(struct pt_regs *regs, int size, int port)
|
||||
*
|
||||
* Returns the instruction length on success or -errno on failure.
|
||||
*/
|
||||
static bool handle_io(struct pt_regs *regs, u32 exit_qual)
|
||||
static int handle_io(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
u32 exit_qual = ve->exit_qual;
|
||||
int size, port;
|
||||
bool in;
|
||||
bool in, ret;
|
||||
|
||||
if (VE_IS_IO_STRING(exit_qual))
|
||||
return false;
|
||||
return -EIO;
|
||||
|
||||
in = VE_IS_IO_IN(exit_qual);
|
||||
size = VE_GET_IO_SIZE(exit_qual);
|
||||
@ -435,9 +499,13 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
|
||||
|
||||
|
||||
if (in)
|
||||
return handle_in(regs, size, port);
|
||||
ret = handle_in(regs, size, port);
|
||||
else
|
||||
return handle_out(regs, size, port);
|
||||
ret = handle_out(regs, size, port);
|
||||
if (!ret)
|
||||
return -EIO;
|
||||
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -447,13 +515,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
|
||||
__init bool tdx_early_handle_ve(struct pt_regs *regs)
|
||||
{
|
||||
struct ve_info ve;
|
||||
int insn_len;
|
||||
|
||||
tdx_get_ve_info(&ve);
|
||||
|
||||
if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
|
||||
return false;
|
||||
|
||||
return handle_io(regs, ve.exit_qual);
|
||||
insn_len = handle_io(regs, &ve);
|
||||
if (insn_len < 0)
|
||||
return false;
|
||||
|
||||
regs->ip += insn_len;
|
||||
return true;
|
||||
}
|
||||
|
||||
void tdx_get_ve_info(struct ve_info *ve)
|
||||
@ -486,54 +560,65 @@ void tdx_get_ve_info(struct ve_info *ve)
|
||||
ve->instr_info = upper_32_bits(out.r10);
|
||||
}
|
||||
|
||||
/* Handle the user initiated #VE */
|
||||
static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
|
||||
/*
|
||||
* Handle the user initiated #VE.
|
||||
*
|
||||
* On success, returns the number of bytes RIP should be incremented (>=0)
|
||||
* or -errno on error.
|
||||
*/
|
||||
static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
switch (ve->exit_reason) {
|
||||
case EXIT_REASON_CPUID:
|
||||
return handle_cpuid(regs);
|
||||
return handle_cpuid(regs, ve);
|
||||
default:
|
||||
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
|
||||
return false;
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle the kernel #VE */
|
||||
static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
|
||||
/*
|
||||
* Handle the kernel #VE.
|
||||
*
|
||||
* On success, returns the number of bytes RIP should be incremented (>=0)
|
||||
* or -errno on error.
|
||||
*/
|
||||
static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
switch (ve->exit_reason) {
|
||||
case EXIT_REASON_HLT:
|
||||
return handle_halt();
|
||||
return handle_halt(ve);
|
||||
case EXIT_REASON_MSR_READ:
|
||||
return read_msr(regs);
|
||||
return read_msr(regs, ve);
|
||||
case EXIT_REASON_MSR_WRITE:
|
||||
return write_msr(regs);
|
||||
return write_msr(regs, ve);
|
||||
case EXIT_REASON_CPUID:
|
||||
return handle_cpuid(regs);
|
||||
return handle_cpuid(regs, ve);
|
||||
case EXIT_REASON_EPT_VIOLATION:
|
||||
return handle_mmio(regs, ve);
|
||||
case EXIT_REASON_IO_INSTRUCTION:
|
||||
return handle_io(regs, ve->exit_qual);
|
||||
return handle_io(regs, ve);
|
||||
default:
|
||||
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
|
||||
return false;
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
bool ret;
|
||||
int insn_len;
|
||||
|
||||
if (user_mode(regs))
|
||||
ret = virt_exception_user(regs, ve);
|
||||
insn_len = virt_exception_user(regs, ve);
|
||||
else
|
||||
ret = virt_exception_kernel(regs, ve);
|
||||
insn_len = virt_exception_kernel(regs, ve);
|
||||
if (insn_len < 0)
|
||||
return false;
|
||||
|
||||
/* After successful #VE handling, move the IP */
|
||||
if (ret)
|
||||
regs->ip += ve->instr_len;
|
||||
regs->ip += insn_len;
|
||||
|
||||
return ret;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool tdx_tlb_flush_required(bool private)
|
||||
|
@ -108,19 +108,16 @@ extern unsigned long _brk_end;
|
||||
void *extend_brk(size_t size, size_t align);
|
||||
|
||||
/*
|
||||
* Reserve space in the brk section. The name must be unique within the file,
|
||||
* and somewhat descriptive. The size is in bytes.
|
||||
* Reserve space in the .brk section, which is a block of memory from which the
|
||||
* caller is allowed to allocate very early (before even memblock is available)
|
||||
* by calling extend_brk(). All allocated memory will be eventually converted
|
||||
* to memblock. Any leftover unallocated memory will be freed.
|
||||
*
|
||||
* The allocation is done using inline asm (rather than using a section
|
||||
* attribute on a normal variable) in order to allow the use of @nobits, so
|
||||
* that it doesn't take up any space in the vmlinux file.
|
||||
* The size is in bytes.
|
||||
*/
|
||||
#define RESERVE_BRK(name, size) \
|
||||
asm(".pushsection .brk_reservation,\"aw\",@nobits\n\t" \
|
||||
".brk." #name ":\n\t" \
|
||||
".skip " __stringify(size) "\n\t" \
|
||||
".size .brk." #name ", " __stringify(size) "\n\t" \
|
||||
".popsection\n\t")
|
||||
#define RESERVE_BRK(name, size) \
|
||||
__section(".bss..brk") __aligned(1) __used \
|
||||
static char __brk_##name[size]
|
||||
|
||||
extern void probe_roms(void);
|
||||
#ifdef __i386__
|
||||
@ -133,12 +130,19 @@ asmlinkage void __init x86_64_start_reservations(char *real_mode_data);
|
||||
|
||||
#endif /* __i386__ */
|
||||
#endif /* _SETUP */
|
||||
#else
|
||||
#define RESERVE_BRK(name,sz) \
|
||||
.pushsection .brk_reservation,"aw",@nobits; \
|
||||
.brk.name: \
|
||||
1: .skip sz; \
|
||||
.size .brk.name,.-1b; \
|
||||
|
||||
#else /* __ASSEMBLY__ */
|
||||
|
||||
.macro __RESERVE_BRK name, size
|
||||
.pushsection .bss..brk, "aw"
|
||||
SYM_DATA_START(__brk_\name)
|
||||
.skip \size
|
||||
SYM_DATA_END(__brk_\name)
|
||||
.popsection
|
||||
.endm
|
||||
|
||||
#define RESERVE_BRK(name, size) __RESERVE_BRK name, size
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_X86_SETUP_H */
|
||||
|
@ -67,11 +67,6 @@ RESERVE_BRK(dmi_alloc, 65536);
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Range of the BSS area. The size of the BSS area is determined
|
||||
* at link time, with RESERVE_BRK() facility reserving additional
|
||||
* chunks.
|
||||
*/
|
||||
unsigned long _brk_start = (unsigned long)__brk_base;
|
||||
unsigned long _brk_end = (unsigned long)__brk_base;
|
||||
|
||||
|
@ -385,10 +385,10 @@ SECTIONS
|
||||
__end_of_kernel_reserve = .;
|
||||
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
|
||||
.brk (NOLOAD) : AT(ADDR(.brk) - LOAD_OFFSET) {
|
||||
__brk_base = .;
|
||||
. += 64 * 1024; /* 64k alignment slop space */
|
||||
*(.brk_reservation) /* areas brk users have reserved */
|
||||
*(.bss..brk) /* areas brk users have reserved */
|
||||
__brk_limit = .;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user