linux/tools/testing/selftests/x86/mov_ss_trap.c

287 lines
7.7 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0 */
/*
* mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
*
* This does MOV SS from a watchpointed address followed by various
* types of kernel entries. A MOV SS that hits a watchpoint will queue
* up a #DB trap but will not actually deliver that trap. The trap
* will be delivered after the next instruction instead. The CPU's logic
* seems to be:
*
* - Any fault: drop the pending #DB trap.
* - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
* deliver #DB.
* - ICEBP: enter the kernel but do not deliver the watchpoint trap
* - breakpoint: only one #DB is delivered (phew!)
*
* There are plenty of ways for a kernel to handle this incorrectly. This
* test tries to exercise all the cases.
*
* This should mostly cover CVE-2018-1087 and CVE-2018-8897.
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/user.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <err.h>
#include <string.h>
#include <setjmp.h>
#include <sys/prctl.h>
/* EFLAGS.RF (bit 16); the SIGTRAP handler reports whether it was set. */
#define X86_EFLAGS_RF (1UL << 16)
/* Index of the saved instruction pointer within uc_mcontext.gregs[]. */
#if __x86_64__
# define REG_IP REG_RIP
#else
# define REG_IP REG_EIP
#endif
/* Holds the SS selector read in main(); DR0 is pointed at this variable. */
unsigned short ss;
/* Label emitted by inline asm in main(); DR1 is pointed at it. */
extern unsigned char breakpoint_insn[];
/* Recovery point used by handle_and_longjmp() to skip a faulting test. */
sigjmp_buf jmpbuf;
/*
 * Arm two hardware breakpoints on the calling process.
 *
 * A forked helper attaches to the caller with ptrace and writes the
 * caller's debug registers through PTRACE_POKEUSER:
 *   DR0 = &ss              (2-byte read/write watchpoint)
 *   DR1 = breakpoint_insn  (instruction breakpoint)
 *   DR7 = enable bits for both
 * The caller blocks until the helper has exited.  Any failure inside the
 * helper terminates the helper via err(); the caller then only prints a
 * warning and carries on, as it did before the status was inspected.
 */
static void enable_watchpoint(void)
{
	pid_t parent = getpid();
	pid_t child;
	int status;

	/*
	 * The helper leaves through exit(), which flushes stdio.  Flush
	 * here so that output buffered before the fork is not emitted a
	 * second time by the helper when stdout is a pipe or a file.
	 */
	fflush(stdout);

	child = fork();
	if (child < 0)
		err(1, "fork");

	if (child) {
		if (waitpid(child, &status, 0) != child)
			err(1, "waitpid for child");
		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
			printf("[WARN]\tWatchpoint helper failed; debug registers may not be set\n");
	} else {
		unsigned long dr0, dr1, dr7;

		dr0 = (unsigned long)&ss;
		dr1 = (unsigned long)breakpoint_insn;
		dr7 = ((1UL << 1) |	/* G0 */
		       (3UL << 16) |	/* RW0 = read or write */
		       (1UL << 18) |	/* LEN0 = 2 bytes */
		       (1UL << 3));	/* G1, RW1 = insn */

		if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
			err(1, "PTRACE_ATTACH");

		/* Wait for the attach to stop the parent before poking it. */
		if (waitpid(parent, &status, 0) != parent)
			err(1, "waitpid for parent");

		if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
			err(1, "PTRACE_POKEUSER DR0");

		if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
			err(1, "PTRACE_POKEUSER DR1");

		if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
			err(1, "PTRACE_POKEUSER DR7");

		printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);

		if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
			err(1, "PTRACE_DETACH");

		exit(0);
	}
}
/*
 * Install @handler as the SA_SIGINFO-style action for signal @sig.
 *
 * @flags is OR-ed with SA_SIGINFO; no additional signals are blocked
 * while the handler runs.  Exits via err() if sigaction() fails.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction action = { .sa_flags = SA_SIGINFO | flags };

	action.sa_sigaction = handler;
	sigemptyset(&action.sa_mask);

	if (sigaction(sig, &action, NULL) != 0)
		err(1, "sigaction");
}
/*
 * Printable names for the signals this test reports, indexed by signal
 * number.  Slots for signals not listed here are NULL.
 */
static char const * const signames[] = {
	[SIGSEGV] = "SIGSEGV",
	[SIGBUS] = "SIGBUS",
	[SIGTRAP] = "SIGTRAP",
	[SIGILL] = "SIGILL",
};
/*
 * SIGTRAP handler: print the instruction pointer saved in the signal
 * context and whether EFLAGS.RF was set there, then return so the
 * interrupted code resumes.  @sig and @si are unused.
 */
static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
{
	ucontext_t *uc = ctx_void;
	unsigned long ip = (unsigned long)uc->uc_mcontext.gregs[REG_IP];
	int rf_set = (uc->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF) != 0;

	printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n", ip, rf_set);
}
/*
 * Signal handler that logs the signal name and the saved instruction
 * pointer, then returns normally to the interrupted context.
 * @si is unused.
 */
static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
{
	ucontext_t *uc = ctx_void;
	unsigned long ip = (unsigned long)uc->uc_mcontext.gregs[REG_IP];

	printf("\tGot %s with RIP=%lx\n", signames[sig], ip);
}
/*
 * Signal handler that logs the signal name and the saved instruction
 * pointer, then abandons the interrupted context by jumping back to the
 * most recent sigsetjmp(jmpbuf, ...) with a return value of 1.
 * @si is unused.
 */
static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
{
	ucontext_t *uc = ctx_void;
	unsigned long ip = (unsigned long)uc->uc_mcontext.gregs[REG_IP];

	printf("\tGot %s with RIP=%lx\n", signames[sig], ip);
	siglongjmp(jmpbuf, 1);
}
/*
 * Arm a watchpoint on 'ss', then execute "MOV SS" from the watched
 * address immediately followed by each kind of kernel entry in turn.
 *
 * Tests whose second instruction is expected to raise a signal that
 * cannot simply be returned from are wrapped in sigsetjmp() and use
 * handle_and_longjmp() to skip past the faulting code.  Nothing is
 * asserted about the individual outcomes: reaching the final "[OK]"
 * line is what this program reports as success.
 *
 * Returns 0 once every scenario has been run.
 */
int main(void)
{
	unsigned long nr;

	/* Record the current SS selector; every test reloads SS from it. */
	asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
	/* %p takes a void pointer; glibc's %p already prints a 0x prefix. */
	printf("\tSS = 0x%hx, &SS = %p\n", ss, (void *)&ss);

	/* Best effort: a failure here is not treated as an error. */
	if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
		printf("\tPR_SET_PTRACER_ANY succeeded\n");

	printf("\tSet up a watchpoint\n");
	sethandler(SIGTRAP, sigtrap, 0);
	enable_watchpoint();

	printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
	asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));

	printf("[RUN]\tMOV SS; INT3\n");
	asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));

	printf("[RUN]\tMOV SS; INT 3\n");
	asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));

	printf("[RUN]\tMOV SS; CS CS INT3\n");
	asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));

	printf("[RUN]\tMOV SS; CSx14 INT3\n");
	asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));

	printf("[RUN]\tMOV SS; INT 4\n");
	sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
	asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));

#ifdef __i386__
	printf("[RUN]\tMOV SS; INTO\n");
	sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
	nr = -1;
	asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
		      : [tmp] "+r" (nr) : [ss] "m" (ss));
#endif

	if (sigsetjmp(jmpbuf, 1) == 0) {
		printf("[RUN]\tMOV SS; ICEBP\n");

		/* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
		sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);

		asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
	}

	if (sigsetjmp(jmpbuf, 1) == 0) {
		printf("[RUN]\tMOV SS; CLI\n");
		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
		asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
	}

	if (sigsetjmp(jmpbuf, 1) == 0) {
		printf("[RUN]\tMOV SS; #PF\n");
		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
		asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
			      : [tmp] "=r" (nr) : [ss] "m" (ss));
	}

	/*
	 * INT $1: if #DB has DPL=3 and there isn't special handling,
	 * then the kernel will die.
	 */
	if (sigsetjmp(jmpbuf, 1) == 0) {
		printf("[RUN]\tMOV SS; INT 1\n");
		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
		asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
	}

#ifdef __x86_64__
	/*
	 * In principle, we should test 32-bit SYSCALL as well, but
	 * the calling convention is so unpredictable that it's
	 * not obviously worth the effort.
	 */
	if (sigsetjmp(jmpbuf, 1) == 0) {
		printf("[RUN]\tMOV SS; SYSCALL\n");
		sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
		nr = SYS_getpid;
		/*
		 * Toggle the high bit of RSP to make it noncanonical to
		 * strengthen this test on non-SMAP systems.
		 */
		asm volatile ("btc $63, %%rsp\n\t"
			      "mov %[ss], %%ss; syscall\n\t"
			      "btc $63, %%rsp"
			      : "+a" (nr) : [ss] "m" (ss)
			      : "rcx", "r11");
	}
#endif

	printf("[RUN]\tMOV SS; breakpointed NOP\n");
	asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));

	/*
	 * Invoking SYSENTER directly breaks all the rules.  Just handle
	 * the SIGSEGV.
	 *
	 * The handler is installed with SA_ONSTACK, so the alternate stack
	 * has to stay allocated until the handler has jumped back here.
	 * It is set up before sigsetjmp() so that 'stack' still holds a
	 * well-defined value after the siglongjmp(), and it is disabled
	 * and released only once the test is over.
	 */
	{
		stack_t stack = { .ss_sp = NULL };
		stack_t disable = { .ss_flags = SS_DISABLE };

		printf("[RUN]\tMOV SS; SYSENTER\n");

		/* SIGSTKSZ may expand to a sysconf() call; evaluate it once. */
		stack.ss_size = SIGSTKSZ;
		stack.ss_sp = malloc(stack.ss_size);
		if (stack.ss_sp == NULL)
			err(1, "malloc");
		if (sigaltstack(&stack, NULL) != 0)
			err(1, "sigaltstack");

		if (sigsetjmp(jmpbuf, 1) == 0) {
			sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
			nr = SYS_getpid;
			/* Clear EBP first to make sure we segfault cleanly. */
			asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr)
				      : [ss] "m" (ss) : "flags", "rcx"
#ifdef __x86_64__
				      , "r11"
#endif
				);

			/* We're unreachable here.  SYSENTER forgets RIP. */
		}

		/* Back on the normal stack: detach the alternate one, then free it. */
		if (sigaltstack(&disable, NULL) != 0)
			err(1, "sigaltstack(SS_DISABLE)");
		free(stack.ss_sp);
	}

	if (sigsetjmp(jmpbuf, 1) == 0) {
		printf("[RUN]\tMOV SS; INT $0x80\n");
		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
		nr = 20;	/* compat getpid */
		asm volatile ("mov %[ss], %%ss; int $0x80"
			      : "+a" (nr) : [ss] "m" (ss)
			      : "flags"
#ifdef __x86_64__
			      , "r8", "r9", "r10", "r11"
#endif
			);
	}

	printf("[OK]\tI aten't dead\n");
	return 0;
}