2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2024-12-14 00:04:00 +08:00

x86/fpu updates:

- Cleanup of extable fixup handling to be more robust, which in turn
    allows to make the FPU exception fixups more robust as well.
 
  - Change the return code for signal frame related failures from explicit
    error codes to a boolean fail/success as that's all what the calling
    code evaluates.
 
  - A large refactoring of the FPU code to prepare for adding AMX support:
 
    - Distangle the public header maze and remove especially the misnomed
      kitchen sink internal.h which is despite it's name included all over
      the place.
 
    - Add a proper abstraction for the register buffer storage (struct
      fpstate) which allows to dynamically size the buffer at runtime by
      flipping the pointer to the buffer container from the default
      container which is embedded in task_struct::tread::fpu to a
      dynamically allocated container with a larger register buffer.
 
    - Convert the code over to the new fpstate mechanism.
 
    - Consolidate the KVM FPU handling by moving the FPU related code into
      the FPU core which removes the number of exports and avoids adding
      even more export when AMX has to be supported in KVM. This also
      removes duplicated code which was of course unnecessary different and
      incomplete in the KVM copy.
 
    - Simplify the KVM FPU buffer handling by utilizing the new fpstate
      container and just switching the buffer pointer from the user space
      buffer to the KVM guest buffer when entering vcpu_run() and flipping
      it back when leaving the function. This cuts the memory requirements
      of a vCPU for FPU buffers in half and avoids pointless memory copy
      operations.
 
      This also solves the so far unresolved problem of adding AMX support
      because the current FPU buffer handling of KVM inflicted a circular
      dependency between adding AMX support to the core and to KVM.  With
      the new scheme of switching fpstate AMX support can be added to the
      core code without affecting KVM.
 
    - Replace various variables with proper data structures so the extra
      information required for adding dynamically enabled FPU features (AMX)
      can be added in one place
 
  - Add AMX (Advanved Matrix eXtensions) support (finally):
 
     AMX is a large XSTATE component which is going to be available with
     Saphire Rapids XEON CPUs. The feature comes with an extra MSR (MSR_XFD)
     which allows to trap the (first) use of an AMX related instruction,
     which has two benefits:
 
     1) It allows the kernel to control access to the feature
 
     2) It allows the kernel to dynamically allocate the large register
        state buffer instead of burdening every task with the the extra 8K
        or larger state storage.
 
     It would have been great to gain this kind of control already with
     AVX512.
 
     The support comes with the following infrastructure components:
 
     1) arch_prctl() to
        - read the supported features (equivalent to XGETBV(0))
        - read the permitted features for a task
        - request permission for a dynamically enabled feature
 
        Permission is granted per process, inherited on fork() and cleared
        on exec(). The permission policy of the kernel is restricted to
        sigaltstack size validation, but the syscall obviously allows
        further restrictions via seccomp etc.
 
     2) A stronger sigaltstack size validation for sys_sigaltstack(2) which
        takes granted permissions and the potentially resulting larger
        signal frame into account. This mechanism can also be used to
        enforce factual sigaltstack validation independent of dynamic
        features to help with finding potential victims of the 2K
        sigaltstack size constant which is broken since AVX512 support was
        added.
 
     3) Exception handling for #NM traps to catch first use of a extended
        feature via a new cause MSR. If the exception was caused by the use
        of such a feature, the handler checks permission for that
        feature. If permission has not been granted, the handler sends a
        SIGILL like the #UD handler would do if the feature would have been
        disabled in XCR0. If permission has been granted, then a new fpstate
        which fits the larger buffer requirement is allocated.
 
        In the unlikely case that this allocation fails, the handler sends
        SIGSEGV to the task. That's not elegant, but unavoidable as the
        other discussed options of preallocation or full per task
        permissions come with their own set of horrors for kernel and/or
        userspace. So this is the lesser of the evils and SIGSEGV caused by
        unexpected memory allocation failures is not a fundamentally new
        concept either.
 
        When allocation succeeds, the fpstate properties are filled in to
        reflect the extended feature set and the resulting sizes, the
        fpu::fpstate pointer is updated accordingly and the trap is disarmed
        for this task permanently.
 
     4) Enumeration and size calculations
 
     5) Trap switching via MSR_XFD
 
        The XFD (eXtended Feature Disable) MSR is context switched with the
        same life time rules as the FPU register state itself. The mechanism
        is keyed off with a static key which is default disabled so !AMX
        equipped CPUs have zero overhead. On AMX enabled CPUs the overhead
        is limited by comparing the tasks XFD value with a per CPU shadow
        variable to avoid redundant MSR writes. In case of switching from a
        AMX using task to a non AMX using task or vice versa, the extra MSR
        write is obviously inevitable.
 
        All other places which need to be aware of the variable feature sets
        and resulting variable sizes are not affected at all because they
        retrieve the information (feature set, sizes) unconditonally from
        the fpstate properties.
 
     6) Enable the new AMX states
 
   Note, this is relatively new code despite the fact that AMX support is in
   the works for more than a year now.
 
   The big refactoring of the FPU code, which allowed to do a proper
   integration has been started exactly 3 weeks ago. Refactoring of the
   existing FPU code and of the original AMX patches took a week and has
   been subject to extensive review and testing. The only fallout which has
   not been caught in review and testing right away was restricted to AMX
   enabled systems, which is completely irrelevant for anyone outside Intel
   and their early access program. There might be dragons lurking as usual,
   but so far the fine grained refactoring has held up and eventual yet
   undetected fallout is bisectable and should be easily addressable before
   the 5.16 release. Famous last words...
 
   Many thanks to Chang Bae and Dave Hansen for working hard on this and
   also to the various test teams at Intel who reserved extra capacity to
   follow the rapid development of this closely which provides the
   confidence level required to offer this rather large update for inclusion
   into 5.16-rc1.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmF/NkITHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYodDkEADH4+/nN/QoSUHIuuha5Zptj3g2b16a
 /3TxT9fhwPen/kzMGsUk70s3iWJMA+I5dCfkSZexJ2hfhcRe9cBzZIa1HCawKwf3
 YCISTsO/M+LpeORuZ+TpfFLJKnxNr1SEOl+EYffGhq0AkCjifb9Cnr0JZuoMUzGU
 jpfJZ2bj28ri5lG812DtzSMBM9E3SAwgJv+GNjmZbxZKb9mAfhbAMdBUXHirX7Ej
 jmx6koQjYOKwYIW8w1BrdC270lUKQUyJTbQgdRkN9Mh/HnKyFixQ18JqGlgaV2cT
 EtYePUfTEdaHdAhUINLIlEug1MfOslHU+HyGsdywnoChNB4GHPQuePC5Tz60VeFN
 RbQ9aKcBUu8r95rjlnKtAtBijNMA4bjGwllVxNwJ/ZoA9RPv1SbDZ07RX3qTaLVY
 YhVQl8+shD33/W24jUTJv1kMMexpHXIlv0gyfMryzpwI7uzzmGHRPAokJdbYKctC
 dyMPfdE90rxTiMUdL/1IQGhnh3awjbyfArzUhHyQ++HyUyzCFh0slsO0CD18vUy8
 FofhCugGBhjuKw3XwLNQ+KsWURz5qHctSzBc3qMOSyqFHbAJCVRANkhsFvWJo2qL
 75+Z7OTRebtsyOUZIdq26r4roSxHrps3dupWTtN70HWx2NhQG1nLEw986QYiQu1T
 hcKvDmehQLrUvg==
 =x3WL
 -----END PGP SIGNATURE-----

Merge tag 'x86-fpu-2021-11-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fpu updates from Thomas Gleixner:

 - Cleanup of extable fixup handling to be more robust, which in turn
   allows to make the FPU exception fixups more robust as well.

 - Change the return code for signal frame related failures from
   explicit error codes to a boolean fail/success as that's all what the
   calling code evaluates.

 - A large refactoring of the FPU code to prepare for adding AMX
   support:

      - Distangle the public header maze and remove especially the
        misnomed kitchen sink internal.h which is despite it's name
        included all over the place.

      - Add a proper abstraction for the register buffer storage (struct
        fpstate) which allows to dynamically size the buffer at runtime
        by flipping the pointer to the buffer container from the default
        container which is embedded in task_struct::tread::fpu to a
        dynamically allocated container with a larger register buffer.

      - Convert the code over to the new fpstate mechanism.

      - Consolidate the KVM FPU handling by moving the FPU related code
        into the FPU core which removes the number of exports and avoids
        adding even more export when AMX has to be supported in KVM.
        This also removes duplicated code which was of course
        unnecessary different and incomplete in the KVM copy.

      - Simplify the KVM FPU buffer handling by utilizing the new
        fpstate container and just switching the buffer pointer from the
        user space buffer to the KVM guest buffer when entering
        vcpu_run() and flipping it back when leaving the function. This
        cuts the memory requirements of a vCPU for FPU buffers in half
        and avoids pointless memory copy operations.

        This also solves the so far unresolved problem of adding AMX
        support because the current FPU buffer handling of KVM inflicted
        a circular dependency between adding AMX support to the core and
        to KVM. With the new scheme of switching fpstate AMX support can
        be added to the core code without affecting KVM.

      - Replace various variables with proper data structures so the
        extra information required for adding dynamically enabled FPU
        features (AMX) can be added in one place

 - Add AMX (Advanced Matrix eXtensions) support (finally):

   AMX is a large XSTATE component which is going to be available with
   Saphire Rapids XEON CPUs. The feature comes with an extra MSR
   (MSR_XFD) which allows to trap the (first) use of an AMX related
   instruction, which has two benefits:

    1) It allows the kernel to control access to the feature

    2) It allows the kernel to dynamically allocate the large register
       state buffer instead of burdening every task with the the extra
       8K or larger state storage.

   It would have been great to gain this kind of control already with
   AVX512.

   The support comes with the following infrastructure components:

    1) arch_prctl() to
        - read the supported features (equivalent to XGETBV(0))
        - read the permitted features for a task
        - request permission for a dynamically enabled feature

       Permission is granted per process, inherited on fork() and
       cleared on exec(). The permission policy of the kernel is
       restricted to sigaltstack size validation, but the syscall
       obviously allows further restrictions via seccomp etc.

    2) A stronger sigaltstack size validation for sys_sigaltstack(2)
       which takes granted permissions and the potentially resulting
       larger signal frame into account. This mechanism can also be used
       to enforce factual sigaltstack validation independent of dynamic
       features to help with finding potential victims of the 2K
       sigaltstack size constant which is broken since AVX512 support
       was added.

    3) Exception handling for #NM traps to catch first use of a extended
       feature via a new cause MSR. If the exception was caused by the
       use of such a feature, the handler checks permission for that
       feature. If permission has not been granted, the handler sends a
       SIGILL like the #UD handler would do if the feature would have
       been disabled in XCR0. If permission has been granted, then a new
       fpstate which fits the larger buffer requirement is allocated.

       In the unlikely case that this allocation fails, the handler
       sends SIGSEGV to the task. That's not elegant, but unavoidable as
       the other discussed options of preallocation or full per task
       permissions come with their own set of horrors for kernel and/or
       userspace. So this is the lesser of the evils and SIGSEGV caused
       by unexpected memory allocation failures is not a fundamentally
       new concept either.

       When allocation succeeds, the fpstate properties are filled in to
       reflect the extended feature set and the resulting sizes, the
       fpu::fpstate pointer is updated accordingly and the trap is
       disarmed for this task permanently.

    4) Enumeration and size calculations

    5) Trap switching via MSR_XFD

       The XFD (eXtended Feature Disable) MSR is context switched with
       the same life time rules as the FPU register state itself. The
       mechanism is keyed off with a static key which is default
       disabled so !AMX equipped CPUs have zero overhead. On AMX enabled
       CPUs the overhead is limited by comparing the tasks XFD value
       with a per CPU shadow variable to avoid redundant MSR writes. In
       case of switching from a AMX using task to a non AMX using task
       or vice versa, the extra MSR write is obviously inevitable.

       All other places which need to be aware of the variable feature
       sets and resulting variable sizes are not affected at all because
       they retrieve the information (feature set, sizes) unconditonally
       from the fpstate properties.

    6) Enable the new AMX states

   Note, this is relatively new code despite the fact that AMX support
   is in the works for more than a year now.

   The big refactoring of the FPU code, which allowed to do a proper
   integration has been started exactly 3 weeks ago. Refactoring of the
   existing FPU code and of the original AMX patches took a week and has
   been subject to extensive review and testing. The only fallout which
   has not been caught in review and testing right away was restricted
   to AMX enabled systems, which is completely irrelevant for anyone
   outside Intel and their early access program. There might be dragons
   lurking as usual, but so far the fine grained refactoring has held up
   and eventual yet undetected fallout is bisectable and should be
   easily addressable before the 5.16 release. Famous last words...

   Many thanks to Chang Bae and Dave Hansen for working hard on this and
   also to the various test teams at Intel who reserved extra capacity
   to follow the rapid development of this closely which provides the
   confidence level required to offer this rather large update for
   inclusion into 5.16-rc1

* tag 'x86-fpu-2021-11-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (110 commits)
  Documentation/x86: Add documentation for using dynamic XSTATE features
  x86/fpu: Include vmalloc.h for vzalloc()
  selftests/x86/amx: Add context switch test
  selftests/x86/amx: Add test cases for AMX state management
  x86/fpu/amx: Enable the AMX feature in 64-bit mode
  x86/fpu: Add XFD handling for dynamic states
  x86/fpu: Calculate the default sizes independently
  x86/fpu/amx: Define AMX state components and have it used for boot-time checks
  x86/fpu/xstate: Prepare XSAVE feature table for gaps in state component numbers
  x86/fpu/xstate: Add fpstate_realloc()/free()
  x86/fpu/xstate: Add XFD #NM handler
  x86/fpu: Update XFD state where required
  x86/fpu: Add sanity checks for XFD
  x86/fpu: Add XFD state to fpstate
  x86/msr-index: Add MSRs for XFD
  x86/cpufeatures: Add eXtended Feature Disabling (XFD) feature bit
  x86/fpu: Reset permission and fpstate on exec()
  x86/fpu: Prepare fpu_clone() for dynamically enabled features
  x86/fpu/signal: Prepare for variable sigframe length
  x86/signal: Use fpu::__state_user_size for sigalt stack validation
  ...
This commit is contained in:
Linus Torvalds 2021-11-01 14:03:56 -07:00
commit 8cb1ae19bf
67 changed files with 3458 additions and 1575 deletions

View File

@ -5497,6 +5497,15 @@
stifb= [HW]
Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
strict_sas_size=
[X86]
Format: <bool>
Enable or disable strict sigaltstack size checks
against the required signal frame size which
depends on the supported FPU features. This can
be used to filter out binaries which have
not yet been made aware of AT_MINSIGSTKSZ.
sunrpc.min_resvport=
sunrpc.max_resvport=
[NFS,SUNRPC]

View File

@ -37,3 +37,4 @@ x86-specific Documentation
sgx
features
elf_auxvec
xstate

View File

@ -0,0 +1,65 @@
Using XSTATE features in user space applications
================================================
The x86 architecture supports floating-point extensions which are
enumerated via CPUID. Applications consult CPUID and use XGETBV to
evaluate which features have been enabled by the kernel XCR0.
Up to AVX-512 and PKRU states, these features are automatically enabled by
the kernel if available. Features like AMX TILE_DATA (XSTATE component 18)
are enabled by XCR0 as well, but the first use of related instruction is
trapped by the kernel because by default the required large XSTATE buffers
are not allocated automatically.
Using dynamically enabled XSTATE features in user space applications
--------------------------------------------------------------------
The kernel provides an arch_prctl(2) based mechanism for applications to
request the usage of such features. The arch_prctl(2) options related to
this are:
-ARCH_GET_XCOMP_SUPP
arch_prctl(ARCH_GET_XCOMP_SUPP, &features);
ARCH_GET_XCOMP_SUPP stores the supported features in userspace storage of
type uint64_t. The second argument is a pointer to that storage.
-ARCH_GET_XCOMP_PERM
arch_prctl(ARCH_GET_XCOMP_PERM, &features);
ARCH_GET_XCOMP_PERM stores the features for which the userspace process
has permission in userspace storage of type uint64_t. The second argument
is a pointer to that storage.
-ARCH_REQ_XCOMP_PERM
arch_prctl(ARCH_REQ_XCOMP_PERM, feature_nr);
ARCH_REQ_XCOMP_PERM allows to request permission for a dynamically enabled
feature or a feature set. A feature set can be mapped to a facility, e.g.
AMX, and can require one or more XSTATE components to be enabled.
The feature argument is the number of the highest XSTATE component which
is required for a facility to work.
When requesting permission for a feature, the kernel checks the
availability. The kernel ensures that sigaltstacks in the process's tasks
are large enough to accommodate the resulting large signal frame. It
enforces this both during ARCH_REQ_XCOMP_SUPP and during any subsequent
sigaltstack(2) calls. If an installed sigaltstack is smaller than the
resulting sigframe size, ARCH_REQ_XCOMP_SUPP results in -ENOSUPP. Also,
sigaltstack(2) results in -ENOMEM if the requested altstack is too small
for the permitted features.
Permission, when granted, is valid per process. Permissions are inherited
on fork(2) and cleared on exec(3).
The first use of an instruction related to a dynamically enabled feature is
trapped by the kernel. The trap handler checks whether the process has
permission to use the feature. If the process has no permission then the
kernel sends SIGILL to the application. If the process has permission then
the handler allocates a larger xstate buffer for the task so the large
state can be context switched. In the unlikely cases that the allocation
fails, the kernel sends SIGSEGV.

View File

@ -1288,6 +1288,9 @@ config ARCH_HAS_ELFCORE_COMPAT
config ARCH_HAS_PARANOID_L1D_FLUSH
bool
config DYNAMIC_SIGFRAME
bool
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"

View File

@ -125,6 +125,7 @@ config X86
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
select CLOCKSOURCE_WATCHDOG
select DCACHE_WORD_ACCESS
select DYNAMIC_SIGFRAME
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
@ -2399,6 +2400,22 @@ config MODIFY_LDT_SYSCALL
Saying 'N' here may make sense for embedded or server kernels.
config STRICT_SIGALTSTACK_SIZE
bool "Enforce strict size checking for sigaltstack"
depends on DYNAMIC_SIGFRAME
help
For historical reasons MINSIGSTKSZ is a constant which became
already too small with AVX512 support. Add a mechanism to
enforce strict checking of the sigaltstack size against the
real size of the FPU frame. This option enables the check
by default. It can also be controlled via the kernel command
line option 'strict_sas_size' independent of this config
switch. Enabling it might break existing applications which
allocate a too small sigaltstack but 'work' because they
never get a signal delivered.
Say 'N' unless you want to really enforce this check.
source "kernel/livepatch/Kconfig"
endmenu

View File

@ -14,6 +14,7 @@
#include <linux/perf_event.h>
#include <asm/fpu/xstate.h>
#include <asm/intel_ds.h>
#include <asm/cpu.h>

View File

@ -24,7 +24,6 @@
#include <linux/syscalls.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
#include <asm/ptrace.h>
#include <asm/ia32_unistd.h>
@ -57,8 +56,8 @@ static inline void reload_segments(struct sigcontext_32 *sc)
/*
* Do a signal return; undo the signal stack.
*/
static int ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_32 __user *usc)
static bool ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_32 __user *usc)
{
struct sigcontext_32 sc;
@ -66,7 +65,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
current->restart_block.fn = do_no_restart_syscall;
if (unlikely(copy_from_user(&sc, usc, sizeof(sc))))
return -EFAULT;
return false;
/* Get only the ia32 registers. */
regs->bx = sc.bx;
@ -111,7 +110,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->sc))
if (!ia32_restore_sigcontext(regs, &frame->sc))
goto badframe;
return regs->ax;
@ -135,7 +134,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
if (!ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
@ -220,8 +219,8 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
*fpstate = (struct _fpstate_32 __user *) sp;
if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
math_size) < 0)
if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
math_size))
return (void __user *) -1L;
sp -= frame_size;

View File

@ -122,28 +122,19 @@
#ifdef __KERNEL__
# include <asm/extable_fixup_types.h>
/* Exception table entry */
#ifdef __ASSEMBLY__
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
# define _ASM_EXTABLE_TYPE(from, to, type) \
.pushsection "__ex_table","a" ; \
.balign 4 ; \
.long (from) - . ; \
.long (to) - . ; \
.long (handler) - . ; \
.long type ; \
.popsection
# define _ASM_EXTABLE(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
# define _ASM_EXTABLE_CPY(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
# ifdef CONFIG_KPROBES
# define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \
@ -155,27 +146,15 @@
# endif
#else /* ! __ASSEMBLY__ */
# define _EXPAND_EXTABLE_HANDLE(x) #x
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
# define _ASM_EXTABLE_TYPE(from, to, type) \
" .pushsection \"__ex_table\",\"a\"\n" \
" .balign 4\n" \
" .long (" #from ") - .\n" \
" .long (" #to ") - .\n" \
" .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
" .long " __stringify(type) " \n" \
" .popsection\n"
# define _ASM_EXTABLE(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
# define _ASM_EXTABLE_CPY(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
/* For C file, we already have NOKPROBE_SYMBOL macro */
/*
@ -188,6 +167,17 @@ register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#define _ASM_EXTABLE(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT)
#define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS)
#define _ASM_EXTABLE_CPY(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_COPY)
#define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT)
#endif /* __KERNEL__ */
#endif /* _ASM_X86_ASM_H */

View File

@ -277,6 +277,7 @@
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@ -298,6 +299,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */

View File

@ -1,12 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_EXTABLE_H
#define _ASM_X86_EXTABLE_H
#include <asm/extable_fixup_types.h>
/*
* The exception table consists of triples of addresses relative to the
* exception table entry itself. The first address is of an instruction
* that is allowed to fault, the second is the target at which the program
* should continue. The third is a handler function to deal with the fault
* caused by the instruction in the first field.
* The exception table consists of two addresses relative to the
* exception table entry itself and a type selector field.
*
* The first address is of an instruction that is allowed to fault, the
* second is the target at which the program should continue.
*
* The type entry is used by fixup_exception() to select the handler to
* deal with the fault caused by the instruction in the first field.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@ -15,7 +21,7 @@
*/
struct exception_table_entry {
int insn, fixup, handler;
int insn, fixup, type;
};
struct pt_regs;
@ -25,21 +31,27 @@ struct pt_regs;
do { \
(a)->fixup = (b)->fixup + (delta); \
(b)->fixup = (tmp).fixup - (delta); \
(a)->handler = (b)->handler + (delta); \
(b)->handler = (tmp).handler - (delta); \
(a)->type = (b)->type; \
(b)->type = (tmp).type; \
} while (0)
enum handler_type {
EX_HANDLER_NONE,
EX_HANDLER_FAULT,
EX_HANDLER_UACCESS,
EX_HANDLER_OTHER
};
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
extern enum handler_type ex_get_fault_handler_type(unsigned long ip);
extern int ex_get_fixup_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
#ifdef CONFIG_X86_MCE
extern void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr);
#else
static inline void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { }
#endif
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64)
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs);
#else
static inline bool ex_handler_bpf(const struct exception_table_entry *x,
struct pt_regs *regs) { return false; }
#endif
#endif

View File

@ -0,0 +1,22 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H
#define _ASM_X86_EXTABLE_FIXUP_TYPES_H
#define EX_TYPE_NONE 0
#define EX_TYPE_DEFAULT 1
#define EX_TYPE_FAULT 2
#define EX_TYPE_UACCESS 3
#define EX_TYPE_COPY 4
#define EX_TYPE_CLEAR_FS 5
#define EX_TYPE_FPU_RESTORE 6
#define EX_TYPE_WRMSR 7
#define EX_TYPE_RDMSR 8
#define EX_TYPE_BPF 9
#define EX_TYPE_WRMSR_IN_MCE 10
#define EX_TYPE_RDMSR_IN_MCE 11
#define EX_TYPE_DEFAULT_MCE_SAFE 12
#define EX_TYPE_FAULT_MCE_SAFE 13
#endif

View File

@ -12,6 +12,8 @@
#define _ASM_X86_FPU_API_H
#include <linux/bottom_half.h>
#include <asm/fpu/types.h>
/*
* Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It
* disables preemption so be careful if you intend to use it for long periods
@ -48,9 +50,9 @@ static inline void kernel_fpu_begin(void)
}
/*
* Use fpregs_lock() while editing CPU's FPU registers or fpu->state.
* Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate.
* A context switch will (and softirq might) save CPU's FPU registers to
* fpu->state and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in
* fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in
* a random state.
*
* local_bh_disable() protects against both preemption and soft interrupts
@ -108,4 +110,56 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
static inline void update_pasid(void) { }
/* Trap handling */
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern void fpu_sync_fpstate(struct fpu *fpu);
extern void fpu_reset_from_exception_fixup(void);
/* Boot, hotplug and resume */
extern void fpu__init_cpu(void);
extern void fpu__init_system(struct cpuinfo_x86 *c);
extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);
#ifdef CONFIG_MATH_EMULATION
extern void fpstate_init_soft(struct swregs_state *soft);
#else
static inline void fpstate_init_soft(struct swregs_state *soft) {}
#endif
/* State tracking */
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
/* Process cleanup */
#ifdef CONFIG_X86_64
extern void fpstate_free(struct fpu *fpu);
#else
static inline void fpstate_free(struct fpu *fpu) { }
#endif
/* fpstate-related functions which are exported to KVM */
extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);
/* KVM specific functions */
extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest);
extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
{
gfpu->fpstate->is_confidential = true;
}
static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
{
return gfpu->fpstate->is_confidential;
}
/* prctl */
struct task_struct;
extern long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2);
#endif /* _ASM_X86_FPU_API_H */

View File

@ -1,540 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 1994 Linus Torvalds
*
* Pentium III FXSR, SSE support
* General FPU state handling cleanups
* Gareth Hughes <gareth@valinux.com>, May 2000
* x86-64 work by Andi Kleen 2002
*/
#ifndef _ASM_X86_FPU_INTERNAL_H
#define _ASM_X86_FPU_INTERNAL_H
#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/xcr.h>
#include <asm/cpufeature.h>
#include <asm/trace/fpu.h>
/*
* High level FPU state handling functions:
*/
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
extern void fpu__clear_user_states(struct fpu *fpu);
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern void fpu_sync_fpstate(struct fpu *fpu);
/* Clone and exit operations */
extern int fpu_clone(struct task_struct *dst);
extern void fpu_flush_thread(void);
/*
* Boot time FPU initialization functions:
*/
extern void fpu__init_cpu(void);
extern void fpu__init_system_xstate(void);
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system(struct cpuinfo_x86 *c);
extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);
/*
* Debugging facility:
*/
#ifdef CONFIG_X86_DEBUG_FPU
# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
#else
# define WARN_ON_FPU(x) ({ (void)(x); 0; })
#endif
/*
* FPU related CPU feature flag helper routines:
*/
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void)
{
return static_cpu_has(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
return static_cpu_has(X86_FEATURE_FXSR);
}
/*
* fpstate handling functions:
*/
extern union fpregs_state init_fpstate;
extern void fpstate_init(union fpregs_state *state);
#ifdef CONFIG_MATH_EMULATION
extern void fpstate_init_soft(struct swregs_state *soft);
#else
static inline void fpstate_init_soft(struct swregs_state *soft) {}
#endif
extern void save_fpregs_to_fpstate(struct fpu *fpu);
/* Returns 0 or the negated trap number, which results in -EFAULT for #PF */
#define user_insn(insn, output, input...) \
({ \
int err; \
\
might_fault(); \
\
asm volatile(ASM_STAC "\n" \
"1: " #insn "\n" \
"2: " ASM_CLAC "\n" \
".section .fixup,\"ax\"\n" \
"3: negl %%eax\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE_FAULT(1b, 3b) \
: [err] "=a" (err), output \
: "0"(0), input); \
err; \
})
#define kernel_insn_err(insn, output, input...) \
({ \
int err; \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: movl $-1,%[err]\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err), output \
: "0"(0), input); \
err; \
})
#define kernel_insn(insn, output, input...) \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
: output : input)
static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx)
{
return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
}
static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
else
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
}
static inline void fxrstor(struct fxregs_state *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int fxrstor_safe(struct fxregs_state *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline void frstor(struct fregs_state *fx)
{
kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int frstor_safe(struct fregs_state *fx)
{
return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int frstor_from_user_sigframe(struct fregs_state __user *fx)
{
return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline void fxsave(struct fxregs_state *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx));
else
asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx));
}
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
/*
* After this @err contains 0 on success or the negated trap number when
* the operation raises an exception. For faults this results in -EFAULT.
*/
#define XSTATE_OP(op, st, lmask, hmask, err) \
asm volatile("1:" op "\n\t" \
"xor %[err], %[err]\n" \
"2:\n\t" \
".pushsection .fixup,\"ax\"\n\t" \
"3: negl %%eax\n\t" \
"jmp 2b\n\t" \
".popsection\n\t" \
_ASM_EXTABLE_FAULT(1b, 3b) \
: [err] "=a" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
/*
* If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
* format and supervisor states in addition to modified optimization in
* XSAVEOPT.
*
* Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
* supports modified optimization which is not supported by XSAVE.
*
* We use XSAVE as a fallback.
*
* The 661 label is defined in the ALTERNATIVE* macros as the address of the
* original instruction which gets replaced. We need to use it here as the
* address of the instruction where we might get an exception at.
*/
#define XSTATE_XSAVE(st, lmask, hmask, err) \
asm volatile(ALTERNATIVE_2(XSAVE, \
XSAVEOPT, X86_FEATURE_XSAVEOPT, \
XSAVES, X86_FEATURE_XSAVES) \
"\n" \
"xor %[err], %[err]\n" \
"3:\n" \
".pushsection .fixup,\"ax\"\n" \
"4: movl $-2, %[err]\n" \
"jmp 3b\n" \
".popsection\n" \
_ASM_EXTABLE(661b, 4b) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
/*
* Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
* XSAVE area format.
*/
#define XSTATE_XRESTORE(st, lmask, hmask) \
asm volatile(ALTERNATIVE(XRSTOR, \
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
"3:\n" \
_ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
: \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
*/
static inline void os_xrstor_booting(struct xregs_state *xstate)
{
u64 mask = xfeatures_mask_fpstate();
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
WARN_ON(system_state != SYSTEM_BOOTING);
if (boot_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
/*
* We should never fault when copying from a kernel buffer, and the FPU
* state we set at boot time should be valid.
*/
WARN_ON_FPU(err);
}
/*
* Save processor xstate to xsave area.
*
* Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features
* and command line options. The choice is permanent until the next reboot.
*/
static inline void os_xsave(struct xregs_state *xstate)
{
u64 mask = xfeatures_mask_all;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
WARN_ON_FPU(!alternatives_patched);
XSTATE_XSAVE(xstate, lmask, hmask, err);
/* We should never fault when copying to a kernel buffer: */
WARN_ON_FPU(err);
}
/*
* Restore processor xstate from xsave area.
*
* Uses XRSTORS when XSAVES is used, XRSTOR otherwise.
*/
static inline void os_xrstor(struct xregs_state *xstate, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
XSTATE_XRESTORE(xstate, lmask, hmask);
}
/*
* Save xstate to user space xsave area.
*
* We don't use modified optimization because xrstor/xrstors might track
* a different application.
*
* We don't use compacted format xsave area for
* backward compatibility for old applications which don't understand
* compacted format of xsave area.
*/
static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
{
/*
* Include the features which are not xsaved/rstored by the kernel
* internally, e.g. PKRU. That's user space ABI and also required
* to allow the signal handler to modify PKRU.
*/
u64 mask = xfeatures_mask_uabi();
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
/*
* Clear the xsave header first, so that reserved fields are
* initialized to zero.
*/
err = __clear_user(&buf->header, sizeof(buf->header));
if (unlikely(err))
return -EFAULT;
stac();
XSTATE_OP(XSAVE, buf, lmask, hmask, err);
clac();
return err;
}
/*
* Restore xstate from user space xsave area.
*/
static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask)
{
struct xregs_state *xstate = ((__force struct xregs_state *)buf);
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
stac();
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
clac();
return err;
}
/*
* Restore xstate from kernel space xsave area, return an error code instead of
* an exception.
*/
static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
if (cpu_feature_enabled(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
return err;
}
extern void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask);
static inline void restore_fpregs_from_fpstate(union fpregs_state *fpstate)
{
__restore_fpregs_from_fpstate(fpstate, xfeatures_mask_fpstate());
}
extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
/*
* FPU context switch related helper methods:
*/
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
/*
* The in-register FPU state for an FPU context on a CPU is assumed to be
* valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
* matches the FPU.
*
* If the FPU register state is valid, the kernel can skip restoring the
* FPU state from memory.
*
* Any code that clobbers the FPU registers or updates the in-memory
* FPU state for a task MUST let the rest of the kernel know that the
* FPU registers are no longer valid for this task.
*
* Either one of these invalidation functions is enough. Invalidate
* a resource you control: CPU if using the CPU for something else
* (with preemption disabled), FPU for the current task, or a task that
* is prevented from running by the current task.
*/
static inline void __cpu_invalidate_fpregs_state(void)
{
__this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}
static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
fpu->last_cpu = -1;
}
static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}
/*
* These generally need preemption protection to work,
* do try to avoid using these on their own:
*/
static inline void fpregs_deactivate(struct fpu *fpu)
{
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
trace_x86_fpu_regs_deactivated(fpu);
}
static inline void fpregs_activate(struct fpu *fpu)
{
this_cpu_write(fpu_fpregs_owner_ctx, fpu);
trace_x86_fpu_regs_activated(fpu);
}
/* Internal helper for switch_fpu_return() and signal frame setup */
static inline void fpregs_restore_userregs(void)
{
struct fpu *fpu = &current->thread.fpu;
int cpu = smp_processor_id();
if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
return;
if (!fpregs_state_valid(fpu, cpu)) {
u64 mask;
/*
* This restores _all_ xstate which has not been
* established yet.
*
* If PKRU is enabled, then the PKRU value is already
* correct because it was either set in switch_to() or in
* flush_thread(). So it is excluded because it might be
* not up to date in current->thread.fpu.xsave state.
*/
mask = xfeatures_mask_restore_user() |
xfeatures_mask_supervisor();
__restore_fpregs_from_fpstate(&fpu->state, mask);
fpregs_activate(fpu);
fpu->last_cpu = cpu;
}
clear_thread_flag(TIF_NEED_FPU_LOAD);
}
/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
*
* - switch_fpu_prepare() saves the old state.
* This is done within the context of the old process.
*
* - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
* will get loaded on return to userspace, or when the kernel needs it.
*
* If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
* are saved in the current thread's FPU register state.
*
* If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
* hold current()'s FPU registers. It is required to load the
* registers before returning to userland or using the content
* otherwise.
*
* The FPU context is only stored/restored for a user task and
* PF_KTHREAD is used to distinguish between kernel and user threads.
*/
static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) {
save_fpregs_to_fpstate(old_fpu);
/*
* The save operation preserved register state, so the
* fpu_fpregs_owner_ctx is still @old_fpu. Store the
* current CPU number in @old_fpu, so the next return
* to user space can avoid the FPU register restore
* when is returns on the same CPU and still owns the
* context.
*/
old_fpu->last_cpu = cpu;
trace_x86_fpu_regs_deactivated(old_fpu);
}
}
/*
* Misc helper functions:
*/
/*
* Delay loading of the complete FPU state until the return to userland.
* PKRU is handled separately.
*/
static inline void switch_fpu_finish(struct fpu *new_fpu)
{
if (cpu_feature_enabled(X86_FEATURE_FPU))
set_thread_flag(TIF_NEED_FPU_LOAD);
}
#endif /* _ASM_X86_FPU_INTERNAL_H */

View File

@ -0,0 +1,68 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_FPU_SCHED_H
#define _ASM_X86_FPU_SCHED_H
#include <linux/sched.h>
#include <asm/cpufeature.h>
#include <asm/fpu/types.h>
#include <asm/trace/fpu.h>
extern void save_fpregs_to_fpstate(struct fpu *fpu);
extern void fpu__drop(struct fpu *fpu);
extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags);
extern void fpu_flush_thread(void);
/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
*
* - switch_fpu_prepare() saves the old state.
* This is done within the context of the old process.
*
* - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
* will get loaded on return to userspace, or when the kernel needs it.
*
* If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
* are saved in the current thread's FPU register state.
*
* If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
* hold current()'s FPU registers. It is required to load the
* registers before returning to userland or using the content
* otherwise.
*
* The FPU context is only stored/restored for a user task and
* PF_KTHREAD is used to distinguish between kernel and user threads.
*/
static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
if (cpu_feature_enabled(X86_FEATURE_FPU) &&
!(current->flags & PF_KTHREAD)) {
save_fpregs_to_fpstate(old_fpu);
/*
* The save operation preserved register state, so the
* fpu_fpregs_owner_ctx is still @old_fpu. Store the
* current CPU number in @old_fpu, so the next return
* to user space can avoid the FPU register restore
* when is returns on the same CPU and still owns the
* context.
*/
old_fpu->last_cpu = cpu;
trace_x86_fpu_regs_deactivated(old_fpu);
}
}
/*
* Delay loading of the complete FPU state until the return to userland.
* PKRU is handled separately.
*/
static inline void switch_fpu_finish(void)
{
if (cpu_feature_enabled(X86_FEATURE_FPU))
set_thread_flag(TIF_NEED_FPU_LOAD);
}
#endif /* _ASM_X86_FPU_SCHED_H */

View File

@ -5,6 +5,11 @@
#ifndef _ASM_X86_FPU_SIGNAL_H
#define _ASM_X86_FPU_SIGNAL_H
#include <linux/compat.h>
#include <linux/user.h>
#include <asm/fpu/types.h>
#ifdef CONFIG_X86_64
# include <uapi/asm/sigcontext.h>
# include <asm/user32.h>
@ -31,6 +36,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long fpu__get_fpstate_size(void);
extern void fpu__init_prepare_fx_sw_frame(void);
extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
extern void fpu__clear_user_states(struct fpu *fpu);
extern bool fpu__restore_sig(void __user *buf, int ia32_frame);
extern void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask);
extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
#endif /* _ASM_X86_FPU_SIGNAL_H */

View File

@ -120,6 +120,9 @@ enum xfeature {
XFEATURE_RSRVD_COMP_13,
XFEATURE_RSRVD_COMP_14,
XFEATURE_LBR,
XFEATURE_RSRVD_COMP_16,
XFEATURE_XTILE_CFG,
XFEATURE_XTILE_DATA,
XFEATURE_MAX,
};
@ -136,12 +139,21 @@ enum xfeature {
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
#define XFEATURE_MASK_PASID (1 << XFEATURE_PASID)
#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG)
#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA)
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
| XFEATURE_MASK_ZMM_Hi256 \
| XFEATURE_MASK_Hi16_ZMM)
#ifdef CONFIG_X86_64
# define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA \
| XFEATURE_MASK_XTILE_CFG)
#else
# define XFEATURE_MASK_XTILE (0)
#endif
#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
struct reg_128_bit {
@ -153,6 +165,9 @@ struct reg_256_bit {
struct reg_512_bit {
u8 regbytes[512/8];
};
struct reg_1024_byte {
u8 regbytes[1024];
};
/*
* State component 2:
@ -255,6 +270,23 @@ struct arch_lbr_state {
u64 ler_to;
u64 ler_info;
struct lbr_entry entries[];
};
/*
* State component 17: 64-byte tile configuration register.
*/
struct xtile_cfg {
u64 tcfg[8];
} __packed;
/*
* State component 18: 1KB tile data register.
* Each register represents 16 64-byte rows of the matrix
* data. But the number of registers depends on the actual
* implementation.
*/
struct xtile_data {
struct reg_1024_byte tmm;
} __packed;
/*
@ -309,6 +341,91 @@ union fpregs_state {
u8 __padding[PAGE_SIZE];
};
struct fpstate {
/* @kernel_size: The size of the kernel register image */
unsigned int size;
/* @user_size: The size in non-compacted UABI format */
unsigned int user_size;
/* @xfeatures: xfeatures for which the storage is sized */
u64 xfeatures;
/* @user_xfeatures: xfeatures valid in UABI buffers */
u64 user_xfeatures;
/* @xfd: xfeatures disabled to trap userspace use. */
u64 xfd;
/* @is_valloc: Indicator for dynamically allocated state */
unsigned int is_valloc : 1;
/* @is_guest: Indicator for guest state (KVM) */
unsigned int is_guest : 1;
/*
* @is_confidential: Indicator for KVM confidential mode.
* The FPU registers are restored by the
* vmentry firmware from encrypted guest
* memory. On vmexit the FPU registers are
* saved by firmware to encrypted guest memory
* and the registers are scrubbed before
* returning to the host. So there is no
* content which is worth saving and restoring.
* The fpstate has to be there so that
* preemption and softirq FPU usage works
* without special casing.
*/
unsigned int is_confidential : 1;
/* @in_use: State is in use */
unsigned int in_use : 1;
/* @regs: The register state union for all supported formats */
union fpregs_state regs;
/* @regs is dynamically sized! Don't add anything after @regs! */
} __aligned(64);
struct fpu_state_perm {
/*
* @__state_perm:
*
* This bitmap indicates the permission for state components, which
* are available to a thread group. The permission prctl() sets the
* enabled state bits in thread_group_leader()->thread.fpu.
*
* All run time operations use the per thread information in the
* currently active fpu.fpstate which contains the xfeature masks
* and sizes for kernel and user space.
*
* This master permission field is only to be used when
* task.fpu.fpstate based checks fail to validate whether the task
* is allowed to expand it's xfeatures set which requires to
* allocate a larger sized fpstate buffer.
*
* Do not access this field directly. Use the provided helper
* function. Unlocked access is possible for quick checks.
*/
u64 __state_perm;
/*
* @__state_size:
*
* The size required for @__state_perm. Only valid to access
* with sighand locked.
*/
unsigned int __state_size;
/*
* @__user_state_size:
*
* The size required for @__state_perm user part. Only valid to
* access with sighand locked.
*/
unsigned int __user_state_size;
};
/*
* Highest level per task FPU state data structure that
* contains the FPU register state plus various FPU
@ -337,19 +454,100 @@ struct fpu {
unsigned long avx512_timestamp;
/*
* @state:
* @fpstate:
*
* In-memory copy of all FPU registers that we save/restore
* over context switches. If the task is using the FPU then
* the registers in the FPU are more recent than this state
* copy. If the task context-switches away then they get
* saved here and represent the FPU state.
* Pointer to the active struct fpstate. Initialized to
* point at @__fpstate below.
*/
union fpregs_state state;
struct fpstate *fpstate;
/*
* WARNING: 'state' is dynamically-sized. Do not put
* @__task_fpstate:
*
* Pointer to an inactive struct fpstate. Initialized to NULL. Is
* used only for KVM support to swap out the regular task fpstate.
*/
struct fpstate *__task_fpstate;
/*
* @perm:
*
* Permission related information
*/
struct fpu_state_perm perm;
/*
* @__fpstate:
*
* Initial in-memory storage for FPU registers which are saved in
* context switch and when the kernel uses the FPU. The registers
* are restored from this storage on return to user space if they
* are not longer containing the tasks FPU register state.
*/
struct fpstate __fpstate;
/*
* WARNING: '__fpstate' is dynamically-sized. Do not put
* anything after it here.
*/
};
/*
* Guest pseudo FPU container
*/
struct fpu_guest {
/*
* @fpstate: Pointer to the allocated guest fpstate
*/
struct fpstate *fpstate;
};
/*
* FPU state configuration data. Initialized at boot time. Read only after init.
*/
struct fpu_state_config {
/*
* @max_size:
*
* The maximum size of the register state buffer. Includes all
* supported features except independent managed features.
*/
unsigned int max_size;
/*
* @default_size:
*
* The default size of the register state buffer. Includes all
* supported features except independent managed features and
* features which have to be requested by user space before usage.
*/
unsigned int default_size;
/*
* @max_features:
*
* The maximum supported features bitmap. Does not include
* independent managed features.
*/
u64 max_features;
/*
* @default_features:
*
* The default supported features bitmap. Does not include
* independent managed features and features which have to
* be requested by user space before usage.
*/
u64 default_features;
/*
* @legacy_features:
*
* Features which can be reported back to user space
* even without XSAVE support, i.e. legacy features FP + SSE
*/
u64 legacy_features;
};
/* FPU state configuration information */
extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg;
#endif /* _ASM_X86_FPU_H */

View File

@ -2,17 +2,6 @@
#ifndef _ASM_X86_FPU_XCR_H
#define _ASM_X86_FPU_XCR_H
/*
* MXCSR and XCR definitions:
*/
static inline void ldmxcsr(u32 mxcsr)
{
asm volatile("ldmxcsr %0" :: "m" (mxcsr));
}
extern unsigned int mxcsr_feature_mask;
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
static inline u64 xgetbv(u32 index)

View File

@ -14,6 +14,8 @@
#define XSTATE_CPUID 0x0000000d
#define TILE_CPUID 0x0000001d
#define FXSAVE_SIZE 512
#define XSAVE_HDR_SIZE 64
@ -33,7 +35,8 @@
XFEATURE_MASK_Hi16_ZMM | \
XFEATURE_MASK_PKRU | \
XFEATURE_MASK_BNDREGS | \
XFEATURE_MASK_BNDCSR)
XFEATURE_MASK_BNDCSR | \
XFEATURE_MASK_XTILE)
/*
* Features which are restored when returning to user space.
@ -43,6 +46,9 @@
#define XFEATURE_MASK_USER_RESTORE \
(XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU)
/* Features which are dynamically enabled for a process on request */
#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA
/* All currently supported supervisor features */
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID)
@ -78,78 +84,42 @@
XFEATURE_MASK_INDEPENDENT | \
XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
#else
#define REX_PREFIX
#endif
extern u64 xfeatures_mask_all;
static inline u64 xfeatures_mask_supervisor(void)
{
return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}
/*
* The xfeatures which are enabled in XCR0 and expected to be in ptrace
* buffers and signal frames.
* The feature mask required to restore FPU state:
* - All user states which are not eagerly switched in switch_to()/exec()
* - The suporvisor states
*/
static inline u64 xfeatures_mask_uabi(void)
{
return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
}
/*
* The xfeatures which are restored by the kernel when returning to user
* mode. This is not necessarily the same as xfeatures_mask_uabi() as the
* kernel does not manage all XCR0 enabled features via xsave/xrstor as
* some of them have to be switched eagerly on context switch and exec().
*/
static inline u64 xfeatures_mask_restore_user(void)
{
return xfeatures_mask_all & XFEATURE_MASK_USER_RESTORE;
}
/*
* Like xfeatures_mask_restore_user() but additionally restors the
* supported supervisor states.
*/
static inline u64 xfeatures_mask_fpstate(void)
{
return xfeatures_mask_all & \
(XFEATURE_MASK_USER_RESTORE | XFEATURE_MASK_SUPERVISOR_SUPPORTED);
}
static inline u64 xfeatures_mask_independent(void)
{
if (!boot_cpu_has(X86_FEATURE_ARCH_LBR))
return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
return XFEATURE_MASK_INDEPENDENT;
}
#define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \
XFEATURE_MASK_SUPERVISOR_SUPPORTED)
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
int xfeature_size(int xfeature_nr);
int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
void xsaves(struct xregs_state *xsave, u64 mask);
void xrstors(struct xregs_state *xsave, u64 mask);
enum xstate_copy_mode {
XSTATE_COPY_FP,
XSTATE_COPY_FX,
XSTATE_COPY_XSAVE,
};
int xfd_enable_feature(u64 xfd_err);
struct membuf;
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
#ifdef CONFIG_X86_64
DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
#endif
#ifdef CONFIG_X86_64
DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
static __always_inline __pure bool fpu_state_size_dynamic(void)
{
return static_branch_unlikely(&__fpu_state_size_dynamic);
}
#else
static __always_inline __pure bool fpu_state_size_dynamic(void)
{
return false;
}
#endif
#endif

View File

@ -691,11 +691,10 @@ struct kvm_vcpu_arch {
*
* Note that while the PKRU state lives inside the fpu registers,
* it is switched out separately at VMENTER and VMEXIT time. The
* "guest_fpu" state here contains the guest FPU context, with the
* "guest_fpstate" state here contains the guest FPU context, with the
* host PRKU bits.
*/
struct fpu *user_fpu;
struct fpu *guest_fpu;
struct fpu_guest guest_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
@ -1686,8 +1685,6 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code);
void kvm_free_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);

View File

@ -625,6 +625,8 @@
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
#define MSR_IA32_XFD 0x000001c4
#define MSR_IA32_XFD_ERR 0x000001c5
#define MSR_IA32_XSS 0x00000da0
#define MSR_IA32_APICBASE 0x0000001b

View File

@ -92,7 +92,7 @@ static __always_inline unsigned long long __rdmsr(unsigned int msr)
asm volatile("1: rdmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR)
: EAX_EDX_RET(val, low, high) : "c" (msr));
return EAX_EDX_VAL(val, low, high);
@ -102,7 +102,7 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
{
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}

View File

@ -2,7 +2,7 @@
#ifndef _ASM_X86_PKRU_H
#define _ASM_X86_PKRU_H
#include <asm/fpu/xstate.h>
#include <asm/cpufeature.h>
#define PKRU_AD_BIT 0x1
#define PKRU_WD_BIT 0x2

View File

@ -461,9 +461,6 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
#endif /* !X86_64 */
extern unsigned int fpu_kernel_xstate_size;
extern unsigned int fpu_user_xstate_size;
struct perf_event;
struct thread_struct {
@ -537,12 +534,12 @@ struct thread_struct {
*/
};
/* Whitelist the FPU state from the task_struct for hardened usercopy. */
extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
static inline void arch_thread_struct_whitelist(unsigned long *offset,
unsigned long *size)
{
*offset = offsetof(struct thread_struct, fpu.state);
*size = fpu_kernel_xstate_size;
fpu_thread_struct_whitelist(offset, size);
}
static inline void

View File

@ -40,6 +40,6 @@ void x86_report_nx(void);
extern int reboot_force;
long do_arch_prctl_common(struct task_struct *task, int option,
unsigned long cpuid_enabled);
unsigned long arg2);
#endif /* _ASM_X86_PROTO_H */

View File

@ -339,7 +339,7 @@ static inline void __loadsegment_fs(unsigned short value)
"1: movw %0, %%fs \n"
"2: \n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs)
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS)
: : "rm" (value) : "memory");
}

View File

@ -22,8 +22,8 @@ DECLARE_EVENT_CLASS(x86_fpu,
__entry->fpu = fpu;
__entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD);
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
__entry->xfeatures = fpu->fpstate->regs.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->fpstate->regs.xsave.header.xcomp_bv;
}
),
TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx",

View File

@ -10,6 +10,10 @@
#define ARCH_GET_CPUID 0x1011
#define ARCH_SET_CPUID 0x1012
#define ARCH_GET_XCOMP_SUPP 0x1021
#define ARCH_GET_XCOMP_PERM 0x1022
#define ARCH_REQ_XCOMP_PERM 0x1023
#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003

View File

@ -22,7 +22,7 @@
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/msr.h>
#include <asm/vmx.h>
#include <asm/paravirt.h>

View File

@ -42,7 +42,7 @@
#include <asm/setup.h>
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/mtrr.h>
#include <asm/hwcap2.h>
#include <linux/numa.h>

View File

@ -75,6 +75,8 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_SGX_LC, X86_FEATURE_SGX },
{ X86_FEATURE_SGX1, X86_FEATURE_SGX },
{ X86_FEATURE_SGX2, X86_FEATURE_SGX1 },
{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
{}
};

View File

@ -373,13 +373,16 @@ static int msr_to_offset(u32 msr)
return -1;
}
__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr)
{
pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip);
if (wrmsr) {
pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
regs->ip, (void *)regs->ip);
} else {
pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip);
}
show_stack_regs(regs);
@ -387,8 +390,6 @@ __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
while (true)
cpu_relax();
return true;
}
/* MSR access wrappers used for error injection */
@ -420,32 +421,13 @@ static noinstr u64 mce_rdmsrl(u32 msr)
*/
asm volatile("1: rdmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR_IN_MCE)
: EAX_EDX_RET(val, low, high) : "c" (msr));
return EAX_EDX_VAL(val, low, high);
}
__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
regs->ip, (void *)regs->ip);
show_stack_regs(regs);
panic("MCA architectural violation!\n");
while (true)
cpu_relax();
return true;
}
static noinstr void mce_wrmsrl(u32 msr, u64 v)
{
u32 low, high;
@ -470,7 +452,7 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
/* See comment in mce_rdmsrl() */
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR_IN_MCE)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}

View File

@ -61,7 +61,7 @@ static inline void cmci_disable_bank(int bank) { }
static inline void intel_init_cmci(void) { }
static inline void intel_init_lmce(void) { }
static inline void intel_clear_lmce(void) { }
static inline bool intel_filter_mce(struct mce *m) { return false; };
static inline bool intel_filter_mce(struct mce *m) { return false; }
#endif
void mce_timer_kick(unsigned long interval);
@ -183,17 +183,7 @@ extern bool filter_mce(struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);
#else
static inline bool amd_filter_mce(struct mce *m) { return false; };
static inline bool amd_filter_mce(struct mce *m) { return false; }
#endif
__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr);
__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr);
#endif /* __X86_MCE_INTERNAL_H__ */

View File

@ -265,25 +265,25 @@ static bool is_copy_from_user(struct pt_regs *regs)
*/
static int error_context(struct mce *m, struct pt_regs *regs)
{
enum handler_type t;
if ((m->cs & 3) == 3)
return IN_USER;
if (!mc_recoverable(m->mcgstatus))
return IN_KERNEL;
t = ex_get_fault_handler_type(m->ip);
if (t == EX_HANDLER_FAULT) {
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
}
if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
m->kflags |= MCE_IN_KERNEL_RECOV;
switch (ex_get_fixup_type(m->ip)) {
case EX_TYPE_UACCESS:
case EX_TYPE_COPY:
if (!regs || !is_copy_from_user(regs))
return IN_KERNEL;
m->kflags |= MCE_IN_KERNEL_COPYIN;
fallthrough;
case EX_TYPE_FAULT_MCE_SAFE:
case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
default:
return IN_KERNEL;
}
return IN_KERNEL;
}
static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)

View File

@ -2,7 +2,7 @@
/*
* x86 FPU bug checks:
*/
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
/*
* Boot time CPU/FPU FDIV bug detection code:

View File

@ -0,0 +1,83 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __X86_KERNEL_FPU_CONTEXT_H
#define __X86_KERNEL_FPU_CONTEXT_H
#include <asm/fpu/xstate.h>
#include <asm/trace/fpu.h>
/* Functions related to FPU context tracking */
/*
* The in-register FPU state for an FPU context on a CPU is assumed to be
* valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
* matches the FPU.
*
* If the FPU register state is valid, the kernel can skip restoring the
* FPU state from memory.
*
* Any code that clobbers the FPU registers or updates the in-memory
* FPU state for a task MUST let the rest of the kernel know that the
* FPU registers are no longer valid for this task.
*
* Either one of these invalidation functions is enough. Invalidate
* a resource you control: CPU if using the CPU for something else
* (with preemption disabled), FPU for the current task, or a task that
* is prevented from running by the current task.
*/
static inline void __cpu_invalidate_fpregs_state(void)
{
__this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}
static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
fpu->last_cpu = -1;
}
static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}
static inline void fpregs_deactivate(struct fpu *fpu)
{
__this_cpu_write(fpu_fpregs_owner_ctx, NULL);
trace_x86_fpu_regs_deactivated(fpu);
}
static inline void fpregs_activate(struct fpu *fpu)
{
__this_cpu_write(fpu_fpregs_owner_ctx, fpu);
trace_x86_fpu_regs_activated(fpu);
}
/* Internal helper for switch_fpu_return() and signal frame setup */
static inline void fpregs_restore_userregs(void)
{
struct fpu *fpu = &current->thread.fpu;
int cpu = smp_processor_id();
if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
return;
if (!fpregs_state_valid(fpu, cpu)) {
/*
* This restores _all_ xstate which has not been
* established yet.
*
* If PKRU is enabled, then the PKRU value is already
* correct because it was either set in switch_to() or in
* flush_thread(). So it is excluded because it might be
* not up to date in current->thread.fpu.xsave state.
*
* XFD state is handled in restore_fpregs_from_fpstate().
*/
restore_fpregs_from_fpstate(fpu->fpstate, XFEATURE_MASK_FPSTATE);
fpregs_activate(fpu);
fpu->last_cpu = cpu;
}
clear_thread_flag(TIF_NEED_FPU_LOAD);
}
#endif

View File

@ -6,8 +6,9 @@
* General FPU state handling cleanups
* Gareth Hughes <gareth@valinux.com>, May 2000
*/
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/sched.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/types.h>
#include <asm/traps.h>
@ -15,15 +16,30 @@
#include <linux/hardirq.h>
#include <linux/pkeys.h>
#include <linux/vmalloc.h>
#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"
#define CREATE_TRACE_POINTS
#include <asm/trace/fpu.h>
#ifdef CONFIG_X86_64
DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
DEFINE_PER_CPU(u64, xfd_state);
#endif
/* The FPU state configuration data for kernel and user space */
struct fpu_state_config fpu_kernel_cfg __ro_after_init;
struct fpu_state_config fpu_user_cfg __ro_after_init;
/*
* Represents the initial FPU state. It's mostly (but not completely) zeroes,
* depending on the FPU hardware format:
*/
union fpregs_state init_fpstate __ro_after_init;
struct fpstate init_fpstate __ro_after_init;
/*
* Track whether the kernel is using the FPU state
@ -83,7 +99,7 @@ bool irq_fpu_usable(void)
EXPORT_SYMBOL(irq_fpu_usable);
/*
* Save the FPU register state in fpu->state. The register state is
* Save the FPU register state in fpu->fpstate->regs. The register state is
* preserved.
*
* Must be called with fpregs_lock() held.
@ -99,19 +115,19 @@ EXPORT_SYMBOL(irq_fpu_usable);
void save_fpregs_to_fpstate(struct fpu *fpu)
{
if (likely(use_xsave())) {
os_xsave(&fpu->state.xsave);
os_xsave(fpu->fpstate);
/*
* AVX512 state is tracked here because its use is
* known to slow the max clock speed of the core.
*/
if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
if (fpu->fpstate->regs.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
fpu->avx512_timestamp = jiffies;
return;
}
if (likely(use_fxsr())) {
fxsave(&fpu->state.fxsave);
fxsave(&fpu->fpstate->regs.fxsave);
return;
}
@ -119,12 +135,11 @@ void save_fpregs_to_fpstate(struct fpu *fpu)
* Legacy FPU register saving, FNSAVE always clears FPU registers,
* so we have to reload them from the memory state.
*/
asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
frstor(&fpu->state.fsave);
asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave));
frstor(&fpu->fpstate->regs.fsave);
}
EXPORT_SYMBOL(save_fpregs_to_fpstate);
void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask)
void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask)
{
/*
* AMD K7/K8 and later CPUs up to Zen don't save/restore
@ -141,15 +156,181 @@ void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask)
}
if (use_xsave()) {
os_xrstor(&fpstate->xsave, mask);
/*
* Dynamically enabled features are enabled in XCR0, but
* usage requires also that the corresponding bits in XFD
* are cleared. If the bits are set then using a related
* instruction will raise #NM. This allows to do the
* allocation of the larger FPU buffer lazy from #NM or if
* the task has no permission to kill it which would happen
* via #UD if the feature is disabled in XCR0.
*
* XFD state is following the same life time rules as
* XSTATE and to restore state correctly XFD has to be
* updated before XRSTORS otherwise the component would
* stay in or go into init state even if the bits are set
* in fpstate::regs::xsave::xfeatures.
*/
xfd_update_state(fpstate);
/*
* Restoring state always needs to modify all features
* which are in @mask even if the current task cannot use
* extended features.
*
* So fpstate->xfeatures cannot be used here, because then
* a feature for which the task has no permission but was
* used by the previous task would not go into init state.
*/
mask = fpu_kernel_cfg.max_features & mask;
os_xrstor(fpstate, mask);
} else {
if (use_fxsr())
fxrstor(&fpstate->fxsave);
fxrstor(&fpstate->regs.fxsave);
else
frstor(&fpstate->fsave);
frstor(&fpstate->regs.fsave);
}
}
EXPORT_SYMBOL_GPL(__restore_fpregs_from_fpstate);
void fpu_reset_from_exception_fixup(void)
{
restore_fpregs_from_fpstate(&init_fpstate, XFEATURE_MASK_FPSTATE);
}
#if IS_ENABLED(CONFIG_KVM)
static void __fpstate_reset(struct fpstate *fpstate);
bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
{
struct fpstate *fpstate;
unsigned int size;
size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64);
fpstate = vzalloc(size);
if (!fpstate)
return false;
__fpstate_reset(fpstate);
fpstate_init_user(fpstate);
fpstate->is_valloc = true;
fpstate->is_guest = true;
gfpu->fpstate = fpstate;
return true;
}
EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
void fpu_free_guest_fpstate(struct fpu_guest *gfpu)
{
struct fpstate *fps = gfpu->fpstate;
if (!fps)
return;
if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use))
return;
gfpu->fpstate = NULL;
vfree(fps);
}
EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);
int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
{
struct fpstate *guest_fps = guest_fpu->fpstate;
struct fpu *fpu = &current->thread.fpu;
struct fpstate *cur_fps = fpu->fpstate;
fpregs_lock();
if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD))
save_fpregs_to_fpstate(fpu);
/* Swap fpstate */
if (enter_guest) {
fpu->__task_fpstate = cur_fps;
fpu->fpstate = guest_fps;
guest_fps->in_use = true;
} else {
guest_fps->in_use = false;
fpu->fpstate = fpu->__task_fpstate;
fpu->__task_fpstate = NULL;
}
cur_fps = fpu->fpstate;
if (!cur_fps->is_confidential) {
/* Includes XFD update */
restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE);
} else {
/*
* XSTATE is restored by firmware from encrypted
* memory. Make sure XFD state is correct while
* running with guest fpstate
*/
xfd_update_state(cur_fps);
}
fpregs_mark_activate();
fpregs_unlock();
return 0;
}
EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
unsigned int size, u32 pkru)
{
struct fpstate *kstate = gfpu->fpstate;
union fpregs_state *ustate = buf;
struct membuf mb = { .p = buf, .left = size };
if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
__copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
} else {
memcpy(&ustate->fxsave, &kstate->regs.fxsave,
sizeof(ustate->fxsave));
/* Make it restorable on a XSAVE enabled host */
ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE;
}
}
EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi);
int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
u64 xcr0, u32 *vpkru)
{
struct fpstate *kstate = gfpu->fpstate;
const union fpregs_state *ustate = buf;
struct pkru_state *xpkru;
int ret;
if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE)
return -EINVAL;
if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask)
return -EINVAL;
memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave));
return 0;
}
if (ustate->xsave.header.xfeatures & ~xcr0)
return -EINVAL;
ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
if (ret)
return ret;
/* Retrieve PKRU if not in init state */
if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
*vpkru = xpkru->pkru;
}
/* Ensure that XCOMP_BV is set up for XSAVES */
xstate_init_xcomp_bv(&kstate->regs.xsave, kstate->xfeatures);
return 0;
}
EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
#endif /* CONFIG_KVM */
void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
@ -203,52 +384,88 @@ void fpu_sync_fpstate(struct fpu *fpu)
fpregs_unlock();
}
static inline void fpstate_init_xstate(struct xregs_state *xsave)
static inline unsigned int init_fpstate_copy_size(void)
{
/*
* XRSTORS requires these bits set in xcomp_bv, or it will
* trigger #GP:
*/
xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
if (!use_xsave())
return fpu_kernel_cfg.default_size;
/* XSAVE(S) just needs the legacy and the xstate header part */
return sizeof(init_fpstate.regs.xsave);
}
static inline void fpstate_init_fxstate(struct fxregs_state *fx)
static inline void fpstate_init_fxstate(struct fpstate *fpstate)
{
fx->cwd = 0x37f;
fx->mxcsr = MXCSR_DEFAULT;
fpstate->regs.fxsave.cwd = 0x37f;
fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT;
}
/*
* Legacy x87 fpstate state init:
*/
static inline void fpstate_init_fstate(struct fregs_state *fp)
static inline void fpstate_init_fstate(struct fpstate *fpstate)
{
fp->cwd = 0xffff037fu;
fp->swd = 0xffff0000u;
fp->twd = 0xffffffffu;
fp->fos = 0xffff0000u;
fpstate->regs.fsave.cwd = 0xffff037fu;
fpstate->regs.fsave.swd = 0xffff0000u;
fpstate->regs.fsave.twd = 0xffffffffu;
fpstate->regs.fsave.fos = 0xffff0000u;
}
void fpstate_init(union fpregs_state *state)
/*
* Used in two places:
* 1) Early boot to setup init_fpstate for non XSAVE systems
* 2) fpu_init_fpstate_user() which is invoked from KVM
*/
void fpstate_init_user(struct fpstate *fpstate)
{
if (!static_cpu_has(X86_FEATURE_FPU)) {
fpstate_init_soft(&state->soft);
if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
fpstate_init_soft(&fpstate->regs.soft);
return;
}
memset(state, 0, fpu_kernel_xstate_size);
xstate_init_xcomp_bv(&fpstate->regs.xsave, fpstate->xfeatures);
if (static_cpu_has(X86_FEATURE_XSAVES))
fpstate_init_xstate(&state->xsave);
if (static_cpu_has(X86_FEATURE_FXSR))
fpstate_init_fxstate(&state->fxsave);
if (cpu_feature_enabled(X86_FEATURE_FXSR))
fpstate_init_fxstate(fpstate);
else
fpstate_init_fstate(&state->fsave);
fpstate_init_fstate(fpstate);
}
static void __fpstate_reset(struct fpstate *fpstate)
{
/* Initialize sizes and feature masks */
fpstate->size = fpu_kernel_cfg.default_size;
fpstate->user_size = fpu_user_cfg.default_size;
fpstate->xfeatures = fpu_kernel_cfg.default_features;
fpstate->user_xfeatures = fpu_user_cfg.default_features;
fpstate->xfd = init_fpstate.xfd;
}
void fpstate_reset(struct fpu *fpu)
{
/* Set the fpstate pointer to the default fpstate */
fpu->fpstate = &fpu->__fpstate;
__fpstate_reset(fpu->fpstate);
/* Initialize the permission related info in fpu */
fpu->perm.__state_perm = fpu_kernel_cfg.default_features;
fpu->perm.__state_size = fpu_kernel_cfg.default_size;
fpu->perm.__user_state_size = fpu_user_cfg.default_size;
}
static inline void fpu_inherit_perms(struct fpu *dst_fpu)
{
if (fpu_state_size_dynamic()) {
struct fpu *src_fpu = &current->group_leader->thread.fpu;
spin_lock_irq(&current->sighand->siglock);
/* Fork also inherits the permissions of the parent */
dst_fpu->perm = src_fpu->perm;
spin_unlock_irq(&current->sighand->siglock);
}
}
EXPORT_SYMBOL_GPL(fpstate_init);
/* Clone current's FPU state on fork */
int fpu_clone(struct task_struct *dst)
int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
{
struct fpu *src_fpu = &current->thread.fpu;
struct fpu *dst_fpu = &dst->thread.fpu;
@ -256,36 +473,67 @@ int fpu_clone(struct task_struct *dst)
/* The new task's FPU state cannot be valid in the hardware. */
dst_fpu->last_cpu = -1;
fpstate_reset(dst_fpu);
if (!cpu_feature_enabled(X86_FEATURE_FPU))
return 0;
/*
* Don't let 'init optimized' areas of the XSAVE area
* leak into the child task:
* Enforce reload for user space tasks and prevent kernel threads
* from trying to save the FPU registers on context switch.
*/
memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
/*
* If the FPU registers are not owned by current just memcpy() the
* state. Otherwise save the FPU registers directly into the
* child's FPU context, without any memory-to-memory copying.
* No FPU state inheritance for kernel threads and IO
* worker threads.
*/
if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) {
/* Clear out the minimal state */
memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs,
init_fpstate_copy_size());
return 0;
}
/*
* If a new feature is added, ensure all dynamic features are
* caller-saved from here!
*/
BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
/*
* Save the default portion of the current FPU state into the
* clone. Assume all dynamic features to be defined as caller-
* saved, which enables skipping both the expansion of fpstate
* and the copying of any dynamic state.
*
* Do not use memcpy() when TIF_NEED_FPU_LOAD is set because
* copying is not valid when current uses non-default states.
*/
fpregs_lock();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
else
save_fpregs_to_fpstate(dst_fpu);
fpregs_restore_userregs();
save_fpregs_to_fpstate(dst_fpu);
if (!(clone_flags & CLONE_THREAD))
fpu_inherit_perms(dst_fpu);
fpregs_unlock();
set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
return 0;
}
/*
* Whitelist the FPU register state embedded into task_struct for hardened
* usercopy.
*/
void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
{
*offset = offsetof(struct thread_struct, fpu.__fpstate.regs);
*size = fpu_kernel_cfg.default_size;
}
/*
* Drops current FPU state: deactivates the fpregs and
* the fpstate. NOTE: it still leaves previous contents
@ -319,28 +567,19 @@ void fpu__drop(struct fpu *fpu)
static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
{
if (use_xsave())
os_xrstor(&init_fpstate.xsave, features_mask);
os_xrstor(&init_fpstate, features_mask);
else if (use_fxsr())
fxrstor(&init_fpstate.fxsave);
fxrstor(&init_fpstate.regs.fxsave);
else
frstor(&init_fpstate.fsave);
frstor(&init_fpstate.regs.fsave);
pkru_write_default();
}
static inline unsigned int init_fpstate_copy_size(void)
{
if (!use_xsave())
return fpu_kernel_xstate_size;
/* XSAVE(S) just needs the legacy and the xstate header part */
return sizeof(init_fpstate.xsave);
}
/*
* Reset current->fpu memory state to the init values.
*/
static void fpu_reset_fpstate(void)
static void fpu_reset_fpregs(void)
{
struct fpu *fpu = &current->thread.fpu;
@ -359,7 +598,7 @@ static void fpu_reset_fpstate(void)
* user space as PKRU is eagerly written in switch_to() and
* flush_thread().
*/
memcpy(&fpu->state, &init_fpstate, init_fpstate_copy_size());
memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size());
set_thread_flag(TIF_NEED_FPU_LOAD);
fpregs_unlock();
}
@ -375,7 +614,7 @@ void fpu__clear_user_states(struct fpu *fpu)
fpregs_lock();
if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
fpu_reset_fpstate();
fpu_reset_fpregs();
fpregs_unlock();
return;
}
@ -385,12 +624,11 @@ void fpu__clear_user_states(struct fpu *fpu)
* corresponding registers.
*/
if (xfeatures_mask_supervisor() &&
!fpregs_state_valid(fpu, smp_processor_id())) {
os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor());
}
!fpregs_state_valid(fpu, smp_processor_id()))
os_xrstor_supervisor(fpu->fpstate);
/* Reset user states in registers. */
restore_fpregs_from_init_fpstate(xfeatures_mask_restore_user());
restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE);
/*
* Now all FPU registers have their desired values. Inform the FPU
@ -405,7 +643,8 @@ void fpu__clear_user_states(struct fpu *fpu)
void fpu_flush_thread(void)
{
fpu_reset_fpstate();
fpstate_reset(&current->thread.fpu);
fpu_reset_fpregs();
}
/*
* Load FPU context before returning to userspace.
@ -445,7 +684,6 @@ void fpregs_mark_activate(void)
fpu->last_cpu = smp_processor_id();
clear_thread_flag(TIF_NEED_FPU_LOAD);
}
EXPORT_SYMBOL_GPL(fpregs_mark_activate);
/*
* x87 math exception handling:
@ -468,11 +706,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
* fully reproduce the context of the exception.
*/
if (boot_cpu_has(X86_FEATURE_FXSR)) {
cwd = fpu->state.fxsave.cwd;
swd = fpu->state.fxsave.swd;
cwd = fpu->fpstate->regs.fxsave.cwd;
swd = fpu->fpstate->regs.fxsave.swd;
} else {
cwd = (unsigned short)fpu->state.fsave.cwd;
swd = (unsigned short)fpu->state.fsave.swd;
cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd;
swd = (unsigned short)fpu->fpstate->regs.fsave.swd;
}
err = swd & ~cwd;
@ -486,7 +724,7 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
unsigned short mxcsr = MXCSR_DEFAULT;
if (boot_cpu_has(X86_FEATURE_XMM))
mxcsr = fpu->state.fxsave.mxcsr;
mxcsr = fpu->fpstate->regs.fxsave.mxcsr;
err = ~(mxcsr >> 7) & mxcsr;
}

View File

@ -2,7 +2,7 @@
/*
* x86 FPU boot time init code:
*/
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>
@ -10,6 +10,10 @@
#include <linux/sched/task.h>
#include <linux/init.h>
#include "internal.h"
#include "legacy.h"
#include "xstate.h"
/*
* Initialize the registers found in all CPUs, CR0 and CR4:
*/
@ -34,7 +38,7 @@ static void fpu__init_cpu_generic(void)
/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU))
fpstate_init_soft(&current->thread.fpu.state.soft);
fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft);
else
#endif
asm volatile ("fninit");
@ -121,23 +125,14 @@ static void __init fpu__init_system_mxcsr(void)
static void __init fpu__init_system_generic(void)
{
/*
* Set up the legacy init FPU context. (xstate init might overwrite this
* with a more modern format, if the CPU supports it.)
* Set up the legacy init FPU context. Will be updated when the
* CPU supports XSAVE[S].
*/
fpstate_init(&init_fpstate);
fpstate_init_user(&init_fpstate);
fpu__init_system_mxcsr();
}
/*
* Size of the FPU context state. All tasks in the system use the
* same context size, regardless of what portion they use.
* This is inherent to the XSAVE architecture which puts all state
* components into a single, continuous memory block:
*/
unsigned int fpu_kernel_xstate_size __ro_after_init;
EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
/* Get alignment of the TYPE. */
#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
@ -162,13 +157,13 @@ static void __init fpu__init_task_struct_size(void)
* Subtract off the static size of the register state.
* It potentially has a bunch of padding.
*/
task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
task_size -= sizeof(current->thread.fpu.__fpstate.regs);
/*
* Add back the dynamically-calculated register state
* size.
*/
task_size += fpu_kernel_xstate_size;
task_size += fpu_kernel_cfg.default_size;
/*
* We dynamically size 'struct fpu', so we require that
@ -177,7 +172,7 @@ static void __init fpu__init_task_struct_size(void)
* you hit a compile error here, check the structure to
* see if something got added to the end.
*/
CHECK_MEMBER_AT_END_OF(struct fpu, state);
CHECK_MEMBER_AT_END_OF(struct fpu, __fpstate);
CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
@ -192,37 +187,34 @@ static void __init fpu__init_task_struct_size(void)
*/
static void __init fpu__init_system_xstate_size_legacy(void)
{
static int on_boot_cpu __initdata = 1;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
unsigned int size;
/*
* Note that xstate sizes might be overwritten later during
* fpu__init_system_xstate().
* Note that the size configuration might be overwritten later
* during fpu__init_system_xstate().
*/
if (!boot_cpu_has(X86_FEATURE_FPU)) {
fpu_kernel_xstate_size = sizeof(struct swregs_state);
if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
size = sizeof(struct swregs_state);
} else if (cpu_feature_enabled(X86_FEATURE_FXSR)) {
size = sizeof(struct fxregs_state);
fpu_user_cfg.legacy_features = XFEATURE_MASK_FPSSE;
} else {
if (boot_cpu_has(X86_FEATURE_FXSR))
fpu_kernel_xstate_size =
sizeof(struct fxregs_state);
else
fpu_kernel_xstate_size =
sizeof(struct fregs_state);
size = sizeof(struct fregs_state);
fpu_user_cfg.legacy_features = XFEATURE_MASK_FP;
}
fpu_user_xstate_size = fpu_kernel_xstate_size;
fpu_kernel_cfg.max_size = size;
fpu_kernel_cfg.default_size = size;
fpu_user_cfg.max_size = size;
fpu_user_cfg.default_size = size;
fpstate_reset(&current->thread.fpu);
}
/* Legacy code to initialize eager fpu mode. */
static void __init fpu__init_system_ctx_switch(void)
static void __init fpu__init_init_fpstate(void)
{
static bool on_boot_cpu __initdata = 1;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
/* Bring init_fpstate size and features up to date */
init_fpstate.size = fpu_kernel_cfg.max_size;
init_fpstate.xfeatures = fpu_kernel_cfg.max_features;
}
/*
@ -231,6 +223,7 @@ static void __init fpu__init_system_ctx_switch(void)
*/
void __init fpu__init_system(struct cpuinfo_x86 *c)
{
fpstate_reset(&current->thread.fpu);
fpu__init_system_early_generic(c);
/*
@ -241,8 +234,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)
fpu__init_system_generic();
fpu__init_system_xstate_size_legacy();
fpu__init_system_xstate();
fpu__init_system_xstate(fpu_kernel_cfg.max_size);
fpu__init_task_struct_size();
fpu__init_system_ctx_switch();
fpu__init_init_fpstate();
}

View File

@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __X86_KERNEL_FPU_INTERNAL_H
#define __X86_KERNEL_FPU_INTERNAL_H
extern struct fpstate init_fpstate;
/* CPU feature check wrappers */
static __always_inline __pure bool use_xsave(void)
{
return cpu_feature_enabled(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
return cpu_feature_enabled(X86_FEATURE_FXSR);
}
#ifdef CONFIG_X86_DEBUG_FPU
# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
#else
# define WARN_ON_FPU(x) ({ (void)(x); 0; })
#endif
/* Used in init.c */
extern void fpstate_init_user(struct fpstate *fpstate);
extern void fpstate_reset(struct fpu *fpu);
#endif

View File

@ -0,0 +1,115 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __X86_KERNEL_FPU_LEGACY_H
#define __X86_KERNEL_FPU_LEGACY_H
#include <asm/fpu/types.h>
extern unsigned int mxcsr_feature_mask;
static inline void ldmxcsr(u32 mxcsr)
{
asm volatile("ldmxcsr %0" :: "m" (mxcsr));
}
/*
* Returns 0 on success or the trap number when the operation raises an
* exception.
*/
#define user_insn(insn, output, input...) \
({ \
int err; \
\
might_fault(); \
\
asm volatile(ASM_STAC "\n" \
"1: " #insn "\n" \
"2: " ASM_CLAC "\n" \
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \
: [err] "=a" (err), output \
: "0"(0), input); \
err; \
})
#define kernel_insn_err(insn, output, input...) \
({ \
int err; \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: movl $-1,%[err]\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err), output \
: "0"(0), input); \
err; \
})
#define kernel_insn(insn, output, input...) \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FPU_RESTORE) \
: output : input)
static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx)
{
return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
}
static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
else
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
}
static inline void fxrstor(struct fxregs_state *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int fxrstor_safe(struct fxregs_state *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
else
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline void frstor(struct fregs_state *fx)
{
kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int frstor_safe(struct fregs_state *fx)
{
return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int frstor_from_user_sigframe(struct fregs_state __user *fx)
{
return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline void fxsave(struct fxregs_state *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx));
else
asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx));
}
#endif

View File

@ -5,10 +5,14 @@
#include <linux/sched/task_stack.h>
#include <linux/vmalloc.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/xstate.h>
#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"
/*
* The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
@ -74,8 +78,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
sync_fpstate(fpu);
if (!use_xsave()) {
return membuf_write(&to, &fpu->state.fxsave,
sizeof(fpu->state.fxsave));
return membuf_write(&to, &fpu->fpstate->regs.fxsave,
sizeof(fpu->fpstate->regs.fxsave));
}
copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_FX);
@ -110,15 +114,15 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
fpu_force_restore(fpu);
/* Copy the state */
memcpy(&fpu->state.fxsave, &newstate, sizeof(newstate));
memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate));
/* Clear xmm8..15 */
BUILD_BUG_ON(sizeof(fpu->state.fxsave.xmm_space) != 16 * 16);
memset(&fpu->state.fxsave.xmm_space[8], 0, 8 * 16);
BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16);
memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16);
/* Mark FP and SSE as in use when XSAVE is enabled */
if (use_xsave())
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
fpu->fpstate->regs.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
return 0;
}
@ -149,7 +153,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
/*
* A whole standard-format XSAVE buffer is needed:
*/
if (pos != 0 || count != fpu_user_xstate_size)
if (pos != 0 || count != fpu_user_cfg.max_size)
return -EFAULT;
if (!kbuf) {
@ -164,7 +168,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
}
fpu_force_restore(fpu);
ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf);
ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf);
out:
vfree(tmpbuf);
@ -283,7 +287,7 @@ static void __convert_from_fxsr(struct user_i387_ia32_struct *env,
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
__convert_from_fxsr(env, tsk, &tsk->thread.fpu.state.fxsave);
__convert_from_fxsr(env, tsk, &tsk->thread.fpu.fpstate->regs.fxsave);
}
void convert_to_fxsr(struct fxregs_state *fxsave,
@ -326,7 +330,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
return fpregs_soft_get(target, regset, to);
if (!cpu_feature_enabled(X86_FEATURE_FXSR)) {
return membuf_write(&to, &fpu->state.fsave,
return membuf_write(&to, &fpu->fpstate->regs.fsave,
sizeof(struct fregs_state));
}
@ -337,7 +341,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
copy_xstate_to_uabi_buf(mb, target, XSTATE_COPY_FP);
fx = &fxsave;
} else {
fx = &fpu->state.fxsave;
fx = &fpu->fpstate->regs.fxsave;
}
__convert_from_fxsr(&env, target, fx);
@ -366,16 +370,16 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
fpu_force_restore(fpu);
if (cpu_feature_enabled(X86_FEATURE_FXSR))
convert_to_fxsr(&fpu->state.fxsave, &env);
convert_to_fxsr(&fpu->fpstate->regs.fxsave, &env);
else
memcpy(&fpu->state.fsave, &env, sizeof(env));
memcpy(&fpu->fpstate->regs.fsave, &env, sizeof(env));
/*
* Update the header bit in the xsave header, indicating the
* presence of FP.
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVE))
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
fpu->fpstate->regs.xsave.header.xfeatures |= XFEATURE_MASK_FP;
return 0;
}

View File

@ -7,23 +7,25 @@
#include <linux/cpu.h>
#include <linux/pagemap.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/xstate.h>
#include <asm/sigframe.h>
#include <asm/trapnr.h>
#include <asm/trace/fpu.h>
static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init;
static struct _fpx_sw_bytes fx_sw_reserved_ia32 __ro_after_init;
#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"
/*
* Check for the presence of extended state information in the
* user fpstate pointer in the sigcontext.
*/
static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
struct _fpx_sw_bytes *fx_sw)
static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
struct _fpx_sw_bytes *fx_sw)
{
int min_xstate_size = sizeof(struct fxregs_state) +
sizeof(struct xstate_header);
@ -31,12 +33,12 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
unsigned int magic2;
if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw)))
return -EFAULT;
return false;
/* Check for the first magic field and other error scenarios. */
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
fx_sw->xstate_size < min_xstate_size ||
fx_sw->xstate_size > fpu_user_xstate_size ||
fx_sw->xstate_size > current->thread.fpu.fpstate->user_size ||
fx_sw->xstate_size > fx_sw->extended_size)
goto setfx;
@ -47,10 +49,10 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
* in the memory layout.
*/
if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)))
return -EFAULT;
return false;
if (likely(magic2 == FP_XSTATE_MAGIC2))
return 0;
return true;
setfx:
trace_x86_fpu_xstate_check_failed(&current->thread.fpu);
@ -58,22 +60,22 @@ setfx:
fx_sw->magic1 = 0;
fx_sw->xstate_size = sizeof(struct fxregs_state);
fx_sw->xfeatures = XFEATURE_MASK_FPSSE;
return 0;
return true;
}
/*
* Signal frame handlers.
*/
static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf)
{
if (use_fxsr()) {
struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
struct xregs_state *xsave = &tsk->thread.fpu.fpstate->regs.xsave;
struct user_i387_ia32_struct env;
struct _fpstate_32 __user *fp = buf;
fpregs_lock();
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
fxsave(&tsk->thread.fpu.state.fxsave);
fxsave(&tsk->thread.fpu.fpstate->regs.fxsave);
fpregs_unlock();
convert_from_fxsr(&env, tsk);
@ -81,33 +83,54 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
if (__copy_to_user(buf, &env, sizeof(env)) ||
__put_user(xsave->i387.swd, &fp->status) ||
__put_user(X86_FXSR_MAGIC, &fp->magic))
return -1;
return false;
} else {
struct fregs_state __user *fp = buf;
u32 swd;
if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
return -1;
return false;
}
return 0;
return true;
}
static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
/*
* Prepare the SW reserved portion of the fxsave memory layout, indicating
* the presence of the extended state information in the memory layout
* pointed to by the fpstate pointer in the sigcontext.
* This is saved when ever the FP and extended state context is
* saved on the user stack during the signal handler delivery to the user.
*/
static inline void save_sw_bytes(struct _fpx_sw_bytes *sw_bytes, bool ia32_frame,
struct fpstate *fpstate)
{
sw_bytes->magic1 = FP_XSTATE_MAGIC1;
sw_bytes->extended_size = fpstate->user_size + FP_XSTATE_MAGIC2_SIZE;
sw_bytes->xfeatures = fpstate->user_xfeatures;
sw_bytes->xstate_size = fpstate->user_size;
if (ia32_frame)
sw_bytes->extended_size += sizeof(struct fregs_state);
}
static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
struct fpstate *fpstate)
{
struct xregs_state __user *x = buf;
struct _fpx_sw_bytes *sw_bytes;
struct _fpx_sw_bytes sw_bytes;
u32 xfeatures;
int err;
/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
save_sw_bytes(&sw_bytes, ia32_frame, fpstate);
err = __copy_to_user(&x->i387.sw_reserved, &sw_bytes, sizeof(sw_bytes));
if (!use_xsave())
return err;
return !err;
err |= __put_user(FP_XSTATE_MAGIC2,
(__u32 __user *)(buf + fpu_user_xstate_size));
(__u32 __user *)(buf + fpstate->user_size));
/*
* Read the xfeatures which we copied (directly from the cpu or
@ -130,23 +153,17 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
return err;
return !err;
}
static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
{
int err;
if (use_xsave())
err = xsave_to_user_sigframe(buf);
else if (use_fxsr())
err = fxsave_to_user_sigframe((struct fxregs_state __user *) buf);
return xsave_to_user_sigframe(buf);
if (use_fxsr())
return fxsave_to_user_sigframe((struct fxregs_state __user *) buf);
else
err = fnsave_to_user_sigframe((struct fregs_state __user *) buf);
if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size))
err = -EFAULT;
return err;
return fnsave_to_user_sigframe((struct fregs_state __user *) buf);
}
/*
@ -159,10 +176,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
* buf == buf_fx for 64-bit frames and 32-bit fsave frame.
* buf != buf_fx for 32-bit frames with fxstate.
*
* Try to save it directly to the user frame with disabled page fault handler.
* If this fails then do the slow path where the FPU state is first saved to
* task's fpu->state and then copy it to the user frame pointed to by the
* aligned pointer 'buf_fx'.
* Save it directly to the user frame with disabled page fault handler. If
* that faults, try to clear the frame which handles the page fault.
*
* If this is a 32-bit frame with fxstate, put a fsave header before
* the aligned state at 'buf_fx'.
@ -170,10 +185,11 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
* For [f]xsave state, update the SW reserved fields in the [f]xsave frame
* indicating the absence/presence of the extended state to the user.
*/
int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
{
struct task_struct *tsk = current;
int ia32_fxstate = (buf != buf_fx);
struct fpstate *fpstate = tsk->thread.fpu.fpstate;
bool ia32_fxstate = (buf != buf_fx);
int ret;
ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
@ -181,13 +197,25 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
if (!static_cpu_has(X86_FEATURE_FPU)) {
struct user_i387_ia32_struct fp;
fpregs_soft_get(current, NULL, (struct membuf){.p = &fp,
.left = sizeof(fp)});
return copy_to_user(buf, &fp, sizeof(fp)) ? -EFAULT : 0;
return !copy_to_user(buf, &fp, sizeof(fp));
}
if (!access_ok(buf, size))
return -EACCES;
return false;
if (use_xsave()) {
struct xregs_state __user *xbuf = buf_fx;
/*
* Clear the xsave header first, so that reserved fields are
* initialized to zero.
*/
if (__clear_user(&xbuf->header, sizeof(xbuf->header)))
return false;
}
retry:
/*
* Load the FPU registers if they are not valid for the current task.
@ -205,26 +233,26 @@ retry:
fpregs_unlock();
if (ret) {
if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
if (!__clear_user(buf_fx, fpstate->user_size))
goto retry;
return -EFAULT;
return false;
}
/* Save the fsave header for the 32-bit frames. */
if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
return -1;
if ((ia32_fxstate || !use_fxsr()) && !save_fsave_header(tsk, buf))
return false;
if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
return -1;
if (use_fxsr() && !save_xstate_epilog(buf_fx, ia32_fxstate, fpstate))
return false;
return 0;
return true;
}
static int __restore_fpregs_from_user(void __user *buf, u64 xrestore,
bool fx_only)
static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
u64 xrestore, bool fx_only)
{
if (use_xsave()) {
u64 init_bv = xfeatures_mask_uabi() & ~xrestore;
u64 init_bv = ufeatures & ~xrestore;
int ret;
if (likely(!fx_only))
@ -233,7 +261,7 @@ static int __restore_fpregs_from_user(void __user *buf, u64 xrestore,
ret = fxrstor_from_user_sigframe(buf);
if (!ret && unlikely(init_bv))
os_xrstor(&init_fpstate.xsave, init_bv);
os_xrstor(&init_fpstate, init_bv);
return ret;
} else if (use_fxsr()) {
return fxrstor_from_user_sigframe(buf);
@ -246,16 +274,19 @@ static int __restore_fpregs_from_user(void __user *buf, u64 xrestore,
* Attempt to restore the FPU registers directly from user memory.
* Pagefaults are handled and any errors returned are fatal.
*/
static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
bool fx_only, unsigned int size)
static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
bool fx_only, unsigned int size)
{
struct fpu *fpu = &current->thread.fpu;
int ret;
retry:
fpregs_lock();
/* Ensure that XFD is up to date */
xfd_update_state(fpu->fpstate);
pagefault_disable();
ret = __restore_fpregs_from_user(buf, xrestore, fx_only);
ret = __restore_fpregs_from_user(buf, fpu->fpstate->user_xfeatures,
xrestore, fx_only);
pagefault_enable();
if (unlikely(ret)) {
@ -275,13 +306,12 @@ retry:
fpregs_unlock();
/* Try to handle #PF, but anything else is fatal. */
if (ret != -EFAULT)
return -EINVAL;
if (ret != X86_TRAP_PF)
return false;
ret = fault_in_pages_readable(buf, size);
if (!ret)
if (!fault_in_pages_readable(buf, size))
goto retry;
return ret;
return false;
}
/*
@ -294,45 +324,40 @@ retry:
* been restored from a user buffer directly.
*/
if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor())
os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor());
os_xrstor_supervisor(fpu->fpstate);
fpregs_mark_activate();
fpregs_unlock();
return 0;
return true;
}
static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
bool ia32_fxstate)
static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
bool ia32_fxstate)
{
int state_size = fpu_kernel_xstate_size;
struct task_struct *tsk = current;
struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
bool success, fx_only = false;
union fpregs_state *fpregs;
unsigned int state_size;
u64 user_xfeatures = 0;
bool fx_only = false;
int ret;
if (use_xsave()) {
struct _fpx_sw_bytes fx_sw_user;
ret = check_xstate_in_sigframe(buf_fx, &fx_sw_user);
if (unlikely(ret))
return ret;
if (!check_xstate_in_sigframe(buf_fx, &fx_sw_user))
return false;
fx_only = !fx_sw_user.magic1;
state_size = fx_sw_user.xstate_size;
user_xfeatures = fx_sw_user.xfeatures;
} else {
user_xfeatures = XFEATURE_MASK_FPSSE;
state_size = fpu->fpstate->user_size;
}
if (likely(!ia32_fxstate)) {
/*
* Attempt to restore the FPU registers directly from user
* memory. For that to succeed, the user access cannot cause page
* faults. If it does, fall back to the slow path below, going
* through the kernel buffer with the enabled pagefault handler.
*/
/* Restore the FPU registers directly from user memory. */
return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
state_size);
}
@ -342,9 +367,8 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
* to be ignored for histerical raisins. The legacy state is folded
* in once the larger state has been copied.
*/
ret = __copy_from_user(&env, buf, sizeof(env));
if (ret)
return ret;
if (__copy_from_user(&env, buf, sizeof(env)))
return false;
/*
* By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is
@ -363,38 +387,38 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
* the right place in memory. It's ia32 mode. Shrug.
*/
if (xfeatures_mask_supervisor())
os_xsave(&fpu->state.xsave);
os_xsave(fpu->fpstate);
set_thread_flag(TIF_NEED_FPU_LOAD);
}
__fpu_invalidate_fpregs_state(fpu);
__cpu_invalidate_fpregs_state();
fpregs_unlock();
fpregs = &fpu->fpstate->regs;
if (use_xsave() && !fx_only) {
ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx);
if (ret)
return ret;
if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx))
return false;
} else {
if (__copy_from_user(&fpu->state.fxsave, buf_fx,
sizeof(fpu->state.fxsave)))
return -EFAULT;
if (__copy_from_user(&fpregs->fxsave, buf_fx,
sizeof(fpregs->fxsave)))
return false;
if (IS_ENABLED(CONFIG_X86_64)) {
/* Reject invalid MXCSR values. */
if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask)
return -EINVAL;
if (fpregs->fxsave.mxcsr & ~mxcsr_feature_mask)
return false;
} else {
/* Mask invalid bits out for historical reasons (broken hardware). */
fpu->state.fxsave.mxcsr &= mxcsr_feature_mask;
fpregs->fxsave.mxcsr &= mxcsr_feature_mask;
}
/* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */
if (use_xsave())
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
fpregs->xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
}
/* Fold the legacy FP storage */
convert_to_fxsr(&fpu->state.fxsave, &env);
convert_to_fxsr(&fpregs->fxsave, &env);
fpregs_lock();
if (use_xsave()) {
@ -409,40 +433,45 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
*/
u64 mask = user_xfeatures | xfeatures_mask_supervisor();
fpu->state.xsave.header.xfeatures &= mask;
ret = os_xrstor_safe(&fpu->state.xsave, xfeatures_mask_all);
fpregs->xsave.header.xfeatures &= mask;
success = !os_xrstor_safe(fpu->fpstate,
fpu_kernel_cfg.max_features);
} else {
ret = fxrstor_safe(&fpu->state.fxsave);
success = !fxrstor_safe(&fpregs->fxsave);
}
if (likely(!ret))
if (likely(success))
fpregs_mark_activate();
fpregs_unlock();
return ret;
return success;
}
static inline int xstate_sigframe_size(void)
static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate)
{
return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE :
fpu_user_xstate_size;
unsigned int size = fpstate->user_size;
return use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size;
}
/*
* Restore FPU state from a sigframe:
*/
int fpu__restore_sig(void __user *buf, int ia32_frame)
bool fpu__restore_sig(void __user *buf, int ia32_frame)
{
unsigned int size = xstate_sigframe_size();
struct fpu *fpu = &current->thread.fpu;
void __user *buf_fx = buf;
bool ia32_fxstate = false;
int ret;
bool success = false;
unsigned int size;
if (unlikely(!buf)) {
fpu__clear_user_states(fpu);
return 0;
return true;
}
size = xstate_sigframe_size(fpu->fpstate);
ia32_frame &= (IS_ENABLED(CONFIG_X86_32) ||
IS_ENABLED(CONFIG_IA32_EMULATION));
@ -456,30 +485,28 @@ int fpu__restore_sig(void __user *buf, int ia32_frame)
ia32_fxstate = true;
}
if (!access_ok(buf, size)) {
ret = -EACCES;
if (!access_ok(buf, size))
goto out;
}
if (!IS_ENABLED(CONFIG_X86_64) && !cpu_feature_enabled(X86_FEATURE_FPU)) {
ret = fpregs_soft_set(current, NULL, 0,
sizeof(struct user_i387_ia32_struct),
NULL, buf);
success = !fpregs_soft_set(current, NULL, 0,
sizeof(struct user_i387_ia32_struct),
NULL, buf);
} else {
ret = __fpu_restore_sig(buf, buf_fx, ia32_fxstate);
success = __fpu_restore_sig(buf, buf_fx, ia32_fxstate);
}
out:
if (unlikely(ret))
if (unlikely(!success))
fpu__clear_user_states(fpu);
return ret;
return success;
}
unsigned long
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long *buf_fx, unsigned long *size)
{
unsigned long frame_size = xstate_sigframe_size();
unsigned long frame_size = xstate_sigframe_size(current->thread.fpu.fpstate);
*buf_fx = sp = round_down(sp - frame_size, 64);
if (ia32_frame && use_fxsr()) {
@ -492,9 +519,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
return sp;
}
unsigned long fpu__get_fpstate_size(void)
unsigned long __init fpu__get_fpstate_size(void)
{
unsigned long ret = xstate_sigframe_size();
unsigned long ret = fpu_user_cfg.max_size;
if (use_xsave())
ret += FP_XSTATE_MAGIC2_SIZE;
/*
* This space is needed on (most) 32-bit kernels, or when a 32-bit
@ -510,28 +540,3 @@ unsigned long fpu__get_fpstate_size(void)
return ret;
}
/*
* Prepare the SW reserved portion of the fxsave memory layout, indicating
* the presence of the extended state information in the memory layout
* pointed by the fpstate pointer in the sigcontext.
* This will be saved when ever the FP and extended state context is
* saved on the user stack during the signal handler delivery to the user.
*/
void fpu__init_prepare_fx_sw_frame(void)
{
int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE;
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = size;
fx_sw_reserved.xfeatures = xfeatures_mask_uabi();
fx_sw_reserved.xstate_size = fpu_user_xstate_size;
if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
IS_ENABLED(CONFIG_X86_32)) {
int fsave_header_size = sizeof(struct fregs_state);
fx_sw_reserved_ia32 = fx_sw_reserved;
fx_sw_reserved_ia32.extended_size = size + fsave_header_size;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,278 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __X86_KERNEL_FPU_XSTATE_H
#define __X86_KERNEL_FPU_XSTATE_H
#include <asm/cpufeature.h>
#include <asm/fpu/xstate.h>
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u64, xfd_state);
#endif
static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
{
/*
* XRSTORS requires these bits set in xcomp_bv, or it will
* trigger #GP:
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVES))
xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
}
static inline u64 xstate_get_host_group_perm(void)
{
/* Pairs with WRITE_ONCE() in xstate_request_perm() */
return READ_ONCE(current->group_leader->thread.fpu.perm.__state_perm);
}
enum xstate_copy_mode {
XSTATE_COPY_FP,
XSTATE_COPY_FX,
XSTATE_COPY_XSAVE,
};
struct membuf;
extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf);
extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf);
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system_xstate(unsigned int legacy_size);
extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
static inline u64 xfeatures_mask_supervisor(void)
{
return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}
static inline u64 xfeatures_mask_independent(void)
{
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR))
return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
return XFEATURE_MASK_INDEPENDENT;
}
/* XSAVE/XRSTOR wrapper functions */
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
#else
#define REX_PREFIX
#endif
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
/*
* After this @err contains 0 on success or the trap number when the
* operation raises an exception.
*/
#define XSTATE_OP(op, st, lmask, hmask, err) \
asm volatile("1:" op "\n\t" \
"xor %[err], %[err]\n" \
"2:\n\t" \
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \
: [err] "=a" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
/*
* If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
* format and supervisor states in addition to modified optimization in
* XSAVEOPT.
*
* Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
* supports modified optimization which is not supported by XSAVE.
*
* We use XSAVE as a fallback.
*
* The 661 label is defined in the ALTERNATIVE* macros as the address of the
* original instruction which gets replaced. We need to use it here as the
* address of the instruction where we might get an exception at.
*/
#define XSTATE_XSAVE(st, lmask, hmask, err) \
asm volatile(ALTERNATIVE_2(XSAVE, \
XSAVEOPT, X86_FEATURE_XSAVEOPT, \
XSAVES, X86_FEATURE_XSAVES) \
"\n" \
"xor %[err], %[err]\n" \
"3:\n" \
".pushsection .fixup,\"ax\"\n" \
"4: movl $-2, %[err]\n" \
"jmp 3b\n" \
".popsection\n" \
_ASM_EXTABLE(661b, 4b) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
/*
* Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
* XSAVE area format.
*/
#define XSTATE_XRESTORE(st, lmask, hmask) \
asm volatile(ALTERNATIVE(XRSTOR, \
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
"3:\n" \
_ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \
: \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_DEBUG_FPU)
extern void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor);
#else
static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) { }
#endif
#ifdef CONFIG_X86_64
static inline void xfd_update_state(struct fpstate *fpstate)
{
if (fpu_state_size_dynamic()) {
u64 xfd = fpstate->xfd;
if (__this_cpu_read(xfd_state) != xfd) {
wrmsrl(MSR_IA32_XFD, xfd);
__this_cpu_write(xfd_state, xfd);
}
}
}
#else
static inline void xfd_update_state(struct fpstate *fpstate) { }
#endif
/*
* Save processor xstate to xsave area.
*
* Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features
* and command line options. The choice is permanent until the next reboot.
*/
static inline void os_xsave(struct fpstate *fpstate)
{
u64 mask = fpstate->xfeatures;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
WARN_ON_FPU(!alternatives_patched);
xfd_validate_state(fpstate, mask, false);
XSTATE_XSAVE(&fpstate->regs.xsave, lmask, hmask, err);
/* We should never fault when copying to a kernel buffer: */
WARN_ON_FPU(err);
}
/*
* Restore processor xstate from xsave area.
*
* Uses XRSTORS when XSAVES is used, XRSTOR otherwise.
*/
static inline void os_xrstor(struct fpstate *fpstate, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
xfd_validate_state(fpstate, mask, true);
XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask);
}
/* Restore of supervisor state. Does not require XFD */
static inline void os_xrstor_supervisor(struct fpstate *fpstate)
{
u64 mask = xfeatures_mask_supervisor();
u32 lmask = mask;
u32 hmask = mask >> 32;
XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask);
}
/*
* Save xstate to user space xsave area.
*
* We don't use modified optimization because xrstor/xrstors might track
* a different application.
*
* We don't use compacted format xsave area for backward compatibility for
* old applications which don't understand the compacted format of the
* xsave area.
*
* The caller has to zero buf::header before calling this because XSAVE*
* does not touch the reserved fields in the header.
*/
static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
{
/*
* Include the features which are not xsaved/rstored by the kernel
* internally, e.g. PKRU. That's user space ABI and also required
* to allow the signal handler to modify PKRU.
*/
struct fpstate *fpstate = current->thread.fpu.fpstate;
u64 mask = fpstate->user_xfeatures;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
xfd_validate_state(fpstate, mask, false);
stac();
XSTATE_OP(XSAVE, buf, lmask, hmask, err);
clac();
return err;
}
/*
* Restore xstate from user space xsave area.
*/
static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask)
{
struct xregs_state *xstate = ((__force struct xregs_state *)buf);
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
xfd_validate_state(current->thread.fpu.fpstate, mask, true);
stac();
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
clac();
return err;
}
/*
* Restore xstate from kernel space xsave area, return an error code instead of
* an exception.
*/
static inline int os_xrstor_safe(struct fpstate *fpstate, u64 mask)
{
struct xregs_state *xstate = &fpstate->regs.xsave;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
/* Ensure that XFD is up to date */
xfd_update_state(fpstate);
if (cpu_feature_enabled(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
return err;
}
#endif

View File

@ -30,7 +30,9 @@
#include <asm/apic.h>
#include <linux/uaccess.h>
#include <asm/mwait.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/fpu/sched.h>
#include <asm/fpu/xstate.h>
#include <asm/debugreg.h>
#include <asm/nmi.h>
#include <asm/tlbflush.h>
@ -88,9 +90,20 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
#ifdef CONFIG_VM86
dst->thread.vm86 = NULL;
#endif
return fpu_clone(dst);
/* Drop the copied pointer to current's fpstate */
dst->thread.fpu.fpstate = NULL;
return 0;
}
#ifdef CONFIG_X86_64
void arch_release_task_struct(struct task_struct *tsk)
{
if (fpu_state_size_dynamic())
fpstate_free(&tsk->thread.fpu);
}
#endif
/*
* Free thread data structures etc..
*/
@ -155,6 +168,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
frame->flags = X86_EFLAGS_FIXED;
#endif
fpu_clone(p, clone_flags);
/* Kernel thread ? */
if (unlikely(p->flags & PF_KTHREAD)) {
p->thread.pkru = pkru_get_init_value();
@ -962,13 +977,17 @@ unsigned long __get_wchan(struct task_struct *p)
}
long do_arch_prctl_common(struct task_struct *task, int option,
unsigned long cpuid_enabled)
unsigned long arg2)
{
switch (option) {
case ARCH_GET_CPUID:
return get_cpuid_mode();
case ARCH_SET_CPUID:
return set_cpuid_mode(task, cpuid_enabled);
return set_cpuid_mode(task, arg2);
case ARCH_GET_XCOMP_SUPP:
case ARCH_GET_XCOMP_PERM:
case ARCH_REQ_XCOMP_PERM:
return fpu_xstate_prctl(task, option, arg2);
}
return -EINVAL;

View File

@ -41,7 +41,7 @@
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/sched.h>
#include <asm/desc.h>
#include <linux/err.h>
@ -160,7 +160,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
@ -213,7 +212,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
this_cpu_write(current_task, next_p);
switch_fpu_finish(next_fpu);
switch_fpu_finish();
/* Load the Intel cache allocation PQR MSR. */
resctrl_sched_in();

View File

@ -42,7 +42,7 @@
#include <asm/processor.h>
#include <asm/pkru.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/sched.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
@ -559,7 +559,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread;
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
@ -620,7 +619,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
this_cpu_write(current_task, next_p);
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
switch_fpu_finish(next_fpu);
switch_fpu_finish();
/* Reload sp0. */
update_task_stack(next_p);

View File

@ -29,9 +29,9 @@
#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/xstate.h>
#include <asm/debugreg.h>
#include <asm/ldt.h>
#include <asm/desc.h>

View File

@ -23,7 +23,7 @@
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/traps.h>

View File

@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/kstrtox.h>
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/tracehook.h>
@ -30,8 +31,8 @@
#include <asm/processor.h>
#include <asm/ucontext.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xstate.h>
#include <asm/vdso.h>
#include <asm/mce.h>
#include <asm/sighandling.h>
@ -41,6 +42,7 @@
#include <linux/compat.h>
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#include <asm/fpu/xstate.h>
#endif /* CONFIG_X86_64 */
#include <asm/syscall.h>
@ -79,9 +81,9 @@ static void force_valid_ss(struct pt_regs *regs)
# define CONTEXT_COPY_SIZE sizeof(struct sigcontext)
#endif
static int restore_sigcontext(struct pt_regs *regs,
struct sigcontext __user *usc,
unsigned long uc_flags)
static bool restore_sigcontext(struct pt_regs *regs,
struct sigcontext __user *usc,
unsigned long uc_flags)
{
struct sigcontext sc;
@ -89,7 +91,7 @@ static int restore_sigcontext(struct pt_regs *regs,
current->restart_block.fn = do_no_restart_syscall;
if (copy_from_user(&sc, usc, CONTEXT_COPY_SIZE))
return -EFAULT;
return false;
#ifdef CONFIG_X86_32
set_user_gs(regs, sc.gs);
@ -244,7 +246,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
unsigned long math_size = 0;
unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
int ret;
/* redzone */
if (IS_ENABLED(CONFIG_X86_64))
@ -292,8 +293,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
}
/* save i387 and extended state */
ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size);
if (ret < 0)
if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size))
return (void __user *)-1L;
return (void __user *)sp;
@ -643,7 +643,7 @@ SYSCALL_DEFINE0(sigreturn)
* x86_32 has no uc_flags bits relevant to restore_sigcontext.
* Save a few cycles by skipping the __get_user.
*/
if (restore_sigcontext(regs, &frame->sc, 0))
if (!restore_sigcontext(regs, &frame->sc, 0))
goto badframe;
return regs->ax;
@ -671,7 +671,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
@ -721,12 +721,15 @@ badframe:
/* max_frame_size tells userspace the worst case signal stack size. */
static unsigned long __ro_after_init max_frame_size;
static unsigned int __ro_after_init fpu_default_state_size;
void __init init_sigframe_size(void)
{
fpu_default_state_size = fpu__get_fpstate_size();
max_frame_size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING;
max_frame_size += fpu__get_fpstate_size() + MAX_XSAVE_PADDING;
max_frame_size += fpu_default_state_size + MAX_XSAVE_PADDING;
/* Userspace expects an aligned size. */
max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT);
@ -910,6 +913,62 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
force_sig(SIGSEGV);
}
#ifdef CONFIG_DYNAMIC_SIGFRAME
#ifdef CONFIG_STRICT_SIGALTSTACK_SIZE
static bool strict_sigaltstack_size __ro_after_init = true;
#else
static bool strict_sigaltstack_size __ro_after_init = false;
#endif
static int __init strict_sas_size(char *arg)
{
return kstrtobool(arg, &strict_sigaltstack_size);
}
__setup("strict_sas_size", strict_sas_size);
/*
* MINSIGSTKSZ is 2048 and can't be changed despite the fact that AVX512
* exceeds that size already. As such programs might never use the
* sigaltstack they just continued to work. While always checking against
* the real size would be correct, this might be considered a regression.
*
* Therefore avoid the sanity check, unless enforced by kernel
* configuration or command line option.
*
* When dynamic FPU features are supported, the check is also enforced when
* the task has permissions to use dynamic features. Tasks which have no
* permission are checked against the size of the non-dynamic feature set
* if strict checking is enabled. This avoids forcing all tasks on the
* system to allocate large sigaltstacks even if they are never going
* to use a dynamic feature. As this is serialized via sighand::siglock
* any permission request for a dynamic feature either happened already
* or will see the newly install sigaltstack size in the permission checks.
*/
bool sigaltstack_size_valid(size_t ss_size)
{
unsigned long fsize = max_frame_size - fpu_default_state_size;
u64 mask;
lockdep_assert_held(&current->sighand->siglock);
if (!fpu_state_size_dynamic() && !strict_sigaltstack_size)
return true;
fsize += current->group_leader->thread.fpu.perm.__user_state_size;
if (likely(ss_size > fsize))
return true;
if (strict_sigaltstack_size)
return ss_size > fsize;
mask = current->group_leader->thread.fpu.perm.__state_perm;
if (mask & XFEATURE_MASK_USER_DYNAMIC)
return ss_size > fsize;
return true;
}
#endif /* CONFIG_DYNAMIC_SIGFRAME */
#ifdef CONFIG_X86_X32_ABI
COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
{
@ -929,7 +988,7 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))

View File

@ -70,7 +70,7 @@
#include <asm/mwait.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>

View File

@ -48,7 +48,7 @@
#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/cpu.h>
#include <asm/cpu_entry_area.h>
#include <asm/mce.h>
@ -1108,10 +1108,48 @@ DEFINE_IDTENTRY(exc_spurious_interrupt_bug)
*/
}
static bool handle_xfd_event(struct pt_regs *regs)
{
u64 xfd_err;
int err;
if (!IS_ENABLED(CONFIG_X86_64) || !cpu_feature_enabled(X86_FEATURE_XFD))
return false;
rdmsrl(MSR_IA32_XFD_ERR, xfd_err);
if (!xfd_err)
return false;
wrmsrl(MSR_IA32_XFD_ERR, 0);
/* Die if that happens in kernel space */
if (WARN_ON(!user_mode(regs)))
return false;
local_irq_enable();
err = xfd_enable_feature(xfd_err);
switch (err) {
case -EPERM:
force_sig_fault(SIGILL, ILL_ILLOPC, error_get_trap_addr(regs));
break;
case -EFAULT:
force_sig(SIGSEGV);
break;
}
local_irq_disable();
return true;
}
DEFINE_IDTENTRY(exc_device_not_available)
{
unsigned long cr0 = read_cr0();
if (handle_xfd_event(regs))
return;
#ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
struct math_emu_info info = { };

View File

@ -17,10 +17,10 @@
#include <linux/misc_cgroup.h>
#include <linux/processor.h>
#include <linux/trace_events.h>
#include <asm/fpu/internal.h>
#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>
#include "x86.h"
#include "svm.h"

View File

@ -36,6 +36,7 @@
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
#include <asm/traps.h>
#include <asm/fpu/api.h>
#include <asm/virtext.h>
#include "trace.h"
@ -1346,10 +1347,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
/*
* SEV-ES guests maintain an encrypted version of their FPU
* state which is restored and saved on VMRUN and VMEXIT.
* Free the fpu structure to prevent KVM from attempting to
* access the FPU state.
* Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't
* do xsave/xrstor on it.
*/
kvm_free_guest_fpu(vcpu);
fpstate_set_confidential(&vcpu->arch.guest_fpu);
}
err = avic_init_vcpu(svm);

View File

@ -35,7 +35,7 @@
#include <asm/cpu_device_id.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>

View File

@ -68,7 +68,9 @@
#include <asm/mce.h>
#include <asm/pkru.h>
#include <linux/kernel_stat.h>
#include <asm/fpu/internal.h> /* Ugh! */
#include <asm/fpu/api.h>
#include <asm/fpu/xcr.h>
#include <asm/fpu/xstate.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
@ -293,8 +295,6 @@ u64 __read_mostly host_xcr0;
u64 __read_mostly supported_xcr0;
EXPORT_SYMBOL_GPL(supported_xcr0);
static struct kmem_cache *x86_fpu_cache;
static struct kmem_cache *x86_emulator_cache;
/*
@ -4700,144 +4700,27 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
return 0;
}
#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
{
struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
u64 xstate_bv = xsave->header.xfeatures;
u64 valid;
/*
* Copy legacy XSAVE area, to avoid complications with CPUID
* leaves 0 and 1 in the loop below.
*/
memcpy(dest, xsave, XSAVE_HDR_OFFSET);
/* Set XSTATE_BV */
xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
*(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
/*
* Copy each region from the possibly compacted offset to the
* non-compacted offset.
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
u32 size, offset, ecx, edx;
u64 xfeature_mask = valid & -valid;
int xfeature_nr = fls64(xfeature_mask) - 1;
void *src;
cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
if (xfeature_nr == XFEATURE_PKRU) {
memcpy(dest + offset, &vcpu->arch.pkru,
sizeof(vcpu->arch.pkru));
} else {
src = get_xsave_addr(xsave, xfeature_nr);
if (src)
memcpy(dest + offset, src, size);
}
valid -= xfeature_mask;
}
}
static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
{
struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
u64 valid;
/*
* Copy legacy XSAVE area, to avoid complications with CPUID
* leaves 0 and 1 in the loop below.
*/
memcpy(xsave, src, XSAVE_HDR_OFFSET);
/* Set XSTATE_BV and possibly XCOMP_BV. */
xsave->header.xfeatures = xstate_bv;
if (boot_cpu_has(X86_FEATURE_XSAVES))
xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
/*
* Copy each region from the non-compacted offset to the
* possibly compacted offset.
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
u32 size, offset, ecx, edx;
u64 xfeature_mask = valid & -valid;
int xfeature_nr = fls64(xfeature_mask) - 1;
cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
if (xfeature_nr == XFEATURE_PKRU) {
memcpy(&vcpu->arch.pkru, src + offset,
sizeof(vcpu->arch.pkru));
} else {
void *dest = get_xsave_addr(xsave, xfeature_nr);
if (dest)
memcpy(dest, src + offset, size);
}
valid -= xfeature_mask;
}
}
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
if (!vcpu->arch.guest_fpu)
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return;
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
memset(guest_xsave, 0, sizeof(struct kvm_xsave));
fill_xsave((u8 *) guest_xsave->region, vcpu);
} else {
memcpy(guest_xsave->region,
&vcpu->arch.guest_fpu->state.fxsave,
sizeof(struct fxregs_state));
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
XFEATURE_MASK_FPSSE;
}
fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
guest_xsave->region,
sizeof(guest_xsave->region),
vcpu->arch.pkru);
}
#define XSAVE_MXCSR_OFFSET 24
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
u64 xstate_bv;
u32 mxcsr;
if (!vcpu->arch.guest_fpu)
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
/*
* Here we allow setting states that are not present in
* CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
* with old userspace.
*/
if (xstate_bv & ~supported_xcr0 || mxcsr & ~mxcsr_feature_mask)
return -EINVAL;
load_xsave(vcpu, (u8 *)guest_xsave->region);
} else {
if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
mxcsr & ~mxcsr_feature_mask)
return -EINVAL;
memcpy(&vcpu->arch.guest_fpu->state.fxsave,
guest_xsave->region, sizeof(struct fxregs_state));
}
return 0;
return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
guest_xsave->region,
supported_xcr0, &vcpu->arch.pkru);
}
static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
@ -8434,18 +8317,11 @@ int kvm_arch_init(void *opaque)
}
r = -ENOMEM;
x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
__alignof__(struct fpu), SLAB_ACCOUNT,
NULL);
if (!x86_fpu_cache) {
printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
goto out;
}
x86_emulator_cache = kvm_alloc_emulator_cache();
if (!x86_emulator_cache) {
pr_err("kvm: failed to allocate cache for x86 emulator\n");
goto out_free_x86_fpu_cache;
goto out;
}
user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
@ -8483,8 +8359,6 @@ out_free_percpu:
free_percpu(user_return_msrs);
out_free_x86_emulator_cache:
kmem_cache_destroy(x86_emulator_cache);
out_free_x86_fpu_cache:
kmem_cache_destroy(x86_fpu_cache);
out:
return r;
}
@ -8511,7 +8385,6 @@ void kvm_arch_exit(void)
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_emulator_cache);
kmem_cache_destroy(x86_fpu_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
@ -9938,58 +9811,21 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return 0;
}
static void kvm_save_current_fpu(struct fpu *fpu)
{
/*
* If the target FPU state is not resident in the CPU registers, just
* memcpy() from current, else save CPU state directly to the target.
*/
if (test_thread_flag(TIF_NEED_FPU_LOAD))
memcpy(&fpu->state, &current->thread.fpu.state,
fpu_kernel_xstate_size);
else
save_fpregs_to_fpstate(fpu);
}
/* Swap (qemu) user FPU context for the guest FPU context. */
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
fpregs_lock();
kvm_save_current_fpu(vcpu->arch.user_fpu);
/*
* Guests with protected state can't have it set by the hypervisor,
* so skip trying to set it.
* Exclude PKRU from restore as restored separately in
* kvm_x86_ops.run().
*/
if (vcpu->arch.guest_fpu)
/* PKRU is separately restored in kvm_x86_ops.run. */
__restore_fpregs_from_fpstate(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
fpregs_mark_activate();
fpregs_unlock();
fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true);
trace_kvm_fpu(1);
}
/* When vcpu_run ends, restore user space FPU context. */
static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
fpregs_lock();
/*
* Guests with protected state can't have it read by the hypervisor,
* so skip trying to save it.
*/
if (vcpu->arch.guest_fpu)
kvm_save_current_fpu(vcpu->arch.guest_fpu);
restore_fpregs_from_fpstate(&vcpu->arch.user_fpu->state);
fpregs_mark_activate();
fpregs_unlock();
fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false);
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
@ -10570,12 +10406,12 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct fxregs_state *fxsave;
if (!vcpu->arch.guest_fpu)
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
vcpu_load(vcpu);
fxsave = &vcpu->arch.guest_fpu->state.fxsave;
fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
fpu->fsw = fxsave->swd;
@ -10593,12 +10429,12 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct fxregs_state *fxsave;
if (!vcpu->arch.guest_fpu)
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
vcpu_load(vcpu);
fxsave = &vcpu->arch.guest_fpu->state.fxsave;
fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
@ -10651,14 +10487,6 @@ static int sync_regs(struct kvm_vcpu *vcpu)
static void fx_init(struct kvm_vcpu *vcpu)
{
if (!vcpu->arch.guest_fpu)
return;
fpstate_init(&vcpu->arch.guest_fpu->state);
if (boot_cpu_has(X86_FEATURE_XSAVES))
vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
host_xcr0 | XSTATE_COMPACTION_ENABLED;
/*
* Ensure guest xcr0 is valid for loading
*/
@ -10667,15 +10495,6 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
}
void kvm_free_guest_fpu(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.guest_fpu) {
kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
vcpu->arch.guest_fpu = NULL;
}
}
EXPORT_SYMBOL_GPL(kvm_free_guest_fpu);
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
@ -10732,19 +10551,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (!alloc_emulate_ctxt(vcpu))
goto free_wbinvd_dirty_mask;
vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
GFP_KERNEL_ACCOUNT);
if (!vcpu->arch.user_fpu) {
pr_err("kvm: failed to allocate userspace's fpu\n");
if (!fpu_alloc_guest_fpstate(&vcpu->arch.guest_fpu)) {
pr_err("kvm: failed to allocate vcpu's fpu\n");
goto free_emulate_ctxt;
}
vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
GFP_KERNEL_ACCOUNT);
if (!vcpu->arch.guest_fpu) {
pr_err("kvm: failed to allocate vcpu's fpu\n");
goto free_user_fpu;
}
fx_init(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
@ -10777,9 +10588,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
return 0;
free_guest_fpu:
kvm_free_guest_fpu(vcpu);
free_user_fpu:
kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
free_emulate_ctxt:
kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
free_wbinvd_dirty_mask:
@ -10828,8 +10637,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
kvm_free_guest_fpu(vcpu);
fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
kvm_hv_vcpu_uninit(vcpu);
kvm_pmu_destroy(vcpu);
@ -10881,8 +10689,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
if (vcpu->arch.guest_fpu && kvm_mpx_supported()) {
void *mpx_state_buffer;
if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
/*
* To avoid have the INIT path from kvm_apic_has_events() that be
@ -10890,14 +10698,10 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
*/
if (init_event)
kvm_put_guest_fpu(vcpu);
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
XFEATURE_BNDREGS);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
XFEATURE_BNDCSR);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
if (init_event)
kvm_load_guest_fpu(vcpu);
}

View File

@ -107,9 +107,9 @@ SYM_FUNC_END(copy_mc_fragile)
.previous
_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
_ASM_EXTABLE_TYPE(.L_read_leading_bytes, .E_leading_bytes, EX_TYPE_DEFAULT_MCE_SAFE)
_ASM_EXTABLE_TYPE(.L_read_words, .E_read_words, EX_TYPE_DEFAULT_MCE_SAFE)
_ASM_EXTABLE_TYPE(.L_read_trailing_bytes, .E_trailing_bytes, EX_TYPE_DEFAULT_MCE_SAFE)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
@ -149,5 +149,5 @@ SYM_FUNC_END(copy_mc_enhanced_fast_string)
.previous
_ASM_EXTABLE_FAULT(.L_copy, .E_copy)
_ASM_EXTABLE_TYPE(.L_copy, .E_copy, EX_TYPE_DEFAULT_MCE_SAFE)
#endif /* !CONFIG_UML */

View File

@ -53,7 +53,7 @@ void fpstate_init_soft(struct swregs_state *soft)
void finit(void)
{
fpstate_init_soft(&current->thread.fpu.state.soft);
fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft);
}
/*

View File

@ -31,7 +31,7 @@
#include <linux/uaccess.h>
#include <asm/traps.h>
#include <asm/user.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include "fpu_system.h"
#include "fpu_emu.h"
@ -640,7 +640,7 @@ int fpregs_soft_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
struct swregs_state *s387 = &target->thread.fpu.state.soft;
struct swregs_state *s387 = &target->thread.fpu.fpstate->regs.soft;
void *space = s387->st_space;
int ret;
int offset, other, i, tags, regnr, tag, newtop;
@ -691,7 +691,7 @@ int fpregs_soft_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
struct swregs_state *s387 = &target->thread.fpu.state.soft;
struct swregs_state *s387 = &target->thread.fpu.fpstate->regs.soft;
const void *space = s387->st_space;
int offset = (S387->ftop & 7) * 10, other = 80 - offset;

View File

@ -73,7 +73,7 @@ static inline bool seg_writable(struct desc_struct *d)
return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE;
}
#define I387 (&current->thread.fpu.state)
#define I387 (&current->thread.fpu.fpstate->regs)
#define FPU_info (I387->soft.info)
#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs))

View File

@ -4,46 +4,30 @@
#include <linux/sched/debug.h>
#include <xen/xen.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/sev.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
typedef bool (*ex_handler_t)(const struct exception_table_entry *,
struct pt_regs *, int, unsigned long,
unsigned long);
static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
{
return (unsigned long)&x->fixup + x->fixup;
}
static inline ex_handler_t
ex_fixup_handler(const struct exception_table_entry *x)
{
return (ex_handler_t)((unsigned long)&x->handler + x->handler);
}
__visible bool ex_handler_default(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
static bool ex_handler_default(const struct exception_table_entry *fixup,
struct pt_regs *regs)
{
regs->ip = ex_fixup_addr(fixup);
return true;
}
EXPORT_SYMBOL(ex_handler_default);
__visible bool ex_handler_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
static bool ex_handler_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
regs->ip = ex_fixup_addr(fixup);
regs->ax = trapnr;
return true;
return ex_handler_default(fixup, regs);
}
EXPORT_SYMBOL_GPL(ex_handler_fault);
/*
* Handler for when we fail to restore a task's FPU state. We should never get
@ -55,65 +39,47 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
* of vulnerability by restoring from the initial state (essentially, zeroing
* out all the FPU registers) if we can't restore from the task's FPU state.
*/
__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
struct pt_regs *regs)
{
regs->ip = ex_fixup_addr(fixup);
WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
(void *)instruction_pointer(regs));
__restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate());
fpu_reset_from_exception_fixup();
return true;
}
EXPORT_SYMBOL_GPL(ex_handler_fprestore);
__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
static bool ex_handler_uaccess(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
regs->ip = ex_fixup_addr(fixup);
return true;
return ex_handler_default(fixup, regs);
}
EXPORT_SYMBOL(ex_handler_uaccess);
__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
static bool ex_handler_copy(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
regs->ip = ex_fixup_addr(fixup);
regs->ax = trapnr;
return true;
return ex_handler_fault(fixup, regs, trapnr);
}
EXPORT_SYMBOL(ex_handler_copy);
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
static bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs)
{
if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip))
show_stack_regs(regs);
/* Pretend that the read succeeded and returned 0. */
regs->ip = ex_fixup_addr(fixup);
regs->ax = 0;
regs->dx = 0;
return true;
return ex_handler_default(fixup, regs);
}
EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
static bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs)
{
if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx,
@ -121,45 +87,29 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup
show_stack_regs(regs);
/* Pretend that the write succeeded. */
regs->ip = ex_fixup_addr(fixup);
return true;
return ex_handler_default(fixup, regs);
}
EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
static bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
struct pt_regs *regs)
{
if (static_cpu_has(X86_BUG_NULL_SEG))
asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
asm volatile ("mov %0, %%fs" : : "rm" (0));
return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr);
return ex_handler_default(fixup, regs);
}
EXPORT_SYMBOL(ex_handler_clear_fs);
enum handler_type ex_get_fault_handler_type(unsigned long ip)
int ex_get_fixup_type(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;
const struct exception_table_entry *e = search_exception_tables(ip);
e = search_exception_tables(ip);
if (!e)
return EX_HANDLER_NONE;
handler = ex_fixup_handler(e);
if (handler == ex_handler_fault)
return EX_HANDLER_FAULT;
else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
return EX_HANDLER_UACCESS;
else
return EX_HANDLER_OTHER;
return e ? e->type : EX_TYPE_NONE;
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
unsigned long fault_addr)
{
const struct exception_table_entry *e;
ex_handler_t handler;
#ifdef CONFIG_PNPBIOS
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@ -179,8 +129,35 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
if (!e)
return 0;
handler = ex_fixup_handler(e);
return handler(e, regs, trapnr, error_code, fault_addr);
switch (e->type) {
case EX_TYPE_DEFAULT:
case EX_TYPE_DEFAULT_MCE_SAFE:
return ex_handler_default(e, regs);
case EX_TYPE_FAULT:
case EX_TYPE_FAULT_MCE_SAFE:
return ex_handler_fault(e, regs, trapnr);
case EX_TYPE_UACCESS:
return ex_handler_uaccess(e, regs, trapnr);
case EX_TYPE_COPY:
return ex_handler_copy(e, regs, trapnr);
case EX_TYPE_CLEAR_FS:
return ex_handler_clear_fs(e, regs);
case EX_TYPE_FPU_RESTORE:
return ex_handler_fprestore(e, regs);
case EX_TYPE_RDMSR:
return ex_handler_rdmsr_unsafe(e, regs);
case EX_TYPE_WRMSR:
return ex_handler_wrmsr_unsafe(e, regs);
case EX_TYPE_BPF:
return ex_handler_bpf(e, regs);
case EX_TYPE_RDMSR_IN_MCE:
ex_handler_msr_mce(regs, false);
break;
case EX_TYPE_WRMSR_IN_MCE:
ex_handler_msr_mce(regs, true);
break;
}
BUG();
}
extern unsigned int early_recursion_flag;

View File

@ -803,9 +803,7 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
return 0;
}
static bool ex_handler_bpf(const struct exception_table_entry *x,
struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr)
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
u32 reg = x->fixup >> 8;
@ -1288,12 +1286,7 @@ st: if (is_imm8(insn->off))
}
ex->insn = delta;
delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
if (!is_simm32(delta)) {
pr_err("extable->handler doesn't fit into 32-bit\n");
return -EFAULT;
}
ex->handler = delta;
ex->type = EX_TYPE_BPF;
if (dst_reg > BPF_REG_9) {
pr_err("verifier error\n");

View File

@ -20,7 +20,7 @@
#include <asm/page.h>
#include <asm/mce.h>
#include <asm/suspend.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/api.h>
#include <asm/debugreg.h>
#include <asm/cpu.h>
#include <asm/mmu_context.h>

View File

@ -464,6 +464,12 @@ int __save_altstack(stack_t __user *, unsigned long);
unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \
} while (0);
#ifdef CONFIG_DYNAMIC_SIGFRAME
bool sigaltstack_size_valid(size_t ss_size);
#else
static inline bool sigaltstack_size_valid(size_t size) { return true; }
#endif /* !CONFIG_DYNAMIC_SIGFRAME */
#ifdef CONFIG_PROC_FS
struct seq_file;
extern void render_sigset_t(struct seq_file *, const char *, sigset_t *);

View File

@ -4138,11 +4138,29 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
return 0;
}
#ifdef CONFIG_DYNAMIC_SIGFRAME
static inline void sigaltstack_lock(void)
__acquires(&current->sighand->siglock)
{
spin_lock_irq(&current->sighand->siglock);
}
static inline void sigaltstack_unlock(void)
__releases(&current->sighand->siglock)
{
spin_unlock_irq(&current->sighand->siglock);
}
#else
static inline void sigaltstack_lock(void) { }
static inline void sigaltstack_unlock(void) { }
#endif
static int
do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp,
size_t min_ss_size)
{
struct task_struct *t = current;
int ret = 0;
if (oss) {
memset(oss, 0, sizeof(stack_t));
@ -4166,19 +4184,24 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp,
ss_mode != 0))
return -EINVAL;
sigaltstack_lock();
if (ss_mode == SS_DISABLE) {
ss_size = 0;
ss_sp = NULL;
} else {
if (unlikely(ss_size < min_ss_size))
return -ENOMEM;
ret = -ENOMEM;
if (!sigaltstack_size_valid(ss_size))
ret = -ENOMEM;
}
t->sas_ss_sp = (unsigned long) ss_sp;
t->sas_ss_size = ss_size;
t->sas_ss_flags = ss_flags;
if (!ret) {
t->sas_ss_sp = (unsigned long) ss_sp;
t->sas_ss_size = ss_size;
t->sas_ss_flags = ss_flags;
}
sigaltstack_unlock();
}
return 0;
return ret;
}
SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss)

View File

@ -240,7 +240,7 @@ static void x86_sort_relative_table(char *extab_image, int image_size)
w(r(loc) + i, loc);
w(r(loc + 1) + i + 4, loc + 1);
w(r(loc + 2) + i + 8, loc + 2);
/* Don't touch the fixup type */
i += sizeof(uint32_t) * 3;
}
@ -253,7 +253,7 @@ static void x86_sort_relative_table(char *extab_image, int image_size)
w(r(loc) - i, loc);
w(r(loc + 1) - (i + 4), loc + 1);
w(r(loc + 2) - (i + 8), loc + 2);
/* Don't touch the fixup type */
i += sizeof(uint32_t) * 3;
}

View File

@ -18,7 +18,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
corrupt_xstate_header
corrupt_xstate_header amx
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall

View File

@ -0,0 +1,851 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <err.h>
#include <errno.h>
#include <pthread.h>
#include <setjmp.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>
#include <x86intrin.h>
#include <sys/auxv.h>
#include <sys/mman.h>
#include <sys/shm.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#ifndef __x86_64__
# error This test is 64-bit only
#endif
#define XSAVE_HDR_OFFSET 512
#define XSAVE_HDR_SIZE 64
struct xsave_buffer {
union {
struct {
char legacy[XSAVE_HDR_OFFSET];
char header[XSAVE_HDR_SIZE];
char extended[0];
};
char bytes[0];
};
};
static inline uint64_t xgetbv(uint32_t index)
{
uint32_t eax, edx;
asm volatile("xgetbv;"
: "=a" (eax), "=d" (edx)
: "c" (index));
return eax + ((uint64_t)edx << 32);
}
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
asm volatile("cpuid;"
: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
: "0" (*eax), "2" (*ecx));
}
static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
{
uint32_t rfbm_lo = rfbm;
uint32_t rfbm_hi = rfbm >> 32;
asm volatile("xsave (%%rdi)"
: : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
: "memory");
}
static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
{
uint32_t rfbm_lo = rfbm;
uint32_t rfbm_hi = rfbm >> 32;
asm volatile("xrstor (%%rdi)"
: : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
}
/* err() exits and will not return */
#define fatal_error(msg, ...) err(1, "[FAIL]\t" msg, ##__VA_ARGS__)
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_sigaction = handler;
sa.sa_flags = SA_SIGINFO | flags;
sigemptyset(&sa.sa_mask);
if (sigaction(sig, &sa, 0))
fatal_error("sigaction");
}
static void clearhandler(int sig)
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_handler = SIG_DFL;
sigemptyset(&sa.sa_mask);
if (sigaction(sig, &sa, 0))
fatal_error("sigaction");
}
#define XFEATURE_XTILECFG 17
#define XFEATURE_XTILEDATA 18
#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG)
#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA)
#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26)
#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27)
static inline void check_cpuid_xsave(void)
{
uint32_t eax, ebx, ecx, edx;
/*
* CPUID.1:ECX.XSAVE[bit 26] enumerates general
* support for the XSAVE feature set, including
* XGETBV.
*/
eax = 1;
ecx = 0;
cpuid(&eax, &ebx, &ecx, &edx);
if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
fatal_error("cpuid: no CPU xsave support");
if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
fatal_error("cpuid: no OS xsave support");
}
static uint32_t xbuf_size;
static struct {
uint32_t xbuf_offset;
uint32_t size;
} xtiledata;
#define CPUID_LEAF_XSTATE 0xd
#define CPUID_SUBLEAF_XSTATE_USER 0x0
#define TILE_CPUID 0x1d
#define TILE_PALETTE_ID 0x1
static void check_cpuid_xtiledata(void)
{
uint32_t eax, ebx, ecx, edx;
eax = CPUID_LEAF_XSTATE;
ecx = CPUID_SUBLEAF_XSTATE_USER;
cpuid(&eax, &ebx, &ecx, &edx);
/*
* EBX enumerates the size (in bytes) required by the XSAVE
* instruction for an XSAVE area containing all the user state
* components corresponding to bits currently set in XCR0.
*
* Stash that off so it can be used to allocate buffers later.
*/
xbuf_size = ebx;
eax = CPUID_LEAF_XSTATE;
ecx = XFEATURE_XTILEDATA;
cpuid(&eax, &ebx, &ecx, &edx);
/*
* eax: XTILEDATA state component size
* ebx: XTILEDATA state component offset in user buffer
*/
if (!eax || !ebx)
fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
eax, ebx);
xtiledata.size = eax;
xtiledata.xbuf_offset = ebx;
}
/* The helpers for managing XSAVE buffer and tile states: */
struct xsave_buffer *alloc_xbuf(void)
{
struct xsave_buffer *xbuf;
/* XSAVE buffer should be 64B-aligned. */
xbuf = aligned_alloc(64, xbuf_size);
if (!xbuf)
fatal_error("aligned_alloc()");
return xbuf;
}
static inline void clear_xstate_header(struct xsave_buffer *buffer)
{
memset(&buffer->header, 0, sizeof(buffer->header));
}
static inline uint64_t get_xstatebv(struct xsave_buffer *buffer)
{
/* XSTATE_BV is at the beginning of the header: */
return *(uint64_t *)&buffer->header;
}
static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
{
/* XSTATE_BV is at the beginning of the header: */
*(uint64_t *)(&buffer->header) = bv;
}
static void set_rand_tiledata(struct xsave_buffer *xbuf)
{
int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset];
int data;
int i;
/*
* Ensure that 'data' is never 0. This ensures that
* the registers are never in their initial configuration
* and thus never tracked as being in the init state.
*/
data = rand() | 1;
for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++)
*ptr = data;
}
struct xsave_buffer *stashed_xsave;
static void init_stashed_xsave(void)
{
stashed_xsave = alloc_xbuf();
if (!stashed_xsave)
fatal_error("failed to allocate stashed_xsave\n");
clear_xstate_header(stashed_xsave);
}
static void free_stashed_xsave(void)
{
free(stashed_xsave);
}
/* See 'struct _fpx_sw_bytes' at sigcontext.h */
#define SW_BYTES_OFFSET 464
/* N.B. The struct's field name varies so read from the offset. */
#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8)
static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer)
{
return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET);
}
static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
{
return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
}
/* Work around printf() being unsafe in signals: */
#define SIGNAL_BUF_LEN 1000
char signal_message_buffer[SIGNAL_BUF_LEN];
void sig_print(char *msg)
{
int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1;
strncat(signal_message_buffer, msg, left);
}
static volatile bool noperm_signaled;
static int noperm_errs;
/*
* Signal handler for when AMX is used but
* permission has not been obtained.
*/
static void handle_noperm(int sig, siginfo_t *si, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t *)ctx_void;
void *xbuf = ctx->uc_mcontext.fpregs;
struct _fpx_sw_bytes *sw_bytes;
uint64_t features;
/* Reset the signal message buffer: */
signal_message_buffer[0] = '\0';
sig_print("\tAt SIGILL handler,\n");
if (si->si_code != ILL_ILLOPC) {
noperm_errs++;
sig_print("[FAIL]\tInvalid signal code.\n");
} else {
sig_print("[OK]\tValid signal code (ILL_ILLOPC).\n");
}
sw_bytes = get_fpx_sw_bytes(xbuf);
/*
* Without permission, the signal XSAVE buffer should not
* have room for AMX register state (aka. xtiledata).
* Check that the size does not overlap with where xtiledata
* will reside.
*
* This also implies that no state components *PAST*
* XTILEDATA (features >=19) can be present in the buffer.
*/
if (sw_bytes->xstate_size <= xtiledata.xbuf_offset) {
sig_print("[OK]\tValid xstate size\n");
} else {
noperm_errs++;
sig_print("[FAIL]\tInvalid xstate size\n");
}
features = get_fpx_sw_bytes_features(xbuf);
/*
* Without permission, the XTILEDATA feature
* bit should not be set.
*/
if ((features & XFEATURE_MASK_XTILEDATA) == 0) {
sig_print("[OK]\tValid xstate mask\n");
} else {
noperm_errs++;
sig_print("[FAIL]\tInvalid xstate mask\n");
}
noperm_signaled = true;
ctx->uc_mcontext.gregs[REG_RIP] += 3; /* Skip the faulting XRSTOR */
}
/* Return true if XRSTOR is successful; otherwise, false. */
static inline bool xrstor_safe(struct xsave_buffer *xbuf, uint64_t mask)
{
noperm_signaled = false;
xrstor(xbuf, mask);
/* Print any messages produced by the signal code: */
printf("%s", signal_message_buffer);
/*
* Reset the buffer to make sure any future printing
* only outputs new messages:
*/
signal_message_buffer[0] = '\0';
if (noperm_errs)
fatal_error("saw %d errors in noperm signal handler\n", noperm_errs);
return !noperm_signaled;
}
/*
* Use XRSTOR to populate the XTILEDATA registers with
* random data.
*
* Return true if successful; otherwise, false.
*/
static inline bool load_rand_tiledata(struct xsave_buffer *xbuf)
{
clear_xstate_header(xbuf);
set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA);
set_rand_tiledata(xbuf);
return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA);
}
/* Return XTILEDATA to its initial configuration. */
static inline void init_xtiledata(void)
{
clear_xstate_header(stashed_xsave);
xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA);
}
enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
/* arch_prctl() and sigaltstack() test */
#define ARCH_GET_XCOMP_PERM 0x1022
#define ARCH_REQ_XCOMP_PERM 0x1023
static void req_xtiledata_perm(void)
{
syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
}
static void validate_req_xcomp_perm(enum expected_result exp)
{
unsigned long bitmask;
long rc;
rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
if (exp == FAIL_EXPECTED) {
if (rc) {
printf("[OK]\tARCH_REQ_XCOMP_PERM saw expected failure..\n");
return;
}
fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected success.\n");
} else if (rc) {
fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n");
}
rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
if (rc) {
fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
} else if (bitmask & XFEATURE_MASK_XTILE) {
printf("\tARCH_REQ_XCOMP_PERM is successful.\n");
}
}
static void validate_xcomp_perm(enum expected_result exp)
{
bool load_success = load_rand_tiledata(stashed_xsave);
if (exp == FAIL_EXPECTED) {
if (load_success) {
noperm_errs++;
printf("[FAIL]\tLoad tiledata succeeded.\n");
} else {
printf("[OK]\tLoad tiledata failed.\n");
}
} else if (exp == SUCCESS_EXPECTED) {
if (load_success) {
printf("[OK]\tLoad tiledata succeeded.\n");
} else {
noperm_errs++;
printf("[FAIL]\tLoad tiledata failed.\n");
}
}
}
#ifndef AT_MINSIGSTKSZ
# define AT_MINSIGSTKSZ 51
#endif
static void *alloc_altstack(unsigned int size)
{
void *altstack;
altstack = mmap(NULL, size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (altstack == MAP_FAILED)
fatal_error("mmap() for altstack");
return altstack;
}
static void setup_altstack(void *addr, unsigned long size, enum expected_result exp)
{
stack_t ss;
int rc;
memset(&ss, 0, sizeof(ss));
ss.ss_size = size;
ss.ss_sp = addr;
rc = sigaltstack(&ss, NULL);
if (exp == FAIL_EXPECTED) {
if (rc) {
printf("[OK]\tsigaltstack() failed.\n");
} else {
fatal_error("sigaltstack() succeeded unexpectedly.\n");
}
} else if (rc) {
fatal_error("sigaltstack()");
}
}
static void test_dynamic_sigaltstack(void)
{
unsigned int small_size, enough_size;
unsigned long minsigstksz;
void *altstack;
minsigstksz = getauxval(AT_MINSIGSTKSZ);
printf("\tAT_MINSIGSTKSZ = %lu\n", minsigstksz);
/*
* getauxval() itself can return 0 for failure or
* success. But, in this case, AT_MINSIGSTKSZ
* will always return a >=0 value if implemented.
* Just check for 0.
*/
if (minsigstksz == 0) {
printf("no support for AT_MINSIGSTKSZ, skipping sigaltstack tests\n");
return;
}
enough_size = minsigstksz * 2;
altstack = alloc_altstack(enough_size);
printf("\tAllocate memory for altstack (%u bytes).\n", enough_size);
/*
* Try setup_altstack() with a size which can not fit
* XTILEDATA. ARCH_REQ_XCOMP_PERM should fail.
*/
small_size = minsigstksz - xtiledata.size;
printf("\tAfter sigaltstack() with small size (%u bytes).\n", small_size);
setup_altstack(altstack, small_size, SUCCESS_EXPECTED);
validate_req_xcomp_perm(FAIL_EXPECTED);
/*
* Try setup_altstack() with a size derived from
* AT_MINSIGSTKSZ. It should be more than large enough
* and thus ARCH_REQ_XCOMP_PERM should succeed.
*/
printf("\tAfter sigaltstack() with enough size (%u bytes).\n", enough_size);
setup_altstack(altstack, enough_size, SUCCESS_EXPECTED);
validate_req_xcomp_perm(SUCCESS_EXPECTED);
/*
* Try to coerce setup_altstack() to again accept a
* too-small altstack. This ensures that big-enough
* sigaltstacks can not shrink to a too-small value
* once XTILEDATA permission is established.
*/
printf("\tThen, sigaltstack() with small size (%u bytes).\n", small_size);
setup_altstack(altstack, small_size, FAIL_EXPECTED);
}
static void test_dynamic_state(void)
{
pid_t parent, child, grandchild;
parent = fork();
if (parent < 0) {
/* fork() failed */
fatal_error("fork");
} else if (parent > 0) {
int status;
/* fork() succeeded. Now in the parent. */
wait(&status);
if (!WIFEXITED(status) || WEXITSTATUS(status))
fatal_error("arch_prctl test parent exit");
return;
}
/* fork() succeeded. Now in the child . */
printf("[RUN]\tCheck ARCH_REQ_XCOMP_PERM around process fork() and sigaltack() test.\n");
printf("\tFork a child.\n");
child = fork();
if (child < 0) {
fatal_error("fork");
} else if (child > 0) {
int status;
wait(&status);
if (!WIFEXITED(status) || WEXITSTATUS(status))
fatal_error("arch_prctl test child exit");
_exit(0);
}
/*
* The permission request should fail without an
* XTILEDATA-compatible signal stack
*/
printf("\tTest XCOMP_PERM at child.\n");
validate_xcomp_perm(FAIL_EXPECTED);
/*
* Set up an XTILEDATA-compatible signal stack and
* also obtain permission to populate XTILEDATA.
*/
printf("\tTest dynamic sigaltstack at child:\n");
test_dynamic_sigaltstack();
/* Ensure that XTILEDATA can be populated. */
printf("\tTest XCOMP_PERM again at child.\n");
validate_xcomp_perm(SUCCESS_EXPECTED);
printf("\tFork a grandchild.\n");
grandchild = fork();
if (grandchild < 0) {
/* fork() failed */
fatal_error("fork");
} else if (!grandchild) {
/* fork() succeeded. Now in the (grand)child. */
printf("\tTest XCOMP_PERM at grandchild.\n");
/*
* Ensure that the grandchild inherited
* permission and a compatible sigaltstack:
*/
validate_xcomp_perm(SUCCESS_EXPECTED);
} else {
int status;
/* fork() succeeded. Now in the parent. */
wait(&status);
if (!WIFEXITED(status) || WEXITSTATUS(status))
fatal_error("fork test grandchild");
}
_exit(0);
}
/*
* Save current register state and compare it to @xbuf1.'
*
* Returns false if @xbuf1 matches the registers.
* Returns true if @xbuf1 differs from the registers.
*/
static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
{
struct xsave_buffer *xbuf2;
int ret;
xbuf2 = alloc_xbuf();
if (!xbuf2)
fatal_error("failed to allocate XSAVE buffer\n");
xsave(xbuf2, XFEATURE_MASK_XTILEDATA);
ret = memcmp(&xbuf1->bytes[xtiledata.xbuf_offset],
&xbuf2->bytes[xtiledata.xbuf_offset],
xtiledata.size);
free(xbuf2);
if (ret == 0)
return false;
return true;
}
static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf)
{
int ret = __validate_tiledata_regs(xbuf);
if (ret != 0)
fatal_error("TILEDATA registers changed");
}
static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf)
{
int ret = __validate_tiledata_regs(xbuf);
if (ret == 0)
fatal_error("TILEDATA registers did not change");
}
/* tiledata inheritance test */
static void test_fork(void)
{
pid_t child, grandchild;
child = fork();
if (child < 0) {
/* fork() failed */
fatal_error("fork");
} else if (child > 0) {
/* fork() succeeded. Now in the parent. */
int status;
wait(&status);
if (!WIFEXITED(status) || WEXITSTATUS(status))
fatal_error("fork test child");
return;
}
/* fork() succeeded. Now in the child. */
printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tiledata\n");
load_rand_tiledata(stashed_xsave);
grandchild = fork();
if (grandchild < 0) {
/* fork() failed */
fatal_error("fork");
} else if (grandchild > 0) {
/* fork() succeeded. Still in the first child. */
int status;
wait(&status);
if (!WIFEXITED(status) || WEXITSTATUS(status))
fatal_error("fork test grand child");
_exit(0);
}
/* fork() succeeded. Now in the (grand)child. */
/*
* TILEDATA registers are not preserved across fork().
* Ensure that their value has changed:
*/
validate_tiledata_regs_changed(stashed_xsave);
_exit(0);
}
/* Context switching test */
static struct _ctxtswtest_cfg {
unsigned int iterations;
unsigned int num_threads;
} ctxtswtest_config;
struct futex_info {
pthread_t thread;
int nr;
pthread_mutex_t mutex;
struct futex_info *next;
};
static void *check_tiledata(void *info)
{
struct futex_info *finfo = (struct futex_info *)info;
struct xsave_buffer *xbuf;
int i;
xbuf = alloc_xbuf();
if (!xbuf)
fatal_error("unable to allocate XSAVE buffer");
/*
* Load random data into 'xbuf' and then restore
* it to the tile registers themselves.
*/
load_rand_tiledata(xbuf);
for (i = 0; i < ctxtswtest_config.iterations; i++) {
pthread_mutex_lock(&finfo->mutex);
/*
* Ensure the register values have not
* diverged from those recorded in 'xbuf'.
*/
validate_tiledata_regs_same(xbuf);
/* Load new, random values into xbuf and registers */
load_rand_tiledata(xbuf);
/*
* The last thread's last unlock will be for
* thread 0's mutex. However, thread 0 will
* have already exited the loop and the mutex
* will already be unlocked.
*
* Because this is not an ERRORCHECK mutex,
* that inconsistency will be silently ignored.
*/
pthread_mutex_unlock(&finfo->next->mutex);
}
free(xbuf);
/*
* Return this thread's finfo, which is
* a unique value for this thread.
*/
return finfo;
}
static int create_threads(int num, struct futex_info *finfo)
{
int i;
for (i = 0; i < num; i++) {
int next_nr;
finfo[i].nr = i;
/*
* Thread 'i' will wait on this mutex to
* be unlocked. Lock it immediately after
* initialization:
*/
pthread_mutex_init(&finfo[i].mutex, NULL);
pthread_mutex_lock(&finfo[i].mutex);
next_nr = (i + 1) % num;
finfo[i].next = &finfo[next_nr];
if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i]))
fatal_error("pthread_create()");
}
return 0;
}
static void affinitize_cpu0(void)
{
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
fatal_error("sched_setaffinity to CPU 0");
}
static void test_context_switch(void)
{
struct futex_info *finfo;
int i;
/* Affinitize to one CPU to force context switches */
affinitize_cpu0();
req_xtiledata_perm();
printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n",
ctxtswtest_config.iterations,
ctxtswtest_config.num_threads);
finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads);
if (!finfo)
fatal_error("malloc()");
create_threads(ctxtswtest_config.num_threads, finfo);
/*
* This thread wakes up thread 0
* Thread 0 will wake up 1
* Thread 1 will wake up 2
* ...
* the last thread will wake up 0
*
* ... this will repeat for the configured
* number of iterations.
*/
pthread_mutex_unlock(&finfo[0].mutex);
/* Wait for all the threads to finish: */
for (i = 0; i < ctxtswtest_config.num_threads; i++) {
void *thread_retval;
int rc;
rc = pthread_join(finfo[i].thread, &thread_retval);
if (rc)
fatal_error("pthread_join() failed for thread %d err: %d\n",
i, rc);
if (thread_retval != &finfo[i])
fatal_error("unexpected thread retval for thread %d: %p\n",
i, thread_retval);
}
printf("[OK]\tNo incorrect case was found.\n");
free(finfo);
}
int main(void)
{
/* Check hardware availability at first */
check_cpuid_xsave();
check_cpuid_xtiledata();
init_stashed_xsave();
sethandler(SIGILL, handle_noperm, 0);
test_dynamic_state();
/* Request permission for the following tests */
req_xtiledata_perm();
test_fork();
ctxtswtest_config.iterations = 10;
ctxtswtest_config.num_threads = 5;
test_context_switch();
clearhandler(SIGILL);
free_stashed_xsave();
return 0;
}