2008-07-30 01:29:19 +08:00
|
|
|
#ifndef __ASM_X86_XSAVE_H
|
|
|
|
#define __ASM_X86_XSAVE_H
|
|
|
|
|
2008-07-30 08:23:16 +08:00
|
|
|
#include <linux/types.h>
|
2008-07-30 01:29:19 +08:00
|
|
|
#include <asm/processor.h>
|
|
|
|
|
2010-07-22 01:03:54 +08:00
|
|
|
/* CPUID leaf (0xd) that enumerates the processor's extended (xsave) state. */
#define XSTATE_CPUID 0x0000000d
|
2008-07-30 01:29:19 +08:00
|
|
|
|
|
|
|
/* Feature-mask bit 0: x87 floating-point state. */
#define XSTATE_FP 0x1
|
|
|
|
/* Feature-mask bit 1: SSE state. */
#define XSTATE_SSE 0x2
|
2009-04-11 06:21:24 +08:00
|
|
|
/* Feature-mask bit 2: YMM (AVX) state. */
#define XSTATE_YMM 0x4
|
2008-07-30 01:29:19 +08:00
|
|
|
|
|
|
|
/* Both legacy state components together: x87 FP plus SSE. */
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
|
|
|
|
|
|
|
|
/* Size in bytes of the legacy FXSAVE region at the start of an xsave area. */
#define FXSAVE_SIZE 512
|
|
|
|
|
2010-06-13 17:29:39 +08:00
|
|
|
/* Size in bytes of the xsave header. */
#define XSAVE_HDR_SIZE 64
|
|
|
|
/* The xsave header begins immediately after the legacy FXSAVE region. */
#define XSAVE_HDR_OFFSET FXSAVE_SIZE
|
|
|
|
|
|
|
|
/* Size in bytes of the YMM state component. */
#define XSAVE_YMM_SIZE 256
|
|
|
|
/* The YMM state follows the xsave header directly (512 + 64 = offset 576). */
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
|
2010-05-17 17:22:23 +08:00
|
|
|
|
2008-07-30 01:29:19 +08:00
|
|
|
/*
|
|
|
|
* These are the features that the OS can handle currently.
|
|
|
|
*/
|
2009-04-11 06:21:24 +08:00
|
|
|
/* Currently the x87 FP, SSE and YMM state components. */
#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
|
2008-07-30 01:29:19 +08:00
|
|
|
|
2008-07-30 01:29:20 +08:00
|
|
|
/*
 * String fragment spliced in front of the hand-assembled ".byte" opcode
 * sequences used in this header: a 0x48 prefix byte on 64-bit builds,
 * nothing at all otherwise.
 */
#if defined(CONFIG_X86_64)
#define REX_PREFIX "0x48, "
#else
#define REX_PREFIX
#endif /* CONFIG_X86_64 */
|
|
|
|
|
2008-07-30 08:23:16 +08:00
|
|
|
extern unsigned int xstate_size;
|
|
|
|
extern u64 pcntxt_mask;
|
2010-02-12 03:50:59 +08:00
|
|
|
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
|
2012-08-25 05:13:02 +08:00
|
|
|
extern struct xsave_struct *init_xstate_buf;
|
2008-07-30 01:29:19 +08:00
|
|
|
|
|
|
|
extern void xsave_init(void);
|
2010-02-12 03:50:59 +08:00
|
|
|
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
|
2008-07-30 01:29:20 +08:00
|
|
|
extern int init_fpu(struct task_struct *child);
|
|
|
|
|
2012-07-25 07:05:28 +08:00
|
|
|
static inline int fpu_xrstor_checking(struct xsave_struct *fx)
|
2008-07-30 01:29:20 +08:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
|
|
|
|
"2:\n"
|
|
|
|
".section .fixup,\"ax\"\n"
|
|
|
|
"3: movl $-1,%[err]\n"
|
|
|
|
" jmp 2b\n"
|
|
|
|
".previous\n"
|
|
|
|
_ASM_EXTABLE(1b, 3b)
|
|
|
|
: [err] "=r" (err)
|
|
|
|
: "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
|
|
|
|
: "memory");
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2008-07-30 01:29:25 +08:00
|
|
|
/*
 * xsave_user - save extended state straight into a user-space buffer
 * @buf: user pointer to the xsave area to fill
 *
 * The xsave header in @buf is zeroed first so that its reserved fields end
 * up as zero.  The save itself is the hand-encoded instruction
 * 0x0f,0xae,0x27 (XSAVE addressed through the "D" register, preceded by
 * REX_PREFIX) with an all-ones feature mask, bracketed by
 * ASM_STAC/ASM_CLAC.
 *
 * Returns 0 on success, -EFAULT if the header could not be cleared, or -1
 * if the save instruction faulted and the fixup code ran.
 */
static inline int xsave_user(struct xsave_struct __user *buf)
{
	int fault;

	/* Zero the header up front: its reserved fields must read as zero */
	if (unlikely(__clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr))))
		return -EFAULT;

	__asm__ __volatile__(ASM_STAC "\n"
			     "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
			     "2: " ASM_CLAC "\n"
			     ".section .fixup,\"ax\"\n"
			     "3: movl $-1,%[err]\n"
			     " jmp 2b\n"
			     ".previous\n"
			     _ASM_EXTABLE(1b,3b)
			     : [err] "=r" (fault)
			     /* "0" (0) preloads the result with the success value */
			     : "D" (buf), "a" (-1), "d" (-1), "0" (0)
			     : "memory");

	return fault;
}
|
|
|
|
|
2008-07-30 08:23:16 +08:00
|
|
|
/*
 * xrestore_user - restore extended state straight from a user-space buffer
 * @buf:  user pointer to the xsave area to load
 * @mask: 64-bit feature mask; it is split into its low and high 32-bit
 *        halves, which are handed to the instruction in the "a" and "d"
 *        registers
 *
 * The restore is the hand-encoded instruction 0x0f,0xae,0x2f (XRSTOR
 * addressed through the "D" register, preceded by REX_PREFIX), bracketed by
 * ASM_STAC/ASM_CLAC.
 *
 * Returns 0 on success, or -1 if the instruction faulted and the fixup code
 * ran.
 */
static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
{
	/* Strip the __user annotation so the pointer can be an asm operand */
	struct xsave_struct *area = (__force struct xsave_struct *)buf;
	u32 mask_lo = (u32)mask;
	u32 mask_hi = (u32)(mask >> 32);
	int fault;

	/*
	 * NOTE(review): the original author questioned whether the "memory"
	 * clobber is needed here; it is kept unchanged.
	 */
	__asm__ __volatile__(ASM_STAC "\n"
			     "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
			     "2: " ASM_CLAC "\n"
			     ".section .fixup,\"ax\"\n"
			     "3: movl $-1,%[err]\n"
			     " jmp 2b\n"
			     ".previous\n"
			     _ASM_EXTABLE(1b,3b)
			     : [err] "=r" (fault)
			     : "D" (area), "a" (mask_lo), "d" (mask_hi), "0" (0)
			     : "memory");

	return fault;
}
|
|
|
|
|
2008-07-30 08:23:16 +08:00
|
|
|
/*
 * xrstor_state - restore the selected state components from a kernel buffer
 * @fx:   xsave area to restore from
 * @mask: 64-bit feature mask; its low and high 32-bit halves are handed to
 *        the instruction in the "a" and "d" registers
 *
 * Emits the hand-encoded instruction 0x0f,0xae,0x2f (XRSTOR addressed
 * through the "D" register, preceded by REX_PREFIX).  There is no
 * exception-table entry here, so a fault is not caught by this function.
 */
static inline void xrstor_state(struct xsave_struct *fx, u64 mask)
{
	u32 mask_lo = (u32)mask;
	u32 mask_hi = (u32)(mask >> 32);

	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
		     : /* no outputs */
		     : "D" (fx), "m" (*fx), "a" (mask_lo), "d" (mask_hi)
		     : "memory");
}
|
|
|
|
|
2010-07-20 07:05:49 +08:00
|
|
|
/*
 * xsave_state - save the selected state components into a kernel buffer
 * @fx:   xsave area to write to
 * @mask: 64-bit feature mask; its low and high 32-bit halves are handed to
 *        the instruction in the "a" and "d" registers
 *
 * Emits the hand-encoded instruction 0x0f,0xae,0x27 (XSAVE addressed
 * through the "D" register, preceded by REX_PREFIX).  The write to *@fx is
 * covered by the "memory" clobber.
 */
static inline void xsave_state(struct xsave_struct *fx, u64 mask)
{
	u32 mask_lo = (u32)mask;
	u32 mask_hi = (u32)(mask >> 32);

	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
		     : /* no outputs */
		     : "D" (fx), "m" (*fx), "a" (mask_lo), "d" (mask_hi)
		     : "memory");
}
|
|
|
|
|
2010-05-06 16:45:46 +08:00
|
|
|
static inline void fpu_xsave(struct fpu *fpu)
|
2008-07-30 01:29:20 +08:00
|
|
|
{
|
|
|
|
/* This, however, we can work around by forcing the compiler to select
|
|
|
|
an addressing mode that doesn't require extended registers. */
|
2010-07-20 07:05:52 +08:00
|
|
|
alternative_input(
|
|
|
|
".byte " REX_PREFIX "0x0f,0xae,0x27",
|
|
|
|
".byte " REX_PREFIX "0x0f,0xae,0x37",
|
|
|
|
X86_FEATURE_XSAVEOPT,
|
|
|
|
[fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) :
|
|
|
|
"memory");
|
2008-07-30 01:29:20 +08:00
|
|
|
}
|
2008-07-30 01:29:19 +08:00
|
|
|
#endif
|