Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fpu changes from Ingo Molnar:
 "Various x86 FPU handling cleanups, refactorings and fixes (Borislav
  Petkov, Oleg Nesterov, Rik van Riel)"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86/fpu: Kill eager_fpu_init_bp()
  x86/fpu: Don't allocate fpu->state for swapper/0
  x86/fpu: Rename drop_init_fpu() to fpu_reset_state()
  x86/fpu: Fold __drop_fpu() into its sole user
  x86/fpu: Don't abuse drop_init_fpu() in flush_thread()
  x86/fpu: Use restore_init_xstate() instead of math_state_restore() on kthread exec
  x86/fpu: Introduce restore_init_xstate()
  x86/fpu: Document user_fpu_begin()
  x86/fpu: Factor out memset(xstate, 0) in fpu_finit() paths
  x86/fpu: Change xstateregs_get()/set() to use ->xsave.i387 rather than ->fxsave
  x86/fpu: Don't abuse FPU in kernel threads if use_eager_fpu()
  x86/fpu: Always allow FPU in interrupt if use_eager_fpu()
  x86/fpu: __kernel_fpu_begin() should clear fpu_owner_task even if use_eager_fpu()
  x86/fpu: Also check fpu_lazy_restore() when use_eager_fpu()
  x86/fpu: Use task_disable_lazy_fpu_restore() helper
  x86/fpu: Use an explicit if/else in switch_fpu_prepare()
  x86/fpu: Introduce task_disable_lazy_fpu_restore() helper
  x86/fpu: Move lazy restore functions up a few lines
  x86/fpu: Change math_error() to use unlazy_fpu(), kill (now) unused save_init_fpu()
  x86/fpu: Don't do __thread_fpu_end() if use_eager_fpu()
  ...
This commit is contained in:
Linus Torvalds 2015-04-13 13:24:23 -07:00
commit 421ec9017f
6 changed files with 119 additions and 133 deletions

View File

@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
/*
* Must be run with preemption disabled: this clears the fpu_owner_task,
* on this CPU.
*
* This will disable any lazy FPU state restore of the current FPU state,
* but if the current thread owns the FPU, it will still be saved by.
*/
static inline void __cpu_disable_lazy_restore(unsigned int cpu)
{
per_cpu(fpu_owner_task, cpu) = NULL;
}
/*
* Used to indicate that the FPU state in memory is newer than the FPU
* state in registers, and the FPU state should be reloaded next time the
* task is run. Only safe on the current task, or non-running tasks.
*/
static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
{
tsk->thread.fpu.last_cpu = ~0;
}
static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
{
return new == this_cpu_read_stable(fpu_owner_task) &&
cpu == new->thread.fpu.last_cpu;
}
static inline int is_ia32_compat_frame(void)
{
return config_enabled(CONFIG_IA32_EMULATION) &&
@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)
static inline void fx_finit(struct i387_fxsave_struct *fx)
{
memset(fx, 0, xstate_size);
fx->cwd = 0x37f;
fx->mxcsr = MXCSR_DEFAULT;
}
@ -351,17 +378,6 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
__thread_set_has_fpu(tsk);
}
static inline void __drop_fpu(struct task_struct *tsk)
{
if (__thread_has_fpu(tsk)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
__thread_fpu_end(tsk);
}
}
static inline void drop_fpu(struct task_struct *tsk)
{
/*
@ -369,21 +385,37 @@ static inline void drop_fpu(struct task_struct *tsk)
*/
preempt_disable();
tsk->thread.fpu_counter = 0;
__drop_fpu(tsk);
if (__thread_has_fpu(tsk)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
__thread_fpu_end(tsk);
}
clear_stopped_child_used_math(tsk);
preempt_enable();
}
static inline void drop_init_fpu(struct task_struct *tsk)
static inline void restore_init_xstate(void)
{
if (use_xsave())
xrstor_state(init_xstate_buf, -1);
else
fxrstor_checking(&init_xstate_buf->i387);
}
/*
* Reset the FPU state in the eager case and drop it in the lazy case (later use
* will reinit it).
*/
static inline void fpu_reset_state(struct task_struct *tsk)
{
if (!use_eager_fpu())
drop_fpu(tsk);
else {
if (use_xsave())
xrstor_state(init_xstate_buf, -1);
else
fxrstor_checking(&init_xstate_buf->i387);
}
else
restore_init_xstate();
}
/*
@ -400,24 +432,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
*/
typedef struct { int preload; } fpu_switch_t;
/*
* Must be run with preemption disabled: this clears the fpu_owner_task,
* on this CPU.
*
* This will disable any lazy FPU state restore of the current FPU state,
* but if the current thread owns the FPU, it will still be saved by.
*/
static inline void __cpu_disable_lazy_restore(unsigned int cpu)
{
per_cpu(fpu_owner_task, cpu) = NULL;
}
static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
{
return new == this_cpu_read_stable(fpu_owner_task) &&
cpu == new->thread.fpu.last_cpu;
}
static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
{
fpu_switch_t fpu;
@ -426,13 +440,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
* If the task has used the math, pre-load the FPU on xsave processors
* or if the past 5 consecutive context-switches used math.
*/
fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
new->thread.fpu_counter > 5);
fpu.preload = tsk_used_math(new) &&
(use_eager_fpu() || new->thread.fpu_counter > 5);
if (__thread_has_fpu(old)) {
if (!__save_init_fpu(old))
cpu = ~0;
old->thread.fpu.last_cpu = cpu;
old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
task_disable_lazy_fpu_restore(old);
else
old->thread.fpu.last_cpu = cpu;
/* But leave fpu_owner_task! */
old->thread.fpu.has_fpu = 0;
/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
@ -443,10 +461,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
stts();
} else {
old->thread.fpu_counter = 0;
old->thread.fpu.last_cpu = ~0;
task_disable_lazy_fpu_restore(old);
if (fpu.preload) {
new->thread.fpu_counter++;
if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
if (fpu_lazy_restore(new, cpu))
fpu.preload = 0;
else
prefetch(new->thread.fpu.state);
@ -466,7 +484,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
{
if (fpu.preload) {
if (unlikely(restore_fpu_checking(new)))
drop_init_fpu(new);
fpu_reset_state(new);
}
}
@ -495,10 +513,12 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
}
/*
* Need to be preemption-safe.
* Needs to be preemption-safe.
*
* NOTE! user_fpu_begin() must be used only immediately before restoring
* it. This function does not do any save/restore on their own.
* the save state. It does not do any saving/restoring on its own. In
* lazy FPU mode, it is just an optimization to avoid a #NM exception,
* the task can lose the FPU right after preempt_enable().
*/
static inline void user_fpu_begin(void)
{
@ -519,24 +539,6 @@ static inline void __save_fpu(struct task_struct *tsk)
fpu_fxsave(&tsk->thread.fpu);
}
/*
* These disable preemption on their own and are safe
*/
static inline void save_init_fpu(struct task_struct *tsk)
{
WARN_ON_ONCE(!__thread_has_fpu(tsk));
if (use_eager_fpu()) {
__save_fpu(tsk);
return;
}
preempt_disable();
__save_init_fpu(tsk);
__thread_fpu_end(tsk);
preempt_enable();
}
/*
* i387 state interaction
*/

View File

@ -42,8 +42,8 @@ void kernel_fpu_enable(void)
* be set (so that the clts/stts pair does nothing that is
* visible in the interrupted kernel thread).
*
* Except for the eagerfpu case when we return 1 unless we've already
* been eager and saved the state in kernel_fpu_begin().
* Except for the eagerfpu case when we return true; in the likely case
* the thread has FPU but we are not going to set/clear TS.
*/
static inline bool interrupted_kernel_fpu_idle(void)
{
@ -51,7 +51,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
return false;
if (use_eager_fpu())
return __thread_has_fpu(current);
return true;
return !__thread_has_fpu(current) &&
(read_cr0() & X86_CR0_TS);
@ -94,9 +94,10 @@ void __kernel_fpu_begin(void)
if (__thread_has_fpu(me)) {
__save_init_fpu(me);
} else if (!use_eager_fpu()) {
} else {
this_cpu_write(fpu_owner_task, NULL);
clts();
if (!use_eager_fpu())
clts();
}
}
EXPORT_SYMBOL(__kernel_fpu_begin);
@ -107,7 +108,7 @@ void __kernel_fpu_end(void)
if (__thread_has_fpu(me)) {
if (WARN_ON(restore_fpu_checking(me)))
drop_init_fpu(me);
fpu_reset_state(me);
} else if (!use_eager_fpu()) {
stts();
}
@ -120,10 +121,13 @@ void unlazy_fpu(struct task_struct *tsk)
{
preempt_disable();
if (__thread_has_fpu(tsk)) {
__save_init_fpu(tsk);
__thread_fpu_end(tsk);
} else
tsk->thread.fpu_counter = 0;
if (use_eager_fpu()) {
__save_fpu(tsk);
} else {
__save_init_fpu(tsk);
__thread_fpu_end(tsk);
}
}
preempt_enable();
}
EXPORT_SYMBOL(unlazy_fpu);
@ -221,11 +225,12 @@ void fpu_finit(struct fpu *fpu)
return;
}
memset(fpu->state, 0, xstate_size);
if (cpu_has_fxsr) {
fx_finit(&fpu->state->fxsave);
} else {
struct i387_fsave_struct *fp = &fpu->state->fsave;
memset(fp, 0, xstate_size);
fp->cwd = 0xffff037fu;
fp->swd = 0xffff0000u;
fp->twd = 0xffffffffu;
@ -247,7 +252,7 @@ int init_fpu(struct task_struct *tsk)
if (tsk_used_math(tsk)) {
if (cpu_has_fpu && tsk == current)
unlazy_fpu(tsk);
tsk->thread.fpu.last_cpu = ~0;
task_disable_lazy_fpu_restore(tsk);
return 0;
}
@ -336,6 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
int ret;
if (!cpu_has_xsave)
@ -350,14 +356,12 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
* memory layout in the thread struct, so that we can copy the entire
* xstateregs to the user using one user_regset_copyout().
*/
memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
memcpy(&xsave->i387.sw_reserved,
xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
/*
* Copy the xstate memory layout.
*/
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.state->xsave, 0, -1);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
return ret;
}
@ -365,8 +369,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
int ret;
struct xsave_hdr_struct *xsave_hdr;
if (!cpu_has_xsave)
return -ENODEV;
@ -375,22 +379,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.state->xsave, 0, -1);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
/*
* mxcsr reserved bits must be masked to zero for security reasons.
*/
target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;
xsave_hdr->xstate_bv &= pcntxt_mask;
xsave->i387.mxcsr &= mxcsr_feature_mask;
xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
/*
* These bits must be zero.
*/
memset(xsave_hdr->reserved, 0, 48);
memset(&xsave->xsave_hdr.reserved, 0, 48);
return ret;
}

View File

@ -89,8 +89,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
dst->thread.fpu_counter = 0;
dst->thread.fpu.has_fpu = 0;
dst->thread.fpu.last_cpu = ~0;
dst->thread.fpu.state = NULL;
task_disable_lazy_fpu_restore(dst);
if (tsk_used_math(src)) {
int err = fpu_alloc(&dst->thread.fpu);
if (err)
@ -151,13 +151,18 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
drop_init_fpu(tsk);
/*
* Free the FPU state for non xsave platforms. They get reallocated
* lazily at the first use.
*/
if (!use_eager_fpu())
if (!use_eager_fpu()) {
/* FPU state will be reallocated lazily at the first use. */
drop_fpu(tsk);
free_thread_xstate(tsk);
} else if (!used_math()) {
/* kthread execs. TODO: cleanup this horror. */
if (WARN_ON(init_fpu(tsk)))
force_sig(SIGKILL, tsk);
user_fpu_begin();
restore_init_xstate();
}
}
static void hard_disable_TSC(void)

View File

@ -680,7 +680,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* Ensure the signal handler starts with the new fpu state.
*/
if (used_math())
drop_init_fpu(current);
fpu_reset_state(current);
}
signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
}

View File

@ -731,7 +731,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
/*
* Save the info for the exception handler and clear the error.
*/
save_init_fpu(task);
unlazy_fpu(task);
task->thread.trap_nr = trapnr;
task->thread.error_code = error_code;
info.si_signo = SIGFPE;
@ -860,7 +860,7 @@ void math_state_restore(void)
kernel_fpu_disable();
__thread_fpu_begin(tsk);
if (unlikely(restore_fpu_checking(tsk))) {
drop_init_fpu(tsk);
fpu_reset_state(tsk);
force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
} else {
tsk->thread.fpu_counter++;

View File

@ -342,7 +342,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
config_enabled(CONFIG_IA32_EMULATION));
if (!buf) {
drop_init_fpu(tsk);
fpu_reset_state(tsk);
return 0;
}
@ -416,7 +416,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
*/
user_fpu_begin();
if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
drop_init_fpu(tsk);
fpu_reset_state(tsk);
return -1;
}
}
@ -678,19 +678,13 @@ void xsave_init(void)
this_func();
}
static inline void __init eager_fpu_init_bp(void)
/*
* setup_init_fpu_buf() is __init and it is OK to call it here because
* init_xstate_buf will be unset only once during boot.
*/
void __init_refok eager_fpu_init(void)
{
current->thread.fpu.state =
alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
if (!init_xstate_buf)
setup_init_fpu_buf();
}
void eager_fpu_init(void)
{
static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
clear_used_math();
WARN_ON(used_math());
current_thread_info()->status = 0;
if (eagerfpu == ENABLE)
@ -701,21 +695,8 @@ void eager_fpu_init(void)
return;
}
if (boot_func) {
boot_func();
boot_func = NULL;
}
/*
* This is same as math_state_restore(). But use_xsave() is
* not yet patched to use math_state_restore().
*/
init_fpu(current);
__thread_fpu_begin(current);
if (cpu_has_xsave)
xrstor_state(init_xstate_buf, -1);
else
fxrstor_checking(&init_xstate_buf->i387);
if (!init_xstate_buf)
setup_init_fpu_buf();
}
/*