Mirror of https://mirrors.bfsu.edu.cn/git/linux.git, synced 2024-12-14 22:44:27 +08:00
8c09871a95
The first invocation of kernel_fpu_begin() after switching from user to kernel context will save all vector registers, even if only parts of the vector registers are used within the kernel fpu context. Given that save and restore of all vector registers is quite expensive, change the current approach in several ways:

- Instead of saving and restoring all user registers, limit this to those registers which are actually used within a kernel fpu context.
- On context switch save all remaining user fpu registers, so they can be restored when the task is rescheduled.
- Saving user registers within kernel_fpu_begin() is done without disabling and enabling interrupts - which also slightly reduces runtime. In the worst case (e.g. interrupt context uses the same registers) this may lead to the situation that registers are saved several times, however the assumption is that this will not happen frequently, so that the new method is faster in nearly all cases.
- save_user_fpu_regs() can still be called from all contexts and saves all (or all remaining) user registers to a task's ufpu user fpu save area.

Overall this reduces the time required to save and restore the user fpu context for nearly all cases.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
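As a rough usage sketch (not part of this commit): a kernel fpu section that only clobbers V0-V15 and the floating point control register requests just that subset, so at most KERNEL_FPC | KERNEL_VXR_LOW is saved and restored on behalf of the interrupted context. The kernel_fpu_begin()/kernel_fpu_end() wrapper names come from the commit message; the on-stack save area helper and the body of the section are assumptions made up for illustration.

static void example_vxr_low_section(void)
{
	/* assumed helper: declares an on-stack save area sized for 16 vector registers */
	DECLARE_KERNEL_FPU_ONSTACK16(vxstate);

	kernel_fpu_begin(&vxstate, KERNEL_FPC | KERNEL_VXR_LOW);
	/* ... vector code that touches only V0-V15 and the FPC ... */
	kernel_fpu_end(&vxstate, KERNEL_FPC | KERNEL_VXR_LOW);
}

With this rework, the first such section after returning from user space saves only the user's V0-V15 instead of all 32 vector registers.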
194 lines
4.2 KiB
C
// SPDX-License-Identifier: GPL-2.0
/*
 * In-kernel vector facility support functions
 *
 * Copyright IBM Corp. 2015
 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 */
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <asm/fpu.h>

void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
{
	__vector128 *vxrs = state->vxrs;
	int mask;

	/*
	 * Limit the save to the FPU/vector registers already
	 * in use by the previous context.
	 */
	flags &= state->hdr.mask;
	if (flags & KERNEL_FPC)
		fpu_stfpc(&state->hdr.fpc);
	if (!cpu_has_vx()) {
		if (flags & KERNEL_VXR_LOW)
			save_fp_regs_vx(vxrs);
		return;
	}
	mask = flags & KERNEL_VXR;
	if (mask == KERNEL_VXR) {
		vxrs += fpu_vstm(0, 15, vxrs);
		vxrs += fpu_vstm(16, 31, vxrs);
		return;
	}
	if (mask == KERNEL_VXR_MID) {
		vxrs += fpu_vstm(8, 23, vxrs);
		return;
	}
	mask = flags & KERNEL_VXR_LOW;
	if (mask) {
		if (mask == KERNEL_VXR_LOW)
			vxrs += fpu_vstm(0, 15, vxrs);
		else if (mask == KERNEL_VXR_V0V7)
			vxrs += fpu_vstm(0, 7, vxrs);
		else
			vxrs += fpu_vstm(8, 15, vxrs);
	}
	mask = flags & KERNEL_VXR_HIGH;
	if (mask) {
		if (mask == KERNEL_VXR_HIGH)
			vxrs += fpu_vstm(16, 31, vxrs);
		else if (mask == KERNEL_VXR_V16V23)
			vxrs += fpu_vstm(16, 23, vxrs);
		else
			vxrs += fpu_vstm(24, 31, vxrs);
	}
}
EXPORT_SYMBOL(__kernel_fpu_begin);

void __kernel_fpu_end(struct kernel_fpu *state, int flags)
{
	__vector128 *vxrs = state->vxrs;
	int mask;

	/*
	 * Limit the restore to the FPU/vector registers of the
	 * previous context that have been overwritten by the
	 * current context.
	 */
	flags &= state->hdr.mask;
	if (flags & KERNEL_FPC)
		fpu_lfpc(&state->hdr.fpc);
	if (!cpu_has_vx()) {
		if (flags & KERNEL_VXR_LOW)
			load_fp_regs_vx(vxrs);
		return;
	}
	mask = flags & KERNEL_VXR;
	if (mask == KERNEL_VXR) {
		vxrs += fpu_vlm(0, 15, vxrs);
		vxrs += fpu_vlm(16, 31, vxrs);
		return;
	}
	if (mask == KERNEL_VXR_MID) {
		vxrs += fpu_vlm(8, 23, vxrs);
		return;
	}
	mask = flags & KERNEL_VXR_LOW;
	if (mask) {
		if (mask == KERNEL_VXR_LOW)
			vxrs += fpu_vlm(0, 15, vxrs);
		else if (mask == KERNEL_VXR_V0V7)
			vxrs += fpu_vlm(0, 7, vxrs);
		else
			vxrs += fpu_vlm(8, 15, vxrs);
	}
	mask = flags & KERNEL_VXR_HIGH;
	if (mask) {
		if (mask == KERNEL_VXR_HIGH)
			vxrs += fpu_vlm(16, 31, vxrs);
		else if (mask == KERNEL_VXR_V16V23)
			vxrs += fpu_vlm(16, 23, vxrs);
		else
			vxrs += fpu_vlm(24, 31, vxrs);
	}
}
EXPORT_SYMBOL(__kernel_fpu_end);
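
/*
 * Load the FPU/vector registers selected by @flags from the fpu
 * save area @state.
 */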
void load_fpu_state(struct fpu *state, int flags)
{
	__vector128 *vxrs = &state->vxrs[0];
	int mask;

	if (flags & KERNEL_FPC)
		fpu_lfpc(&state->fpc);
	if (!cpu_has_vx()) {
		if (flags & KERNEL_VXR_V0V7)
			load_fp_regs_vx(state->vxrs);
		return;
	}
	mask = flags & KERNEL_VXR;
	if (mask == KERNEL_VXR) {
		fpu_vlm(0, 15, &vxrs[0]);
		fpu_vlm(16, 31, &vxrs[16]);
		return;
	}
	if (mask == KERNEL_VXR_MID) {
		fpu_vlm(8, 23, &vxrs[8]);
		return;
	}
	mask = flags & KERNEL_VXR_LOW;
	if (mask) {
		if (mask == KERNEL_VXR_LOW)
			fpu_vlm(0, 15, &vxrs[0]);
		else if (mask == KERNEL_VXR_V0V7)
			fpu_vlm(0, 7, &vxrs[0]);
		else
			fpu_vlm(8, 15, &vxrs[8]);
	}
	mask = flags & KERNEL_VXR_HIGH;
	if (mask) {
		if (mask == KERNEL_VXR_HIGH)
			fpu_vlm(16, 31, &vxrs[16]);
		else if (mask == KERNEL_VXR_V16V23)
			fpu_vlm(16, 23, &vxrs[16]);
		else
			fpu_vlm(24, 31, &vxrs[24]);
	}
}
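
/*
 * Save the FPU/vector registers selected by @flags to the fpu
 * save area @state.
 */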
void save_fpu_state(struct fpu *state, int flags)
{
	__vector128 *vxrs = &state->vxrs[0];
	int mask;

	if (flags & KERNEL_FPC)
		fpu_stfpc(&state->fpc);
	if (!cpu_has_vx()) {
		if (flags & KERNEL_VXR_LOW)
			save_fp_regs_vx(state->vxrs);
		return;
	}
	mask = flags & KERNEL_VXR;
	if (mask == KERNEL_VXR) {
		fpu_vstm(0, 15, &vxrs[0]);
		fpu_vstm(16, 31, &vxrs[16]);
		return;
	}
	if (mask == KERNEL_VXR_MID) {
		fpu_vstm(8, 23, &vxrs[8]);
		return;
	}
	mask = flags & KERNEL_VXR_LOW;
	if (mask) {
		if (mask == KERNEL_VXR_LOW)
			fpu_vstm(0, 15, &vxrs[0]);
		else if (mask == KERNEL_VXR_V0V7)
			fpu_vstm(0, 7, &vxrs[0]);
		else
			fpu_vstm(8, 15, &vxrs[8]);
	}
	mask = flags & KERNEL_VXR_HIGH;
	if (mask) {
		if (mask == KERNEL_VXR_HIGH)
			fpu_vstm(16, 31, &vxrs[16]);
		else if (mask == KERNEL_VXR_V16V23)
			fpu_vstm(16, 23, &vxrs[16]);
		else
			fpu_vstm(24, 31, &vxrs[24]);
	}
}
EXPORT_SYMBOL(save_fpu_state);