linux/arch/riscv/kernel/fpu.S
Clément Léger 7c586a555a
riscv: add floating point insn support to misaligned access emulation
This support is partially based of openSBI misaligned emulation floating
point instruction support. It provides support for the existing
floating point instructions (both for 32/64 bits as well as compressed
ones). Since floating point registers are not part of the pt_regs
struct, we need to modify them directly using some assembly. We also
dirty the pt_regs status in case we modify them to be sure context
switch will save FP state. With this support, Linux is on par with
openSBI support.

Signed-off-by: Clément Léger <cleger@rivosinc.com>
Link: https://lore.kernel.org/r/20231004151405.521596-5-cleger@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2023-11-01 08:34:55 -07:00

228 lines
5.9 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/csr.h>
#include <asm/asm-offsets.h>
ENTRY(__fstate_save)
li a2, TASK_THREAD_F0
add a0, a0, a2
li t1, SR_FS
csrs CSR_STATUS, t1
frcsr t0
fsd f0, TASK_THREAD_F0_F0(a0)
fsd f1, TASK_THREAD_F1_F0(a0)
fsd f2, TASK_THREAD_F2_F0(a0)
fsd f3, TASK_THREAD_F3_F0(a0)
fsd f4, TASK_THREAD_F4_F0(a0)
fsd f5, TASK_THREAD_F5_F0(a0)
fsd f6, TASK_THREAD_F6_F0(a0)
fsd f7, TASK_THREAD_F7_F0(a0)
fsd f8, TASK_THREAD_F8_F0(a0)
fsd f9, TASK_THREAD_F9_F0(a0)
fsd f10, TASK_THREAD_F10_F0(a0)
fsd f11, TASK_THREAD_F11_F0(a0)
fsd f12, TASK_THREAD_F12_F0(a0)
fsd f13, TASK_THREAD_F13_F0(a0)
fsd f14, TASK_THREAD_F14_F0(a0)
fsd f15, TASK_THREAD_F15_F0(a0)
fsd f16, TASK_THREAD_F16_F0(a0)
fsd f17, TASK_THREAD_F17_F0(a0)
fsd f18, TASK_THREAD_F18_F0(a0)
fsd f19, TASK_THREAD_F19_F0(a0)
fsd f20, TASK_THREAD_F20_F0(a0)
fsd f21, TASK_THREAD_F21_F0(a0)
fsd f22, TASK_THREAD_F22_F0(a0)
fsd f23, TASK_THREAD_F23_F0(a0)
fsd f24, TASK_THREAD_F24_F0(a0)
fsd f25, TASK_THREAD_F25_F0(a0)
fsd f26, TASK_THREAD_F26_F0(a0)
fsd f27, TASK_THREAD_F27_F0(a0)
fsd f28, TASK_THREAD_F28_F0(a0)
fsd f29, TASK_THREAD_F29_F0(a0)
fsd f30, TASK_THREAD_F30_F0(a0)
fsd f31, TASK_THREAD_F31_F0(a0)
sw t0, TASK_THREAD_FCSR_F0(a0)
csrc CSR_STATUS, t1
ret
ENDPROC(__fstate_save)
ENTRY(__fstate_restore)
li a2, TASK_THREAD_F0
add a0, a0, a2
li t1, SR_FS
lw t0, TASK_THREAD_FCSR_F0(a0)
csrs CSR_STATUS, t1
fld f0, TASK_THREAD_F0_F0(a0)
fld f1, TASK_THREAD_F1_F0(a0)
fld f2, TASK_THREAD_F2_F0(a0)
fld f3, TASK_THREAD_F3_F0(a0)
fld f4, TASK_THREAD_F4_F0(a0)
fld f5, TASK_THREAD_F5_F0(a0)
fld f6, TASK_THREAD_F6_F0(a0)
fld f7, TASK_THREAD_F7_F0(a0)
fld f8, TASK_THREAD_F8_F0(a0)
fld f9, TASK_THREAD_F9_F0(a0)
fld f10, TASK_THREAD_F10_F0(a0)
fld f11, TASK_THREAD_F11_F0(a0)
fld f12, TASK_THREAD_F12_F0(a0)
fld f13, TASK_THREAD_F13_F0(a0)
fld f14, TASK_THREAD_F14_F0(a0)
fld f15, TASK_THREAD_F15_F0(a0)
fld f16, TASK_THREAD_F16_F0(a0)
fld f17, TASK_THREAD_F17_F0(a0)
fld f18, TASK_THREAD_F18_F0(a0)
fld f19, TASK_THREAD_F19_F0(a0)
fld f20, TASK_THREAD_F20_F0(a0)
fld f21, TASK_THREAD_F21_F0(a0)
fld f22, TASK_THREAD_F22_F0(a0)
fld f23, TASK_THREAD_F23_F0(a0)
fld f24, TASK_THREAD_F24_F0(a0)
fld f25, TASK_THREAD_F25_F0(a0)
fld f26, TASK_THREAD_F26_F0(a0)
fld f27, TASK_THREAD_F27_F0(a0)
fld f28, TASK_THREAD_F28_F0(a0)
fld f29, TASK_THREAD_F29_F0(a0)
fld f30, TASK_THREAD_F30_F0(a0)
fld f31, TASK_THREAD_F31_F0(a0)
fscsr t0
csrc CSR_STATUS, t1
ret
ENDPROC(__fstate_restore)
#define get_f32(which) fmv.x.s a0, which; j 2f
#define put_f32(which) fmv.s.x which, a1; j 2f
#if __riscv_xlen == 64
# define get_f64(which) fmv.x.d a0, which; j 2f
# define put_f64(which) fmv.d.x which, a1; j 2f
#else
# define get_f64(which) fsd which, 0(a1); j 2f
# define put_f64(which) fld which, 0(a1); j 2f
#endif
.macro fp_access_prologue
/*
* Compute jump offset to store the correct FP register since we don't
* have indirect FP register access
*/
sll t0, a0, 3
la t2, 1f
add t0, t0, t2
li t1, SR_FS
csrs CSR_STATUS, t1
jr t0
1:
.endm
.macro fp_access_epilogue
2:
csrc CSR_STATUS, t1
ret
.endm
#define fp_access_body(__access_func) \
__access_func(f0); \
__access_func(f1); \
__access_func(f2); \
__access_func(f3); \
__access_func(f4); \
__access_func(f5); \
__access_func(f6); \
__access_func(f7); \
__access_func(f8); \
__access_func(f9); \
__access_func(f10); \
__access_func(f11); \
__access_func(f12); \
__access_func(f13); \
__access_func(f14); \
__access_func(f15); \
__access_func(f16); \
__access_func(f17); \
__access_func(f18); \
__access_func(f19); \
__access_func(f20); \
__access_func(f21); \
__access_func(f22); \
__access_func(f23); \
__access_func(f24); \
__access_func(f25); \
__access_func(f26); \
__access_func(f27); \
__access_func(f28); \
__access_func(f29); \
__access_func(f30); \
__access_func(f31)
#ifdef CONFIG_RISCV_MISALIGNED
/*
* Disable compressed instructions set to keep a constant offset between FP
* load/store/move instructions
*/
.option norvc
/*
* put_f32_reg - Set a FP register from a register containing the value
* a0 = FP register index to be set
* a1 = value to be loaded in the FP register
*/
SYM_FUNC_START(put_f32_reg)
fp_access_prologue
fp_access_body(put_f32)
fp_access_epilogue
SYM_FUNC_END(put_f32_reg)
/*
* get_f32_reg - Get a FP register value and return it
* a0 = FP register index to be retrieved
*/
SYM_FUNC_START(get_f32_reg)
fp_access_prologue
fp_access_body(get_f32)
fp_access_epilogue
SYM_FUNC_END(get_f32_reg)
/*
* put_f64_reg - Set a 64 bits FP register from a value or a pointer.
* a0 = FP register index to be set
* a1 = value/pointer to be loaded in the FP register (when xlen == 32 bits, we
* load the value to a pointer).
*/
SYM_FUNC_START(put_f64_reg)
fp_access_prologue
fp_access_body(put_f64)
fp_access_epilogue
SYM_FUNC_END(put_f64_reg)
/*
* put_f64_reg - Get a 64 bits FP register value and returned it or store it to
* a pointer.
* a0 = FP register index to be retrieved
* a1 = If xlen == 32, pointer which should be loaded with the FP register value
* or unused if xlen == 64. In which case the FP register value is returned
* through a0
*/
SYM_FUNC_START(get_f64_reg)
fp_access_prologue
fp_access_body(get_f64)
fp_access_epilogue
SYM_FUNC_END(get_f64_reg)
#endif /* CONFIG_RISCV_MISALIGNED */