linux/arch/arm64/include/asm/fpsimd.h
Mark Brown 826a4fdd2a arm64/sme: Don't flush SVE register state when allocating SME storage
Currently when taking a SME access trap we allocate storage for the SVE
register state in order to be able to handle storage of streaming mode SVE.
Due to the original usage in a purely SVE context the SVE register state
allocation this also flushes the register state for SVE if storage was
already allocated but in the SME context this is not desirable. For a SME
access trap to be taken the task must not be in streaming mode so either
there already is SVE register state present for regular SVE mode which would
be corrupted or the task does not have TIF_SVE and the flush is redundant.

Fix this by adding a flag to sve_alloc() indicating if we are in a SVE
context and need to flush the state. Freshly allocated storage is always
zeroed either way.

Fixes: 8bd7f91c03 ("arm64/sme: Implement traps and syscall handling for SME")
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220817182324.638214-4-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
2022-08-23 11:29:11 +01:00

390 lines
10 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
*/
#ifndef __ASM_FP_H
#define __ASM_FP_H
#include <asm/errno.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
#ifndef __ASSEMBLY__
#include <linux/bitmap.h>
#include <linux/build_bug.h>
#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/stddef.h>
#include <linux/types.h>
#ifdef CONFIG_COMPAT
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
#define VFP_FPSCR_CTRL_MASK 0x07f79f00
/*
* The VFP state has 32x64-bit registers and a single 32-bit
* control/status register.
*/
#define VFP_STATE_SIZE ((32 * 8) + 4)
#endif
/*
* When we defined the maximum SVE vector length we defined the ABI so
* that the maximum vector length included all the reserved for future
* expansion bits in ZCR rather than those just currently defined by
* the architecture. While SME follows a similar pattern the fact that
* it includes a square matrix means that any allocations that attempt
* to cover the maximum potential vector length (such as happen with
* the regset used for ptrace) end up being extremely large. Define
* the much lower actual limit for use in such situations.
*/
#define SME_VQ_MAX 16
struct task_struct;
extern void fpsimd_save_state(struct user_fpsimd_state *state);
extern void fpsimd_load_state(struct user_fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
void *sve_state, unsigned int sve_vl,
void *za_state, unsigned int sme_vl,
u64 *svcr);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_cpu_state(void);
static inline bool thread_sm_enabled(struct thread_struct *thread)
{
return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
}
static inline bool thread_za_enabled(struct thread_struct *thread)
{
return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
}
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
#define VL_ARCH_MAX 0x100
/* Offset of FFR in the SVE register dump */
static inline size_t sve_ffr_offset(int vl)
{
return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
}
static inline void *sve_pffr(struct thread_struct *thread)
{
unsigned int vl;
if (system_supports_sme() && thread_sm_enabled(thread))
vl = thread_get_sme_vl(thread);
else
vl = thread_get_sve_vl(thread);
return (char *)thread->sve_state + sve_ffr_offset(vl);
}
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
extern void sve_load_state(void const *state, u32 const *pfpsr,
int restore_ffr);
extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern void sve_set_vq(unsigned long vq_minus_1);
extern void sme_set_vq(unsigned long vq_minus_1);
extern void za_save_state(void *state);
extern void za_load_state(void const *state);
struct arm64_cpu_capabilities;
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void);
extern u64 read_smcr_features(void);
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
* vice versa). This allows find_next_bit() to be used to find the
* _maximum_ VQ not exceeding a certain value.
*/
static inline unsigned int __vq_to_bit(unsigned int vq)
{
return SVE_VQ_MAX - vq;
}
static inline unsigned int __bit_to_vq(unsigned int bit)
{
return SVE_VQ_MAX - bit;
}
struct vl_info {
enum vec_type type;
const char *name; /* For display purposes */
/* Minimum supported vector length across all CPUs */
int min_vl;
/* Maximum supported vector length across all CPUs */
int max_vl;
int max_virtualisable_vl;
/*
* Set of available vector lengths,
* where length vq encoded as bit __vq_to_bit(vq):
*/
DECLARE_BITMAP(vq_map, SVE_VQ_MAX);
/* Set of vector lengths present on at least one cpu: */
DECLARE_BITMAP(vq_partial_map, SVE_VQ_MAX);
};
#ifdef CONFIG_ARM64_SVE
extern void sve_alloc(struct task_struct *task, bool flush);
extern void fpsimd_release_task(struct task_struct *task);
extern void fpsimd_sync_to_sve(struct task_struct *task);
extern void fpsimd_force_sync_to_sve(struct task_struct *task);
extern void sve_sync_to_fpsimd(struct task_struct *task);
extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags);
extern int sve_set_current_vl(unsigned long arg);
extern int sve_get_current_vl(void);
static inline void sve_user_disable(void)
{
sysreg_clear_set(cpacr_el1, CPACR_EL1_ZEN_EL0EN, 0);
}
static inline void sve_user_enable(void)
{
sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN);
}
#define sve_cond_update_zcr_vq(val, reg) \
do { \
u64 __zcr = read_sysreg_s((reg)); \
u64 __new = __zcr & ~ZCR_ELx_LEN_MASK; \
__new |= (val) & ZCR_ELx_LEN_MASK; \
if (__zcr != __new) \
write_sysreg_s(__new, (reg)); \
} while (0)
/*
* Probing and setup functions.
* Calls to these functions must be serialised with one another.
*/
enum vec_type;
extern void __init vec_init_vq_map(enum vec_type type);
extern void vec_update_vq_map(enum vec_type type);
extern int vec_verify_vq_map(enum vec_type type);
extern void __init sve_setup(void);
extern __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX];
static inline void write_vl(enum vec_type type, u64 val)
{
u64 tmp;
switch (type) {
#ifdef CONFIG_ARM64_SVE
case ARM64_VEC_SVE:
tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
write_sysreg_s(tmp | val, SYS_ZCR_EL1);
break;
#endif
#ifdef CONFIG_ARM64_SME
case ARM64_VEC_SME:
tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
write_sysreg_s(tmp | val, SYS_SMCR_EL1);
break;
#endif
default:
WARN_ON_ONCE(1);
break;
}
}
static inline int vec_max_vl(enum vec_type type)
{
return vl_info[type].max_vl;
}
static inline int vec_max_virtualisable_vl(enum vec_type type)
{
return vl_info[type].max_virtualisable_vl;
}
static inline int sve_max_vl(void)
{
return vec_max_vl(ARM64_VEC_SVE);
}
static inline int sve_max_virtualisable_vl(void)
{
return vec_max_virtualisable_vl(ARM64_VEC_SVE);
}
/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
static inline bool vq_available(enum vec_type type, unsigned int vq)
{
return test_bit(__vq_to_bit(vq), vl_info[type].vq_map);
}
static inline bool sve_vq_available(unsigned int vq)
{
return vq_available(ARM64_VEC_SVE, vq);
}
size_t sve_state_size(struct task_struct const *task);
#else /* ! CONFIG_ARM64_SVE */
static inline void sve_alloc(struct task_struct *task, bool flush) { }
static inline void fpsimd_release_task(struct task_struct *task) { }
static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
static inline int sve_max_virtualisable_vl(void)
{
return 0;
}
static inline int sve_set_current_vl(unsigned long arg)
{
return -EINVAL;
}
static inline int sve_get_current_vl(void)
{
return -EINVAL;
}
static inline int sve_max_vl(void)
{
return -EINVAL;
}
static inline bool sve_vq_available(unsigned int vq) { return false; }
static inline void sve_user_disable(void) { BUILD_BUG(); }
static inline void sve_user_enable(void) { BUILD_BUG(); }
#define sve_cond_update_zcr_vq(val, reg) do { } while (0)
static inline void vec_init_vq_map(enum vec_type t) { }
static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }
static inline size_t sve_state_size(struct task_struct const *task)
{
return 0;
}
#endif /* ! CONFIG_ARM64_SVE */
#ifdef CONFIG_ARM64_SME
static inline void sme_user_disable(void)
{
sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
}
static inline void sme_user_enable(void)
{
sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
}
static inline void sme_smstart_sm(void)
{
asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
}
static inline void sme_smstop_sm(void)
{
asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr"));
}
static inline void sme_smstop(void)
{
asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
}
extern void __init sme_setup(void);
static inline int sme_max_vl(void)
{
return vec_max_vl(ARM64_VEC_SME);
}
static inline int sme_max_virtualisable_vl(void)
{
return vec_max_virtualisable_vl(ARM64_VEC_SME);
}
extern void sme_alloc(struct task_struct *task);
extern unsigned int sme_get_vl(void);
extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);
/*
* Return how many bytes of memory are required to store the full SME
* specific state (currently just ZA) for task, given task's currently
* configured vector length.
*/
static inline size_t za_state_size(struct task_struct const *task)
{
unsigned int vl = task_get_sme_vl(task);
return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
}
#else
static inline void sme_user_disable(void) { BUILD_BUG(); }
static inline void sme_user_enable(void) { BUILD_BUG(); }
static inline void sme_smstart_sm(void) { }
static inline void sme_smstop_sm(void) { }
static inline void sme_smstop(void) { }
static inline void sme_alloc(struct task_struct *task) { }
static inline void sme_setup(void) { }
static inline unsigned int sme_get_vl(void) { return 0; }
static inline int sme_max_vl(void) { return 0; }
static inline int sme_max_virtualisable_vl(void) { return 0; }
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }
static inline size_t za_state_size(struct task_struct const *task)
{
return 0;
}
#endif /* ! CONFIG_ARM64_SME */
/* For use by EFI runtime services calls only */
extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void);
#endif
#endif