2019-06-03 13:44:50 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2012-03-05 19:49:32 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
|
|
*/
|
|
|
|
#ifndef __ASM_FP_H
|
|
|
|
#define __ASM_FP_H
|
|
|
|
|
2017-10-31 23:51:14 +08:00
|
|
|
#include <asm/errno.h>
|
2018-07-11 21:56:40 +08:00
|
|
|
#include <asm/ptrace.h>
|
2018-04-13 00:32:35 +08:00
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/sigcontext.h>
|
2018-07-11 21:56:40 +08:00
|
|
|
#include <asm/sysreg.h>
|
2012-03-05 19:49:32 +08:00
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
2018-09-28 21:39:21 +08:00
|
|
|
#include <linux/bitmap.h>
|
2018-07-11 21:56:40 +08:00
|
|
|
#include <linux/build_bug.h>
|
2018-09-28 21:39:21 +08:00
|
|
|
#include <linux/bug.h>
|
2017-10-31 23:51:08 +08:00
|
|
|
#include <linux/cache.h>
|
2018-02-14 01:43:23 +08:00
|
|
|
#include <linux/init.h>
|
arm64/sve: Core task context handling
This patch adds the core support for switching and managing the SVE
architectural state of user tasks.
Calls to the existing FPSIMD low-level save/restore functions are
factored out as new functions task_fpsimd_{save,load}(), since SVE
now dynamically may or may not need to be handled at these points
depending on the kernel configuration, hardware features discovered
at boot, and the runtime state of the task. To make these
decisions as fast as possible, const cpucaps are used where
feasible, via the system_supports_sve() helper.
The SVE registers are only tracked for threads that have explicitly
used SVE, indicated by the new thread flag TIF_SVE. Otherwise, the
FPSIMD view of the architectural state is stored in
thread.fpsimd_state as usual.
When in use, the SVE registers are not stored directly in
thread_struct due to their potentially large and variable size.
Because the task_struct slab allocator must be configured very
early during kernel boot, it is also tricky to configure it
correctly to match the maximum vector length provided by the
hardware, since this depends on examining secondary CPUs as well as
the primary. Instead, a pointer sve_state in thread_struct points
to a dynamically allocated buffer containing the SVE register data,
and code is added to allocate and free this buffer at appropriate
times.
TIF_SVE is set when taking an SVE access trap from userspace, if
suitable hardware support has been detected. This enables SVE for
the thread: a subsequent return to userspace will disable the trap
accordingly. If such a trap is taken without sufficient system-
wide hardware support, SIGILL is sent to the thread instead as if
an undefined instruction had been executed: this may happen if
userspace tries to use SVE in a system where not all CPUs support
it for example.
The kernel will clear TIF_SVE and disable SVE for the thread
whenever an explicit syscall is made by userspace. For backwards
compatibility reasons and conformance with the spirit of the base
AArch64 procedure call standard, the subset of the SVE register
state that aliases the FPSIMD registers is still preserved across a
syscall even if this happens. The remainder of the SVE register
state logically becomes zero at syscall entry, though the actual
zeroing work is currently deferred until the thread next tries to
use SVE, causing another trap to the kernel. This implementation
is suboptimal: in the future, the fastpath case may be optimised
to zero the registers in-place and leave SVE enabled for the task,
where beneficial.
TIF_SVE is also cleared in the following slowpath cases, which are
taken as reasonable hints that the task may no longer use SVE:
* exec
* fork and clone
Code is added to sync data between thread.fpsimd_state and
thread.sve_state whenever enabling/disabling SVE, in a manner
consistent with the SVE architectural programmer's model.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Alex Bennée <alex.bennee@linaro.org>
[will: added #include to fix allnoconfig build]
[will: use enable_daif in do_sve_acc]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:05 +08:00
|
|
|
#include <linux/stddef.h>
|
2018-09-28 21:39:21 +08:00
|
|
|
#include <linux/types.h>
|
arm64/sve: Core task context handling
This patch adds the core support for switching and managing the SVE
architectural state of user tasks.
Calls to the existing FPSIMD low-level save/restore functions are
factored out as new functions task_fpsimd_{save,load}(), since SVE
now dynamically may or may not need to be handled at these points
depending on the kernel configuration, hardware features discovered
at boot, and the runtime state of the task. To make these
decisions as fast as possible, const cpucaps are used where
feasible, via the system_supports_sve() helper.
The SVE registers are only tracked for threads that have explicitly
used SVE, indicated by the new thread flag TIF_SVE. Otherwise, the
FPSIMD view of the architectural state is stored in
thread.fpsimd_state as usual.
When in use, the SVE registers are not stored directly in
thread_struct due to their potentially large and variable size.
Because the task_struct slab allocator must be configured very
early during kernel boot, it is also tricky to configure it
correctly to match the maximum vector length provided by the
hardware, since this depends on examining secondary CPUs as well as
the primary. Instead, a pointer sve_state in thread_struct points
to a dynamically allocated buffer containing the SVE register data,
and code is added to allocate and free this buffer at appropriate
times.
TIF_SVE is set when taking an SVE access trap from userspace, if
suitable hardware support has been detected. This enables SVE for
the thread: a subsequent return to userspace will disable the trap
accordingly. If such a trap is taken without sufficient system-
wide hardware support, SIGILL is sent to the thread instead as if
an undefined instruction had been executed: this may happen if
userspace tries to use SVE in a system where not all CPUs support
it for example.
The kernel will clear TIF_SVE and disable SVE for the thread
whenever an explicit syscall is made by userspace. For backwards
compatibility reasons and conformance with the spirit of the base
AArch64 procedure call standard, the subset of the SVE register
state that aliases the FPSIMD registers is still preserved across a
syscall even if this happens. The remainder of the SVE register
state logically becomes zero at syscall entry, though the actual
zeroing work is currently deferred until the thread next tries to
use SVE, causing another trap to the kernel. This implementation
is suboptimal: in the future, the fastpath case may be optimised
to zero the registers in-place and leave SVE enabled for the task,
where beneficial.
TIF_SVE is also cleared in the following slowpath cases, which are
taken as reasonable hints that the task may no longer use SVE:
* exec
* fork and clone
Code is added to sync data between thread.fpsimd_state and
thread.sve_state whenever enabling/disabling SVE, in a manner
consistent with the SVE architectural programmer's model.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Alex Bennée <alex.bennee@linaro.org>
[will: added #include to fix allnoconfig build]
[will: use enable_daif in do_sve_acc]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:05 +08:00
|
|
|
|
2019-07-09 00:36:40 +08:00
|
|
|
#ifdef CONFIG_COMPAT
|
2012-03-05 19:49:32 +08:00
|
|
|
/* Masks for extracting the FPSR and FPCR from the FPSCR */
|
|
|
|
#define VFP_FPSCR_STAT_MASK 0xf800009f
|
|
|
|
#define VFP_FPSCR_CTRL_MASK 0x07f79f00
|
|
|
|
/*
|
|
|
|
* The VFP state has 32x64-bit registers and a single 32-bit
|
|
|
|
* control/status register.
|
|
|
|
*/
|
|
|
|
#define VFP_STATE_SIZE ((32 * 8) + 4)
|
|
|
|
#endif
|
|
|
|
|
2022-05-06 06:15:17 +08:00
|
|
|
/*
|
|
|
|
* When we defined the maximum SVE vector length we defined the ABI so
|
|
|
|
* that the maximum vector length included all the reserved for future
|
|
|
|
* expansion bits in ZCR rather than those just currently defined by
|
|
|
|
* the architecture. While SME follows a similar pattern the fact that
|
|
|
|
* it includes a square matrix means that any allocations that attempt
|
|
|
|
* to cover the maximum potential vector length (such as happen with
|
|
|
|
* the regset used for ptrace) end up being extremely large. Define
|
|
|
|
* the much lower actual limit for use in such situations.
|
|
|
|
*/
|
|
|
|
#define SME_VQ_MAX 16
|
|
|
|
|
2012-03-05 19:49:32 +08:00
|
|
|
struct task_struct;
|
|
|
|
|
2018-03-28 17:50:48 +08:00
|
|
|
extern void fpsimd_save_state(struct user_fpsimd_state *state);
|
|
|
|
extern void fpsimd_load_state(struct user_fpsimd_state *state);
|
2012-03-05 19:49:32 +08:00
|
|
|
|
|
|
|
extern void fpsimd_thread_switch(struct task_struct *next);
|
|
|
|
extern void fpsimd_flush_thread(void);
|
|
|
|
|
arm64/sve: Signal handling support
This patch implements support for saving and restoring the SVE
registers around signals.
A fixed-size header struct sve_context is always included in the
signal frame encoding the thread's vector length at the time of
signal delivery, optionally followed by a variable-layout structure
encoding the SVE registers.
Because of the need to preserve backwards compatibility, the FPSIMD
view of the SVE registers is always dumped as a struct
fpsimd_context in the usual way, in addition to any sve_context.
The SVE vector registers are dumped in full, including bits 127:0
of each register which alias the corresponding FPSIMD vector
registers in the hardware. To avoid any ambiguity about which
alias to restore during sigreturn, the kernel always restores bits
127:0 of each SVE vector register from the fpsimd_context in the
signal frame (which must be present): userspace needs to take this
into account if it wants to modify the SVE vector register contents
on return from a signal.
FPSR and FPCR, which are used by both FPSIMD and SVE, are not
included in sve_context because they are always present in
fpsimd_context anyway.
For signal delivery, a new helper
fpsimd_signal_preserve_current_state() is added to update _both_
the FPSIMD and SVE views in the task struct, to make it easier to
populate this information into the signal frame. Because of the
redundancy between the two views of the state, only one is updated
otherwise.
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Cc: Alex Bennée <alex.bennee@linaro.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:07 +08:00
|
|
|
extern void fpsimd_signal_preserve_current_state(void);
|
2014-02-24 22:26:27 +08:00
|
|
|
extern void fpsimd_preserve_current_state(void);
|
2014-05-08 17:20:23 +08:00
|
|
|
extern void fpsimd_restore_current_state(void);
|
arm64: fpsimd: Fix state leakage when migrating after sigreturn
When refactoring the sigreturn code to handle SVE, I changed the
sigreturn implementation to store the new FPSIMD state from the
user sigframe into task_struct before reloading the state into the
CPU regs. This makes it easier to convert the data for SVE when
needed.
However, it turns out that the fpsimd_state structure passed into
fpsimd_update_current_state is not fully initialised, so assigning
the structure as a whole corrupts current->thread.fpsimd_state.cpu
with uninitialised data.
This means that if the garbage data written to .cpu happens to be a
valid cpu number, and the task is subsequently migrated to the cpu
identified by that number, and then tries to enter userspace,
the CPU FPSIMD regs will be assumed to be correct for the task and
not reloaded as they should be. This can result in returning to
userspace with the FPSIMD registers containing data that is stale or
that belongs to another task or to the kernel.
Knowingly handing around a kernel structure that is incompletely
initialised with user data is a potential source of mistakes,
especially across source file boundaries. To help avoid a repeat
of this issue, this patch adapts the relevant internal API to hand
around the user-accessible subset only: struct user_fpsimd_state.
To avoid future surprises, this patch also converts all uses of
struct fpsimd_state that really only access the user subset, to use
struct user_fpsimd_state. A few missing consts are added to
function prototypes for good measure.
Thanks to Will for spotting the cause of the bug here.
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-12-16 02:34:38 +08:00
|
|
|
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
|
2014-02-24 22:26:27 +08:00
|
|
|
|
2018-09-28 21:39:11 +08:00
|
|
|
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
|
2022-04-19 19:22:21 +08:00
|
|
|
void *sve_state, unsigned int sve_vl,
|
2022-04-19 19:22:23 +08:00
|
|
|
void *za_state, unsigned int sme_vl,
|
|
|
|
u64 *svcr);
|
2018-04-06 21:55:59 +08:00
|
|
|
|
2014-05-08 17:20:23 +08:00
|
|
|
extern void fpsimd_flush_task_state(struct task_struct *target);
|
2019-05-22 01:21:38 +08:00
|
|
|
extern void fpsimd_save_and_flush_cpu_state(void);
|
2014-05-08 17:20:23 +08:00
|
|
|
|
2022-04-19 19:22:22 +08:00
|
|
|
static inline bool thread_sm_enabled(struct thread_struct *thread)
|
|
|
|
{
|
2022-05-11 00:12:01 +08:00
|
|
|
return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
|
2022-04-19 19:22:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool thread_za_enabled(struct thread_struct *thread)
|
|
|
|
{
|
2022-05-11 00:12:01 +08:00
|
|
|
return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
|
2022-04-19 19:22:22 +08:00
|
|
|
}
|
|
|
|
|
2021-12-11 02:40:58 +08:00
|
|
|
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
|
|
|
|
#define VL_ARCH_MAX 0x100
|
2017-10-31 23:51:08 +08:00
|
|
|
|
2018-04-13 00:32:35 +08:00
|
|
|
/* Offset of FFR in the SVE register dump */
|
|
|
|
static inline size_t sve_ffr_offset(int vl)
|
|
|
|
{
|
|
|
|
return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void *sve_pffr(struct thread_struct *thread)
|
|
|
|
{
|
2022-04-19 19:22:22 +08:00
|
|
|
unsigned int vl;
|
|
|
|
|
|
|
|
if (system_supports_sme() && thread_sm_enabled(thread))
|
|
|
|
vl = thread_get_sme_vl(thread);
|
|
|
|
else
|
|
|
|
vl = thread_get_sve_vl(thread);
|
|
|
|
|
|
|
|
return (char *)thread->sve_state + sve_ffr_offset(vl);
|
2018-04-13 00:32:35 +08:00
|
|
|
}
|
|
|
|
|
2021-10-20 01:22:09 +08:00
|
|
|
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
|
2017-10-31 23:51:01 +08:00
|
|
|
extern void sve_load_state(void const *state, u32 const *pfpsr,
|
2021-10-20 01:22:13 +08:00
|
|
|
int restore_ffr);
|
2021-10-20 01:22:09 +08:00
|
|
|
extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
|
2017-10-31 23:51:01 +08:00
|
|
|
extern unsigned int sve_get_vl(void);
|
2021-03-13 03:03:13 +08:00
|
|
|
extern void sve_set_vq(unsigned long vq_minus_1);
|
2022-04-19 19:22:22 +08:00
|
|
|
extern void sme_set_vq(unsigned long vq_minus_1);
|
2022-04-19 19:22:23 +08:00
|
|
|
extern void za_save_state(void *state);
|
|
|
|
extern void za_load_state(void const *state);
|
2018-03-26 22:12:28 +08:00
|
|
|
|
|
|
|
struct arm64_cpu_capabilities;
|
|
|
|
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
2022-04-19 19:22:16 +08:00
|
|
|
extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
|
|
|
extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
2017-10-31 23:51:01 +08:00
|
|
|
|
2018-04-12 23:47:20 +08:00
|
|
|
extern u64 read_zcr_features(void);
|
2022-04-19 19:22:17 +08:00
|
|
|
extern u64 read_smcr_features(void);
|
2018-04-12 23:47:20 +08:00
|
|
|
|
2018-09-28 21:39:21 +08:00
|
|
|
/*
|
|
|
|
* Helpers to translate bit indices in sve_vq_map to VQ values (and
|
|
|
|
* vice versa). This allows find_next_bit() to be used to find the
|
|
|
|
* _maximum_ VQ not exceeding a certain value.
|
|
|
|
*/
|
|
|
|
static inline unsigned int __vq_to_bit(unsigned int vq)
|
|
|
|
{
|
|
|
|
return SVE_VQ_MAX - vq;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned int __bit_to_vq(unsigned int bit)
|
|
|
|
{
|
|
|
|
return SVE_VQ_MAX - bit;
|
|
|
|
}
|
|
|
|
|
2021-10-20 01:22:12 +08:00
|
|
|
|
|
|
|
struct vl_info {
|
|
|
|
enum vec_type type;
|
|
|
|
const char *name; /* For display purposes */
|
|
|
|
|
|
|
|
/* Minimum supported vector length across all CPUs */
|
|
|
|
int min_vl;
|
|
|
|
|
|
|
|
/* Maximum supported vector length across all CPUs */
|
|
|
|
int max_vl;
|
|
|
|
int max_virtualisable_vl;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set of available vector lengths,
|
|
|
|
* where length vq encoded as bit __vq_to_bit(vq):
|
|
|
|
*/
|
|
|
|
DECLARE_BITMAP(vq_map, SVE_VQ_MAX);
|
|
|
|
|
|
|
|
/* Set of vector lengths present on at least one cpu: */
|
|
|
|
DECLARE_BITMAP(vq_partial_map, SVE_VQ_MAX);
|
|
|
|
};
|
2017-10-31 23:51:08 +08:00
|
|
|
|
arm64/sve: Core task context handling
This patch adds the core support for switching and managing the SVE
architectural state of user tasks.
Calls to the existing FPSIMD low-level save/restore functions are
factored out as new functions task_fpsimd_{save,load}(), since SVE
now dynamically may or may not need to be handled at these points
depending on the kernel configuration, hardware features discovered
at boot, and the runtime state of the task. To make these
decisions as fast as possible, const cpucaps are used where
feasible, via the system_supports_sve() helper.
The SVE registers are only tracked for threads that have explicitly
used SVE, indicated by the new thread flag TIF_SVE. Otherwise, the
FPSIMD view of the architectural state is stored in
thread.fpsimd_state as usual.
When in use, the SVE registers are not stored directly in
thread_struct due to their potentially large and variable size.
Because the task_struct slab allocator must be configured very
early during kernel boot, it is also tricky to configure it
correctly to match the maximum vector length provided by the
hardware, since this depends on examining secondary CPUs as well as
the primary. Instead, a pointer sve_state in thread_struct points
to a dynamically allocated buffer containing the SVE register data,
and code is added to allocate and free this buffer at appropriate
times.
TIF_SVE is set when taking an SVE access trap from userspace, if
suitable hardware support has been detected. This enables SVE for
the thread: a subsequent return to userspace will disable the trap
accordingly. If such a trap is taken without sufficient system-
wide hardware support, SIGILL is sent to the thread instead as if
an undefined instruction had been executed: this may happen if
userspace tries to use SVE in a system where not all CPUs support
it for example.
The kernel will clear TIF_SVE and disable SVE for the thread
whenever an explicit syscall is made by userspace. For backwards
compatibility reasons and conformance with the spirit of the base
AArch64 procedure call standard, the subset of the SVE register
state that aliases the FPSIMD registers is still preserved across a
syscall even if this happens. The remainder of the SVE register
state logically becomes zero at syscall entry, though the actual
zeroing work is currently deferred until the thread next tries to
use SVE, causing another trap to the kernel. This implementation
is suboptimal: in the future, the fastpath case may be optimised
to zero the registers in-place and leave SVE enabled for the task,
where beneficial.
TIF_SVE is also cleared in the following slowpath cases, which are
taken as reasonable hints that the task may no longer use SVE:
* exec
* fork and clone
Code is added to sync data between thread.fpsimd_state and
thread.sve_state whenever enabling/disabling SVE, in a manner
consistent with the SVE architectural programmer's model.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Alex Bennée <alex.bennee@linaro.org>
[will: added #include to fix allnoconfig build]
[will: use enable_daif in do_sve_acc]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:05 +08:00
|
|
|
#ifdef CONFIG_ARM64_SVE
|
|
|
|
|
2022-08-18 02:23:23 +08:00
|
|
|
extern void sve_alloc(struct task_struct *task, bool flush);
|
arm64/sve: Core task context handling
This patch adds the core support for switching and managing the SVE
architectural state of user tasks.
Calls to the existing FPSIMD low-level save/restore functions are
factored out as new functions task_fpsimd_{save,load}(), since SVE
now dynamically may or may not need to be handled at these points
depending on the kernel configuration, hardware features discovered
at boot, and the runtime state of the task. To make these
decisions as fast as possible, const cpucaps are used where
feasible, via the system_supports_sve() helper.
The SVE registers are only tracked for threads that have explicitly
used SVE, indicated by the new thread flag TIF_SVE. Otherwise, the
FPSIMD view of the architectural state is stored in
thread.fpsimd_state as usual.
When in use, the SVE registers are not stored directly in
thread_struct due to their potentially large and variable size.
Because the task_struct slab allocator must be configured very
early during kernel boot, it is also tricky to configure it
correctly to match the maximum vector length provided by the
hardware, since this depends on examining secondary CPUs as well as
the primary. Instead, a pointer sve_state in thread_struct points
to a dynamically allocated buffer containing the SVE register data,
and code is added to allocate and free this buffer at appropriate
times.
TIF_SVE is set when taking an SVE access trap from userspace, if
suitable hardware support has been detected. This enables SVE for
the thread: a subsequent return to userspace will disable the trap
accordingly. If such a trap is taken without sufficient system-
wide hardware support, SIGILL is sent to the thread instead as if
an undefined instruction had been executed: this may happen if
userspace tries to use SVE in a system where not all CPUs support
it for example.
The kernel will clear TIF_SVE and disable SVE for the thread
whenever an explicit syscall is made by userspace. For backwards
compatibility reasons and conformance with the spirit of the base
AArch64 procedure call standard, the subset of the SVE register
state that aliases the FPSIMD registers is still preserved across a
syscall even if this happens. The remainder of the SVE register
state logically becomes zero at syscall entry, though the actual
zeroing work is currently deferred until the thread next tries to
use SVE, causing another trap to the kernel. This implementation
is suboptimal: in the future, the fastpath case may be optimised
to zero the registers in-place and leave SVE enabled for the task,
where beneficial.
TIF_SVE is also cleared in the following slowpath cases, which are
taken as reasonable hints that the task may no longer use SVE:
* exec
* fork and clone
Code is added to sync data between thread.fpsimd_state and
thread.sve_state whenever enabling/disabling SVE, in a manner
consistent with the SVE architectural programmer's model.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Alex Bennée <alex.bennee@linaro.org>
[will: added #include to fix allnoconfig build]
[will: use enable_daif in do_sve_acc]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:05 +08:00
|
|
|
extern void fpsimd_release_task(struct task_struct *task);
|
arm64/sve: ptrace and ELF coredump support
This patch defines and implements a new regset NT_ARM_SVE, which
describes a thread's SVE register state. This allows a debugger to
manipulate the SVE state, as well as being included in ELF
coredumps for post-mortem debugging.
Because the regset size and layout are dependent on the thread's
current vector length, it is not possible to define a C struct to
describe the regset contents as is done for existing regsets.
Instead, and for the same reasons, NT_ARM_SVE is based on the
freeform variable-layout approach used for the SVE signal frame.
Additionally, to reduce debug overhead when debugging threads that
might or might not have live SVE register state, NT_ARM_SVE may be
presented in one of two different formats: the old struct
user_fpsimd_state format is embedded for describing the state of a
thread with no live SVE state, whereas a new variable-layout
structure is embedded for describing live SVE state. This avoids a
debugger needing to poll NT_PRFPREG in addition to NT_ARM_SVE, and
allows existing userspace code to handle the non-SVE case without
too much modification.
For this to work, NT_ARM_SVE is defined with a fixed-format header
of type struct user_sve_header, which the recipient can use to
figure out the content, size and layout of the rest of the regset.
Accessor macros are defined to allow the vector-length-dependent
parts of the regset to be manipulated.
Signed-off-by: Alan Hayward <alan.hayward@arm.com>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Alex Bennée <alex.bennee@linaro.org>
Cc: Okamoto Takayuki <tokamoto@jp.fujitsu.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:13 +08:00
|
|
|
extern void fpsimd_sync_to_sve(struct task_struct *task);
|
2022-04-19 19:22:28 +08:00
|
|
|
extern void fpsimd_force_sync_to_sve(struct task_struct *task);
|
arm64/sve: ptrace and ELF coredump support
This patch defines and implements a new regset NT_ARM_SVE, which
describes a thread's SVE register state. This allows a debugger to
manipulate the SVE state, as well as being included in ELF
coredumps for post-mortem debugging.
Because the regset size and layout are dependent on the thread's
current vector length, it is not possible to define a C struct to
describe the regset contents as is done for existing regsets.
Instead, and for the same reasons, NT_ARM_SVE is based on the
freeform variable-layout approach used for the SVE signal frame.
Additionally, to reduce debug overhead when debugging threads that
might or might not have live SVE register state, NT_ARM_SVE may be
presented in one of two different formats: the old struct
user_fpsimd_state format is embedded for describing the state of a
thread with no live SVE state, whereas a new variable-layout
structure is embedded for describing live SVE state. This avoids a
debugger needing to poll NT_PRFPREG in addition to NT_ARM_SVE, and
allows existing userspace code to handle the non-SVE case without
too much modification.
For this to work, NT_ARM_SVE is defined with a fixed-format header
of type struct user_sve_header, which the recipient can use to
figure out the content, size and layout of the rest of the regset.
Accessor macros are defined to allow the vector-length-dependent
parts of the regset to be manipulated.
Signed-off-by: Alan Hayward <alan.hayward@arm.com>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Alex Bennée <alex.bennee@linaro.org>
Cc: Okamoto Takayuki <tokamoto@jp.fujitsu.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:13 +08:00
|
|
|
extern void sve_sync_to_fpsimd(struct task_struct *task);
|
|
|
|
extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
|
|
|
|
|
2021-12-11 02:40:58 +08:00
|
|
|
extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
|
2017-10-31 23:51:08 +08:00
|
|
|
unsigned long vl, unsigned long flags);
|
arm64/sve: Core task context handling
This patch adds the core support for switching and managing the SVE
architectural state of user tasks.
Calls to the existing FPSIMD low-level save/restore functions are
factored out as new functions task_fpsimd_{save,load}(), since SVE
now dynamically may or may not need to be handled at these points
depending on the kernel configuration, hardware features discovered
at boot, and the runtime state of the task. To make these
decisions as fast as possible, const cpucaps are used where
feasible, via the system_supports_sve() helper.
The SVE registers are only tracked for threads that have explicitly
used SVE, indicated by the new thread flag TIF_SVE. Otherwise, the
FPSIMD view of the architectural state is stored in
thread.fpsimd_state as usual.
When in use, the SVE registers are not stored directly in
thread_struct due to their potentially large and variable size.
Because the task_struct slab allocator must be configured very
early during kernel boot, it is also tricky to configure it
correctly to match the maximum vector length provided by the
hardware, since this depends on examining secondary CPUs as well as
the primary. Instead, a pointer sve_state in thread_struct points
to a dynamically allocated buffer containing the SVE register data,
and code is added to allocate and free this buffer at appropriate
times.
TIF_SVE is set when taking an SVE access trap from userspace, if
suitable hardware support has been detected. This enables SVE for
the thread: a subsequent return to userspace will disable the trap
accordingly. If such a trap is taken without sufficient system-
wide hardware support, SIGILL is sent to the thread instead as if
an undefined instruction had been executed: this may happen if
userspace tries to use SVE in a system where not all CPUs support
it for example.
The kernel will clear TIF_SVE and disable SVE for the thread
whenever an explicit syscall is made by userspace. For backwards
compatibility reasons and conformance with the spirit of the base
AArch64 procedure call standard, the subset of the SVE register
state that aliases the FPSIMD registers is still preserved across a
syscall even if this happens. The remainder of the SVE register
state logically becomes zero at syscall entry, though the actual
zeroing work is currently deferred until the thread next tries to
use SVE, causing another trap to the kernel. This implementation
is suboptimal: in the future, the fastpath case may be optimised
to zero the registers in-place and leave SVE enabled for the task,
where beneficial.
TIF_SVE is also cleared in the following slowpath cases, which are
taken as reasonable hints that the task may no longer use SVE:
* exec
* fork and clone
Code is added to sync data between thread.fpsimd_state and
thread.sve_state whenever enabling/disabling SVE, in a manner
consistent with the SVE architectural programmer's model.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Alex Bennée <alex.bennee@linaro.org>
[will: added #include to fix allnoconfig build]
[will: use enable_daif in do_sve_acc]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:05 +08:00
|
|
|
|
2017-10-31 23:51:14 +08:00
|
|
|
extern int sve_set_current_vl(unsigned long arg);
|
|
|
|
extern int sve_get_current_vl(void);
|
|
|
|
|
2018-07-11 21:56:40 +08:00
|
|
|
static inline void sve_user_disable(void)
|
|
|
|
{
|
|
|
|
sysreg_clear_set(cpacr_el1, CPACR_EL1_ZEN_EL0EN, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void sve_user_enable(void)
|
|
|
|
{
|
|
|
|
sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN);
|
|
|
|
}
|
|
|
|
|
2021-03-18 17:43:03 +08:00
|
|
|
/*
 * Replace the LEN field of the ZCR_ELx register @reg with @val, keeping
 * every other bit as read.  @val is masked to the LEN field first.
 *
 * The register is written only when the merged value differs from what
 * was read, so an update that changes nothing costs a read and no write.
 */
#define sve_cond_update_zcr_vq(val, reg)		\
	do {						\
		u64 __zcr = read_sysreg_s((reg));	\
		u64 __new = __zcr & ~ZCR_ELx_LEN_MASK;	\
		__new |= (val) & ZCR_ELx_LEN_MASK;	\
		if (__zcr != __new)			\
			write_sysreg_s(__new, (reg));	\
	} while (0)
|
|
|
|
|
2017-10-31 23:51:10 +08:00
|
|
|
/*
|
|
|
|
* Probing and setup functions.
|
|
|
|
* Calls to these functions must be serialised with one another.
|
|
|
|
*/
|
2021-10-20 01:22:12 +08:00
|
|
|
enum vec_type;
|
|
|
|
|
|
|
|
extern void __init vec_init_vq_map(enum vec_type type);
|
|
|
|
extern void vec_update_vq_map(enum vec_type type);
|
|
|
|
extern int vec_verify_vq_map(enum vec_type type);
|
2017-10-31 23:51:10 +08:00
|
|
|
extern void __init sve_setup(void);
|
|
|
|
|
2021-10-20 01:22:12 +08:00
|
|
|
/*
 * Per-vector-type information table, indexed by enum vec_type and sized
 * by ARM64_VEC_MAX.  The helpers in this header read its max_vl,
 * max_virtualisable_vl and vq_map members.  Declared __ro_after_init.
 */
extern __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX];
|
|
|
|
|
|
|
|
/*
 * Program the LEN field of the EL1 vector control register selected by
 * @type (SYS_ZCR_EL1 for ARM64_VEC_SVE, SYS_SMCR_EL1 for ARM64_VEC_SME)
 * with @val, preserving every other bit of that register.
 *
 * @val is masked to the LEN field before it is merged, in the same way
 * sve_cond_update_zcr_vq() treats its value, so that an out-of-range
 * @val cannot disturb neighbouring control bits.
 *
 * A @type whose case is not compiled in (or any other value) triggers
 * WARN_ON_ONCE() and no register is written.
 */
static inline void write_vl(enum vec_type type, u64 val)
{
	u64 tmp;

	switch (type) {
#ifdef CONFIG_ARM64_SVE
	case ARM64_VEC_SVE:
		tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
		write_sysreg_s(tmp | (val & ZCR_ELx_LEN_MASK), SYS_ZCR_EL1);
		break;
#endif
#ifdef CONFIG_ARM64_SME
	case ARM64_VEC_SME:
		tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
		write_sysreg_s(tmp | (val & SMCR_ELx_LEN_MASK), SYS_SMCR_EL1);
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		break;
	}
}
|
|
|
|
|
|
|
|
static inline int vec_max_vl(enum vec_type type)
|
|
|
|
{
|
|
|
|
return vl_info[type].max_vl;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int vec_max_virtualisable_vl(enum vec_type type)
|
|
|
|
{
|
|
|
|
return vl_info[type].max_virtualisable_vl;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int sve_max_vl(void)
|
|
|
|
{
|
|
|
|
return vec_max_vl(ARM64_VEC_SVE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int sve_max_virtualisable_vl(void)
|
|
|
|
{
|
|
|
|
return vec_max_virtualisable_vl(ARM64_VEC_SVE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
|
|
|
|
static inline bool vq_available(enum vec_type type, unsigned int vq)
|
|
|
|
{
|
|
|
|
return test_bit(__vq_to_bit(vq), vl_info[type].vq_map);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool sve_vq_available(unsigned int vq)
|
|
|
|
{
|
|
|
|
return vq_available(ARM64_VEC_SVE, vq);
|
|
|
|
}
|
|
|
|
|
2022-04-19 19:22:24 +08:00
|
|
|
/*
 * Prototype only; defined out of line.  The !CONFIG_ARM64_SVE build
 * provides an inline stub with this signature that returns 0.
 */
size_t sve_state_size(struct task_struct const *task);
|
|
|
|
|
arm64/sve: Core task context handling
This patch adds the core support for switching and managing the SVE
architectural state of user tasks.
Calls to the existing FPSIMD low-level save/restore functions are
factored out as new functions task_fpsimd_{save,load}(), since SVE
now dynamically may or may not need to be handled at these points
depending on the kernel configuration, hardware features discovered
at boot, and the runtime state of the task. To make these
decisions as fast as possible, const cpucaps are used where
feasible, via the system_supports_sve() helper.
The SVE registers are only tracked for threads that have explicitly
used SVE, indicated by the new thread flag TIF_SVE. Otherwise, the
FPSIMD view of the architectural state is stored in
thread.fpsimd_state as usual.
When in use, the SVE registers are not stored directly in
thread_struct due to their potentially large and variable size.
Because the task_struct slab allocator must be configured very
early during kernel boot, it is also tricky to configure it
correctly to match the maximum vector length provided by the
hardware, since this depends on examining secondary CPUs as well as
the primary. Instead, a pointer sve_state in thread_struct points
to a dynamically allocated buffer containing the SVE register data,
and code is added to allocate and free this buffer at appropriate
times.
TIF_SVE is set when taking an SVE access trap from userspace, if
suitable hardware support has been detected. This enables SVE for
the thread: a subsequent return to userspace will disable the trap
accordingly. If such a trap is taken without sufficient system-
wide hardware support, SIGILL is sent to the thread instead as if
an undefined instruction had been executed: this may happen if
userspace tries to use SVE in a system where not all CPUs support
it for example.
The kernel will clear TIF_SVE and disable SVE for the thread
whenever an explicit syscall is made by userspace. For backwards
compatibility reasons and conformance with the spirit of the base
AArch64 procedure call standard, the subset of the SVE register
state that aliases the FPSIMD registers is still preserved across a
syscall even if this happens. The remainder of the SVE register
state logically becomes zero at syscall entry, though the actual
zeroing work is currently deferred until the thread next tries to
use SVE, causing another trap to the kernel. This implementation
is suboptimal: in the future, the fastpath case may be optimised
to zero the registers in-place and leave SVE enabled for the task,
where beneficial.
TIF_SVE is also cleared in the following slowpath cases, which are
taken as reasonable hints that the task may no longer use SVE:
* exec
* fork and clone
Code is added to sync data between thread.fpsimd_state and
thread.sve_state whenever enabling/disabling SVE, in a manner
consistent with the SVE architectural programmer's model.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Alex Bennée <alex.bennee@linaro.org>
[will: added #include to fix allnoconfig build]
[will: use enable_daif in do_sve_acc]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:05 +08:00
|
|
|
#else /* ! CONFIG_ARM64_SVE */
|
|
|
|
|
2022-08-18 02:23:23 +08:00
|
|
|
/* !CONFIG_ARM64_SVE: deliberately empty, both arguments are ignored. */
static inline void sve_alloc(struct task_struct *task, bool flush)
{
}
|
arm64/sve: Core task context handling
This patch adds the core support for switching and managing the SVE
architectural state of user tasks.
Calls to the existing FPSIMD low-level save/restore functions are
factored out as new functions task_fpsimd_{save,load}(), since SVE
now dynamically may or may not need to be handled at these points
depending on the kernel configuration, hardware features discovered
at boot, and the runtime state of the task. To make these
decisions as fast as possible, const cpucaps are used where
feasible, via the system_supports_sve() helper.
The SVE registers are only tracked for threads that have explicitly
used SVE, indicated by the new thread flag TIF_SVE. Otherwise, the
FPSIMD view of the architectural state is stored in
thread.fpsimd_state as usual.
When in use, the SVE registers are not stored directly in
thread_struct due to their potentially large and variable size.
Because the task_struct slab allocator must be configured very
early during kernel boot, it is also tricky to configure it
correctly to match the maximum vector length provided by the
hardware, since this depends on examining secondary CPUs as well as
the primary. Instead, a pointer sve_state in thread_struct points
to a dynamically allocated buffer containing the SVE register data,
and code is added to allocate and free this buffer at appropriate
times.
TIF_SVE is set when taking an SVE access trap from userspace, if
suitable hardware support has been detected. This enables SVE for
the thread: a subsequent return to userspace will disable the trap
accordingly. If such a trap is taken without sufficient system-
wide hardware support, SIGILL is sent to the thread instead as if
an undefined instruction had been executed: this may happen if
userspace tries to use SVE in a system where not all CPUs support
it for example.
The kernel will clear TIF_SVE and disable SVE for the thread
whenever an explicit syscall is made by userspace. For backwards
compatibility reasons and conformance with the spirit of the base
AArch64 procedure call standard, the subset of the SVE register
state that aliases the FPSIMD registers is still preserved across a
syscall even if this happens. The remainder of the SVE register
state logically becomes zero at syscall entry, though the actual
zeroing work is currently deferred until the thread next tries to
use SVE, causing another trap to the kernel. This implementation
is suboptimal: in the future, the fastpath case may be optimised
to zero the registers in-place and leave SVE enabled for the task,
where beneficial.
TIF_SVE is also cleared in the following slowpath cases, which are
taken as reasonable hints that the task may no longer use SVE:
* exec
* fork and clone
Code is added to sync data between thread.fpsimd_state and
thread.sve_state whenever enabling/disabling SVE, in a manner
consistent with the SVE architectural programmer's model.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Alex Bennée <alex.bennee@linaro.org>
[will: added #include to fix allnoconfig build]
[will: use enable_daif in do_sve_acc]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:05 +08:00
|
|
|
/* !CONFIG_ARM64_SVE: deliberately empty, @task is ignored. */
static inline void fpsimd_release_task(struct task_struct *task)
{
}
|
arm64/sve: ptrace and ELF coredump support
This patch defines and implements a new regset NT_ARM_SVE, which
describes a thread's SVE register state. This allows a debugger to
manipulate the SVE state, as well as being included in ELF
coredumps for post-mortem debugging.
Because the regset size and layout are dependent on the thread's
current vector length, it is not possible to define a C struct to
describe the regset contents as is done for existing regsets.
Instead, and for the same reasons, NT_ARM_SVE is based on the
freeform variable-layout approach used for the SVE signal frame.
Additionally, to reduce debug overhead when debugging threads that
might or might not have live SVE register state, NT_ARM_SVE may be
presented in one of two different formats: the old struct
user_fpsimd_state format is embedded for describing the state of a
thread with no live SVE state, whereas a new variable-layout
structure is embedded for describing live SVE state. This avoids a
debugger needing to poll NT_PRFPREG in addition to NT_ARM_SVE, and
allows existing userspace code to handle the non-SVE case without
too much modification.
For this to work, NT_ARM_SVE is defined with a fixed-format header
of type struct user_sve_header, which the recipient can use to
figure out the content, size and layout of the rest of the regset.
Accessor macros are defined to allow the vector-length-dependent
parts of the regset to be manipulated.
Signed-off-by: Alan Hayward <alan.hayward@arm.com>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Alex Bennée <alex.bennee@linaro.org>
Cc: Okamoto Takayuki <tokamoto@jp.fujitsu.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:13 +08:00
|
|
|
/*
 * !CONFIG_ARM64_SVE: both sync helpers are deliberately empty and
 * ignore @task.
 */
static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
|
|
|
|
|
2021-10-22 22:16:34 +08:00
|
|
|
/* !CONFIG_ARM64_SVE: always reports 0. */
static inline int sve_max_virtualisable_vl(void)
{
	return 0;
}
|
|
|
|
|
2017-10-31 23:51:14 +08:00
|
|
|
/* !CONFIG_ARM64_SVE: @arg is ignored and the call always fails with -EINVAL. */
static inline int sve_set_current_vl(unsigned long arg)
{
	return -EINVAL;
}
|
|
|
|
|
|
|
|
/* !CONFIG_ARM64_SVE: always fails with -EINVAL. */
static inline int sve_get_current_vl(void)
{
	return -EINVAL;
}
|
|
|
|
|
2021-10-20 01:22:12 +08:00
|
|
|
/*
 * !CONFIG_ARM64_SVE: always returns -EINVAL.  Note that this differs
 * from the SME counterpart's stub, sme_max_vl(), which returns 0.
 */
static inline int sve_max_vl(void)
{
	return -EINVAL;
}
|
|
|
|
|
|
|
|
/* !CONFIG_ARM64_SVE: no vector quantum is ever available. */
static inline bool sve_vq_available(unsigned int vq)
{
	return false;
}
|
|
|
|
|
2018-07-11 21:56:40 +08:00
|
|
|
/*
 * !CONFIG_ARM64_SVE: these must never be reached.  The bodies consist
 * solely of BUILD_BUG(), so a call that is not eliminated at compile
 * time is meant to break the build rather than run.
 */
static inline void sve_user_disable(void) { BUILD_BUG(); }
static inline void sve_user_enable(void) { BUILD_BUG(); }
|
|
|
|
|
2021-03-31 17:44:39 +08:00
|
|
|
/*
 * !CONFIG_ARM64_SVE: expands to an empty statement.  Neither @val nor
 * @reg is evaluated.
 */
#define sve_cond_update_zcr_vq(val, reg) do { } while (0)
|
|
|
|
|
2021-10-20 01:22:12 +08:00
|
|
|
/*
 * !CONFIG_ARM64_SVE: the probing and setup hooks are empty, and
 * vec_verify_vq_map() unconditionally returns 0.
 */
static inline void vec_init_vq_map(enum vec_type t) { }
static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }
|
arm64/sve: Core task context handling
This patch adds the core support for switching and managing the SVE
architectural state of user tasks.
Calls to the existing FPSIMD low-level save/restore functions are
factored out as new functions task_fpsimd_{save,load}(), since SVE
now dynamically may or may not need to be handled at these points
depending on the kernel configuration, hardware features discovered
at boot, and the runtime state of the task. To make these
decisions as fast as possible, const cpucaps are used where
feasible, via the system_supports_sve() helper.
The SVE registers are only tracked for threads that have explicitly
used SVE, indicated by the new thread flag TIF_SVE. Otherwise, the
FPSIMD view of the architectural state is stored in
thread.fpsimd_state as usual.
When in use, the SVE registers are not stored directly in
thread_struct due to their potentially large and variable size.
Because the task_struct slab allocator must be configured very
early during kernel boot, it is also tricky to configure it
correctly to match the maximum vector length provided by the
hardware, since this depends on examining secondary CPUs as well as
the primary. Instead, a pointer sve_state in thread_struct points
to a dynamically allocated buffer containing the SVE register data,
and code is added to allocate and free this buffer at appropriate
times.
TIF_SVE is set when taking an SVE access trap from userspace, if
suitable hardware support has been detected. This enables SVE for
the thread: a subsequent return to userspace will disable the trap
accordingly. If such a trap is taken without sufficient system-
wide hardware support, SIGILL is sent to the thread instead as if
an undefined instruction had been executed: this may happen if
userspace tries to use SVE in a system where not all CPUs support
it for example.
The kernel will clear TIF_SVE and disable SVE for the thread
whenever an explicit syscall is made by userspace. For backwards
compatibility reasons and conformance with the spirit of the base
AArch64 procedure call standard, the subset of the SVE register
state that aliases the FPSIMD registers is still preserved across a
syscall even if this happens. The remainder of the SVE register
state logically becomes zero at syscall entry, though the actual
zeroing work is currently deferred until the thread next tries to
use SVE, causing another trap to the kernel. This implementation
is suboptimal: in the future, the fastpath case may be optimised
to zero the registers in-place and leave SVE enabled for the task,
where beneficial.
TIF_SVE is also cleared in the following slowpath cases, which are
taken as reasonable hints that the task may no longer use SVE:
* exec
* fork and clone
Code is added to sync data between thread.fpsimd_state and
thread.sve_state whenever enabling/disabling SVE, in a manner
consistent with the SVE architectural programmer's model.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Alex Bennée <alex.bennee@linaro.org>
[will: added #include to fix allnoconfig build]
[will: use enable_daif in do_sve_acc]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:05 +08:00
|
|
|
|
2022-04-19 19:22:24 +08:00
|
|
|
/* !CONFIG_ARM64_SVE: @task is ignored and the size is always 0. */
static inline size_t sve_state_size(struct task_struct const *task)
{
	return 0;
}
|
|
|
|
|
arm64/sve: Core task context handling
This patch adds the core support for switching and managing the SVE
architectural state of user tasks.
Calls to the existing FPSIMD low-level save/restore functions are
factored out as new functions task_fpsimd_{save,load}(), since SVE
now dynamically may or may not need to be handled at these points
depending on the kernel configuration, hardware features discovered
at boot, and the runtime state of the task. To make these
decisions as fast as possible, const cpucaps are used where
feasible, via the system_supports_sve() helper.
The SVE registers are only tracked for threads that have explicitly
used SVE, indicated by the new thread flag TIF_SVE. Otherwise, the
FPSIMD view of the architectural state is stored in
thread.fpsimd_state as usual.
When in use, the SVE registers are not stored directly in
thread_struct due to their potentially large and variable size.
Because the task_struct slab allocator must be configured very
early during kernel boot, it is also tricky to configure it
correctly to match the maximum vector length provided by the
hardware, since this depends on examining secondary CPUs as well as
the primary. Instead, a pointer sve_state in thread_struct points
to a dynamically allocated buffer containing the SVE register data,
and code is added to allocate and free this buffer at appropriate
times.
TIF_SVE is set when taking an SVE access trap from userspace, if
suitable hardware support has been detected. This enables SVE for
the thread: a subsequent return to userspace will disable the trap
accordingly. If such a trap is taken without sufficient system-
wide hardware support, SIGILL is sent to the thread instead as if
an undefined instruction had been executed: this may happen if
userspace tries to use SVE in a system where not all CPUs support
it for example.
The kernel will clear TIF_SVE and disable SVE for the thread
whenever an explicit syscall is made by userspace. For backwards
compatibility reasons and conformance with the spirit of the base
AArch64 procedure call standard, the subset of the SVE register
state that aliases the FPSIMD registers is still preserved across a
syscall even if this happens. The remainder of the SVE register
state logically becomes zero at syscall entry, though the actual
zeroing work is currently deferred until the thread next tries to
use SVE, causing another trap to the kernel. This implementation
is suboptimal: in the future, the fastpath case may be optimised
to zero the registers in-place and leave SVE enabled for the task,
where beneficial.
TIF_SVE is also cleared in the following slowpath cases, which are
taken as reasonable hints that the task may no longer use SVE:
* exec
* fork and clone
Code is added to sync data between thread.fpsimd_state and
thread.sve_state whenever enabling/disabling SVE, in a manner
consistent with the SVE architectural programmer's model.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Alex Bennée <alex.bennee@linaro.org>
[will: added #include to fix allnoconfig build]
[will: use enable_daif in do_sve_acc]
Signed-off-by: Will Deacon <will.deacon@arm.com>
2017-10-31 23:51:05 +08:00
|
|
|
#endif /* ! CONFIG_ARM64_SVE */
|
|
|
|
|
2022-04-19 19:22:14 +08:00
|
|
|
#ifdef CONFIG_ARM64_SME
|
|
|
|
|
2022-04-19 19:22:24 +08:00
|
|
|
static inline void sme_user_disable(void)
|
|
|
|
{
|
|
|
|
sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void sme_user_enable(void)
|
|
|
|
{
|
|
|
|
sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
|
|
|
|
}
|
|
|
|
|
2022-04-19 19:22:14 +08:00
|
|
|
static inline void sme_smstart_sm(void)
|
|
|
|
{
|
|
|
|
asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void sme_smstop_sm(void)
|
|
|
|
{
|
|
|
|
asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr"));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void sme_smstop(void)
|
|
|
|
{
|
|
|
|
asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
|
|
|
|
}
|
|
|
|
|
2022-04-19 19:22:17 +08:00
|
|
|
/*
 * Prototype only (marked __init); defined out of line.  The
 * !CONFIG_ARM64_SME build provides an empty inline stub instead.
 */
extern void __init sme_setup(void);
|
|
|
|
|
|
|
|
static inline int sme_max_vl(void)
|
|
|
|
{
|
|
|
|
return vec_max_vl(ARM64_VEC_SME);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int sme_max_virtualisable_vl(void)
|
|
|
|
{
|
|
|
|
return vec_max_virtualisable_vl(ARM64_VEC_SME);
|
|
|
|
}
|
|
|
|
|
2022-04-19 19:22:24 +08:00
|
|
|
/*
 * SME entry points defined out of line; only prototypes live here.
 * With CONFIG_ARM64_SME disabled they are replaced by inline stubs:
 * sme_alloc() does nothing, sme_get_vl() returns 0 and the
 * sme_{set,get}_current_vl() pair returns -EINVAL.
 */
extern void sme_alloc(struct task_struct *task);
extern unsigned int sme_get_vl(void);
extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);
|
2022-04-19 19:22:17 +08:00
|
|
|
|
2022-04-19 19:22:24 +08:00
|
|
|
/*
 * Return how many bytes of memory are required to store the full SME
 * specific state (currently just ZA) for task, given task's currently
 * configured vector length.
 *
 * The length comes from task_get_sme_vl(); it is passed through
 * sve_vq_from_vl() before being given to ZA_SIG_REGS_SIZE().
 */
static inline size_t za_state_size(struct task_struct const *task)
{
	unsigned int vl = task_get_sme_vl(task);

	return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
}
|
|
|
|
|
2022-04-19 19:22:14 +08:00
|
|
|
#else
|
|
|
|
|
2022-04-19 19:22:24 +08:00
|
|
|
/*
 * !CONFIG_ARM64_SME: these must never be reached.  The bodies consist
 * solely of BUILD_BUG(), so a call that is not eliminated at compile
 * time is meant to break the build rather than run.
 */
static inline void sme_user_disable(void) { BUILD_BUG(); }
static inline void sme_user_enable(void) { BUILD_BUG(); }
|
|
|
|
|
2022-04-19 19:22:14 +08:00
|
|
|
/*
 * !CONFIG_ARM64_SME: the mode-switch helpers are deliberately empty, so
 * callers may invoke them unconditionally (unlike the sme_user_*() stubs,
 * which use BUILD_BUG()).
 */
static inline void sme_smstart_sm(void) { }
static inline void sme_smstop_sm(void) { }
static inline void sme_smstop(void) { }
|
|
|
|
|
2022-04-19 19:22:24 +08:00
|
|
|
/* !CONFIG_ARM64_SME: deliberately empty, @task is ignored. */
static inline void sme_alloc(struct task_struct *task)
{
}
|
2022-04-19 19:22:17 +08:00
|
|
|
/*
 * !CONFIG_ARM64_SME: setup is empty and every vector-length query
 * reports 0.
 */
static inline void sme_setup(void) { }
static inline unsigned int sme_get_vl(void) { return 0; }
static inline int sme_max_vl(void) { return 0; }
static inline int sme_max_virtualisable_vl(void) { return 0; }
|
2022-04-19 19:22:19 +08:00
|
|
|
/*
 * !CONFIG_ARM64_SME: both accessors always fail with -EINVAL; @arg is
 * ignored.
 */
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }
|
2022-04-19 19:22:17 +08:00
|
|
|
|
2022-04-19 19:22:24 +08:00
|
|
|
/* !CONFIG_ARM64_SME: @task is ignored and the size is always 0. */
static inline size_t za_state_size(struct task_struct const *task)
{
	return 0;
}
|
|
|
|
|
2022-04-19 19:22:14 +08:00
|
|
|
#endif /* ! CONFIG_ARM64_SME */
|
|
|
|
|
2017-08-04 00:23:22 +08:00
|
|
|
/*
 * For use by EFI runtime services calls only.
 * Prototypes only; both functions are defined out of line.
 * NOTE(review): the names suggest a begin/end pair that must bracket the
 * EFI call - confirm the pairing rules at the definitions.
 */
extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void);
|
|
|
|
|
2012-03-05 19:49:32 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif
|