linux/arch/loongarch/lib/memcpy.S
Qing Zhang 5aa4ac64e6 LoongArch: Add KASAN (Kernel Address Sanitizer) support
1/8 of kernel addresses reserved for shadow memory. But for LoongArch,
There are a lot of holes between different segments and valid address
space (256T available) is insufficient to map all these segments to kasan
shadow memory with the common formula provided by kasan core, saying
(addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET

So LoongArch has a arch-specific mapping formula, different segments are
mapped individually, and only limited space lengths of these specific
segments are mapped to shadow.

At early boot stage the whole shadow region populated with just one
physical page (kasan_early_shadow_page). Later, this page is reused as
readonly zero shadow for some memory that kasan currently don't track.
After mapping the physical memory, pages for shadow memory are allocated
and mapped.

Functions like memset()/memcpy()/memmove() do a lot of memory accesses.
If bad pointer passed to one of these function it is important to be
caught. Compiler's instrumentation cannot do this since these functions
are written in assembly.

KASan replaces memory functions with manually instrumented variants.
Original functions declared as weak symbols so strong definitions in
mm/kasan/kasan.c could replace them. Original functions have aliases
with '__' prefix in names, so we could call non-instrumented variant
if needed.

Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2023-09-06 22:54:16 +08:00

200 lines
3.0 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>
.section .noinstr.text, "ax"
SYM_FUNC_START(memcpy)
/*
* Some CPUs support hardware unaligned access
*/
ALTERNATIVE "b __memcpy_generic", \
"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
SYM_FUNC_ALIAS(__memcpy, memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)
_ASM_NOKPROBE(memcpy)
_ASM_NOKPROBE(__memcpy)
/*
* void *__memcpy_generic(void *dst, const void *src, size_t n)
*
* a0: dst
* a1: src
* a2: n
*/
SYM_FUNC_START(__memcpy_generic)
move a3, a0
beqz a2, 2f
1: ld.b t0, a1, 0
st.b t0, a0, 0
addi.d a0, a0, 1
addi.d a1, a1, 1
addi.d a2, a2, -1
bgt a2, zero, 1b
2: move a0, a3
jr ra
SYM_FUNC_END(__memcpy_generic)
_ASM_NOKPROBE(__memcpy_generic)
.align 5
SYM_FUNC_START_NOALIGN(__memcpy_small)
pcaddi t0, 8
slli.d a2, a2, 5
add.d t0, t0, a2
jr t0
.align 5
0: jr ra
.align 5
1: ld.b t0, a1, 0
st.b t0, a0, 0
jr ra
.align 5
2: ld.h t0, a1, 0
st.h t0, a0, 0
jr ra
.align 5
3: ld.h t0, a1, 0
ld.b t1, a1, 2
st.h t0, a0, 0
st.b t1, a0, 2
jr ra
.align 5
4: ld.w t0, a1, 0
st.w t0, a0, 0
jr ra
.align 5
5: ld.w t0, a1, 0
ld.b t1, a1, 4
st.w t0, a0, 0
st.b t1, a0, 4
jr ra
.align 5
6: ld.w t0, a1, 0
ld.h t1, a1, 4
st.w t0, a0, 0
st.h t1, a0, 4
jr ra
.align 5
7: ld.w t0, a1, 0
ld.w t1, a1, 3
st.w t0, a0, 0
st.w t1, a0, 3
jr ra
.align 5
8: ld.d t0, a1, 0
st.d t0, a0, 0
jr ra
SYM_FUNC_END(__memcpy_small)
_ASM_NOKPROBE(__memcpy_small)
/*
* void *__memcpy_fast(void *dst, const void *src, size_t n)
*
* a0: dst
* a1: src
* a2: n
*/
SYM_FUNC_START(__memcpy_fast)
sltui t0, a2, 9
bnez t0, __memcpy_small
add.d a3, a1, a2
add.d a2, a0, a2
ld.d a6, a1, 0
ld.d a7, a3, -8
/* align up destination address */
andi t1, a0, 7
sub.d t0, zero, t1
addi.d t0, t0, 8
add.d a1, a1, t0
add.d a5, a0, t0
addi.d a4, a3, -64
bgeu a1, a4, .Llt64
/* copy 64 bytes at a time */
.Lloop64:
ld.d t0, a1, 0
ld.d t1, a1, 8
ld.d t2, a1, 16
ld.d t3, a1, 24
ld.d t4, a1, 32
ld.d t5, a1, 40
ld.d t6, a1, 48
ld.d t7, a1, 56
addi.d a1, a1, 64
st.d t0, a5, 0
st.d t1, a5, 8
st.d t2, a5, 16
st.d t3, a5, 24
st.d t4, a5, 32
st.d t5, a5, 40
st.d t6, a5, 48
st.d t7, a5, 56
addi.d a5, a5, 64
bltu a1, a4, .Lloop64
/* copy the remaining bytes */
.Llt64:
addi.d a4, a3, -32
bgeu a1, a4, .Llt32
ld.d t0, a1, 0
ld.d t1, a1, 8
ld.d t2, a1, 16
ld.d t3, a1, 24
addi.d a1, a1, 32
st.d t0, a5, 0
st.d t1, a5, 8
st.d t2, a5, 16
st.d t3, a5, 24
addi.d a5, a5, 32
.Llt32:
addi.d a4, a3, -16
bgeu a1, a4, .Llt16
ld.d t0, a1, 0
ld.d t1, a1, 8
addi.d a1, a1, 16
st.d t0, a5, 0
st.d t1, a5, 8
addi.d a5, a5, 16
.Llt16:
addi.d a4, a3, -8
bgeu a1, a4, .Llt8
ld.d t0, a1, 0
st.d t0, a5, 0
.Llt8:
st.d a6, a0, 0
st.d a7, a2, -8
/* return */
jr ra
SYM_FUNC_END(__memcpy_fast)
_ASM_NOKPROBE(__memcpy_fast)