From 7f079fdc16e88ebb8020e17b2fd900e8924da29a Mon Sep 17 00:00:00 2001 From: caiyinyu Date: Wed, 5 Jul 2023 16:38:05 +0800 Subject: [PATCH] LoongArch: Add vector implementation for _dl_runtime_resolve. --- sysdeps/loongarch/dl-machine.h | 13 +- sysdeps/loongarch/dl-trampoline.S | 88 +++--------- sysdeps/loongarch/dl-trampoline.h | 129 ++++++++++++++++++ sysdeps/loongarch/ldsodefs.h | 1 + sysdeps/loongarch/sys/asm.h | 2 + sysdeps/loongarch/sys/regdef.h | 18 +++ .../unix/sysv/linux/loongarch/bits/hwcap.h | 37 +++++ .../unix/sysv/linux/loongarch/cpu-features.h | 29 ++++ 8 files changed, 246 insertions(+), 71 deletions(-) create mode 100644 sysdeps/loongarch/dl-trampoline.h create mode 100644 sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h create mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.h diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h index e217d37c4b..02ce17852c 100644 --- a/sysdeps/loongarch/dl-machine.h +++ b/sysdeps/loongarch/dl-machine.h @@ -270,6 +270,10 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], /* If using PLTs, fill in the first two entries of .got.plt. */ if (l->l_info[DT_JMPREL]) { +#if HAVE_LOONGARCH_VEC_ASM + extern void _dl_runtime_resolve_lasx (void) attribute_hidden; + extern void _dl_runtime_resolve_lsx (void) attribute_hidden; +#endif extern void _dl_runtime_resolve (void) attribute_hidden; extern void _dl_runtime_profile (void) attribute_hidden; @@ -296,7 +300,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], /* This function will get called to fix up the GOT entry indicated by the offset on the stack, and then jump to the resolved address. */ - gotplt[0] = (ElfW (Addr)) & _dl_runtime_resolve; +#if HAVE_LOONGARCH_VEC_ASM + if (SUPPORT_LASX) + gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx; + else if (SUPPORT_LSX) + gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx; + else +#endif + gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve; } gotplt[1] = (ElfW (Addr)) l; } diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S index ed9ec0901c..2a561b7136 100644 --- a/sysdeps/loongarch/dl-trampoline.S +++ b/sysdeps/loongarch/dl-trampoline.S @@ -19,78 +19,26 @@ #include #include +#if HAVE_LOONGARCH_VEC_ASM +#define USE_LASX +#define _dl_runtime_resolve _dl_runtime_resolve_lasx +#include "dl-trampoline.h" +#undef FRAME_SIZE +#undef USE_LASX +#undef _dl_runtime_resolve + +#define USE_LSX +#define _dl_runtime_resolve _dl_runtime_resolve_lsx +#include "dl-trampoline.h" +#undef FRAME_SIZE +#undef USE_LSX +#undef _dl_runtime_resolve +#endif + +#include "dl-trampoline.h" + #include "dl-link.h" -/* Assembler veneer called from the PLT header code for lazy loading. - The PLT header passes its own args in t0-t2. */ -#ifdef __loongarch_soft_float -#define FRAME_SIZE (-((-10 * SZREG) & ALMASK)) -#else -#define FRAME_SIZE (-((-10 * SZREG - 8 * SZFREG) & ALMASK)) -#endif - -ENTRY (_dl_runtime_resolve) - - /* Save arguments to stack. */ - ADDI sp, sp, -FRAME_SIZE - REG_S ra, sp, 9*SZREG - REG_S a0, sp, 1*SZREG - REG_S a1, sp, 2*SZREG - REG_S a2, sp, 3*SZREG - REG_S a3, sp, 4*SZREG - REG_S a4, sp, 5*SZREG - REG_S a5, sp, 6*SZREG - REG_S a6, sp, 7*SZREG - REG_S a7, sp, 8*SZREG - -#ifndef __loongarch_soft_float - FREG_S fa0, sp, 10*SZREG + 0*SZFREG - FREG_S fa1, sp, 10*SZREG + 1*SZFREG - FREG_S fa2, sp, 10*SZREG + 2*SZFREG - FREG_S fa3, sp, 10*SZREG + 3*SZFREG - FREG_S fa4, sp, 10*SZREG + 4*SZFREG - FREG_S fa5, sp, 10*SZREG + 5*SZFREG - FREG_S fa6, sp, 10*SZREG + 6*SZFREG - FREG_S fa7, sp, 10*SZREG + 7*SZFREG -#endif - - /* Update .got.plt and obtain runtime address of callee */ - SLLI a1, t1, 1 - or a0, t0, zero - ADD a1, a1, t1 - la a2, _dl_fixup - jirl ra, a2, 0 - or t1, v0, zero - - /* Restore arguments from stack. */ - REG_L ra, sp, 9*SZREG - REG_L a0, sp, 1*SZREG - REG_L a1, sp, 2*SZREG - REG_L a2, sp, 3*SZREG - REG_L a3, sp, 4*SZREG - REG_L a4, sp, 5*SZREG - REG_L a5, sp, 6*SZREG - REG_L a6, sp, 7*SZREG - REG_L a7, sp, 8*SZREG - -#ifndef __loongarch_soft_float - FREG_L fa0, sp, 10*SZREG + 0*SZFREG - FREG_L fa1, sp, 10*SZREG + 1*SZFREG - FREG_L fa2, sp, 10*SZREG + 2*SZFREG - FREG_L fa3, sp, 10*SZREG + 3*SZFREG - FREG_L fa4, sp, 10*SZREG + 4*SZFREG - FREG_L fa5, sp, 10*SZREG + 5*SZFREG - FREG_L fa6, sp, 10*SZREG + 6*SZFREG - FREG_L fa7, sp, 10*SZREG + 7*SZFREG -#endif - - ADDI sp, sp, FRAME_SIZE - - /* Invoke the callee. */ - jirl zero, t1, 0 -END (_dl_runtime_resolve) - - ENTRY (_dl_runtime_profile) /* LoongArch we get called with: t0 linkr_map pointer diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h new file mode 100644 index 0000000000..f60634a55a --- /dev/null +++ b/sysdeps/loongarch/dl-trampoline.h @@ -0,0 +1,129 @@ +/* PLT trampolines. + Copyright (C) 2022-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +/* Assembler veneer called from the PLT header code for lazy loading. + The PLT header passes its own args in t0-t2. */ +#ifndef __loongarch_soft_float +# ifdef USE_LASX +# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZXREG) & ALMASK)) +# elif defined USE_LSX +# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZVREG) & ALMASK)) +# else +# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG) & ALMASK)) +# endif +#else +# define FRAME_SIZE (-((-9 * SZREG) & ALMASK)) +#endif + +ENTRY (_dl_runtime_resolve) + + /* Save arguments to stack. */ + ADDI sp, sp, -FRAME_SIZE + + REG_S ra, sp, 0*SZREG + REG_S a0, sp, 1*SZREG + REG_S a1, sp, 2*SZREG + REG_S a2, sp, 3*SZREG + REG_S a3, sp, 4*SZREG + REG_S a4, sp, 5*SZREG + REG_S a5, sp, 6*SZREG + REG_S a6, sp, 7*SZREG + REG_S a7, sp, 8*SZREG + +#ifdef USE_LASX + xvst xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG + xvst xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG + xvst xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG + xvst xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG + xvst xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG + xvst xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG + xvst xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG + xvst xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG +#elif defined USE_LSX + vst vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG + vst vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG + vst vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG + vst vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG + vst vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG + vst vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG + vst vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG + vst vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG +#elif !defined __loongarch_soft_float + FREG_S fa0, sp, 9*SZREG + 0*SZFREG + FREG_S fa1, sp, 9*SZREG + 1*SZFREG + FREG_S fa2, sp, 9*SZREG + 2*SZFREG + FREG_S fa3, sp, 9*SZREG + 3*SZFREG + FREG_S fa4, sp, 9*SZREG + 4*SZFREG + FREG_S fa5, sp, 9*SZREG + 5*SZFREG + FREG_S fa6, sp, 9*SZREG + 6*SZFREG + FREG_S fa7, sp, 9*SZREG + 7*SZFREG +#endif + + /* Update .got.plt and obtain runtime address of callee */ + SLLI a1, t1, 1 + or a0, t0, zero + ADD a1, a1, t1 + la a2, _dl_fixup + jirl ra, a2, 0 + or t1, v0, zero + + /* Restore arguments from stack. */ + REG_L ra, sp, 0*SZREG + REG_L a0, sp, 1*SZREG + REG_L a1, sp, 2*SZREG + REG_L a2, sp, 3*SZREG + REG_L a3, sp, 4*SZREG + REG_L a4, sp, 5*SZREG + REG_L a5, sp, 6*SZREG + REG_L a6, sp, 7*SZREG + REG_L a7, sp, 8*SZREG + +#ifdef USE_LASX + xvld xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG + xvld xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG + xvld xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG + xvld xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG + xvld xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG + xvld xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG + xvld xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG + xvld xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG +#elif defined USE_LSX + vld vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG + vld vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG + vld vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG + vld vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG + vld vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG + vld vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG + vld vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG + vld vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG +#elif !defined __loongarch_soft_float + FREG_L fa0, sp, 9*SZREG + 0*SZFREG + FREG_L fa1, sp, 9*SZREG + 1*SZFREG + FREG_L fa2, sp, 9*SZREG + 2*SZFREG + FREG_L fa3, sp, 9*SZREG + 3*SZFREG + FREG_L fa4, sp, 9*SZREG + 4*SZFREG + FREG_L fa5, sp, 9*SZREG + 5*SZFREG + FREG_L fa6, sp, 9*SZREG + 6*SZFREG + FREG_L fa7, sp, 9*SZREG + 7*SZFREG +#endif + + ADDI sp, sp, FRAME_SIZE + + /* Invoke the callee. */ + jirl zero, t1, 0 +END (_dl_runtime_resolve) diff --git a/sysdeps/loongarch/ldsodefs.h b/sysdeps/loongarch/ldsodefs.h index a8ef803aec..3b7c4ab83d 100644 --- a/sysdeps/loongarch/ldsodefs.h +++ b/sysdeps/loongarch/ldsodefs.h @@ -20,6 +20,7 @@ #define _LOONGARCH_LDSODEFS_H 1 #include +#include struct La_loongarch_regs; struct La_loongarch_retval; diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h index 0bb430bb05..d1a279b8fb 100644 --- a/sysdeps/loongarch/sys/asm.h +++ b/sysdeps/loongarch/sys/asm.h @@ -25,6 +25,8 @@ /* Macros to handle different pointer/register sizes for 32/64-bit code. */ #define SZREG 8 #define SZFREG 8 +#define SZVREG 16 +#define SZXREG 32 #define REG_L ld.d #define REG_S st.d #define SRLI srli.d diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h index 91810f5e8e..5100f36d24 100644 --- a/sysdeps/loongarch/sys/regdef.h +++ b/sysdeps/loongarch/sys/regdef.h @@ -90,4 +90,22 @@ #define fs6 $f30 #define fs7 $f31 +#define vr0 $vr0 +#define vr1 $vr1 +#define vr2 $vr2 +#define vr3 $vr3 +#define vr4 $vr4 +#define vr5 $vr5 +#define vr6 $vr6 +#define vr7 $vr7 + +#define xr0 $xr0 +#define xr1 $xr1 +#define xr2 $xr2 +#define xr3 $xr3 +#define xr4 $xr4 +#define xr5 $xr5 +#define xr6 $xr6 +#define xr7 $xr7 + #endif /* _SYS_REGDEF_H */ diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h new file mode 100644 index 0000000000..5104b69cbc --- /dev/null +++ b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h @@ -0,0 +1,37 @@ +/* Defines for bits in AT_HWCAP. LoongArch64 Linux version. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if !defined (_SYS_AUXV_H) +# error "Never include directly; use instead." +#endif + +/* The following must match the kernel's . */ +/* HWCAP flags */ +#define HWCAP_LOONGARCH_CPUCFG (1 << 0) +#define HWCAP_LOONGARCH_LAM (1 << 1) +#define HWCAP_LOONGARCH_UAL (1 << 2) +#define HWCAP_LOONGARCH_FPU (1 << 3) +#define HWCAP_LOONGARCH_LSX (1 << 4) +#define HWCAP_LOONGARCH_LASX (1 << 5) +#define HWCAP_LOONGARCH_CRC32 (1 << 6) +#define HWCAP_LOONGARCH_COMPLEX (1 << 7) +#define HWCAP_LOONGARCH_CRYPTO (1 << 8) +#define HWCAP_LOONGARCH_LVZ (1 << 9) +#define HWCAP_LOONGARCH_LBT_X86 (1 << 10) +#define HWCAP_LOONGARCH_LBT_ARM (1 << 11) +#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h new file mode 100644 index 0000000000..e371e13b15 --- /dev/null +++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h @@ -0,0 +1,29 @@ +/* Initialize CPU feature data. LoongArch64 version. + This file is part of the GNU C Library. + Copyright (C) 2022 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _CPU_FEATURES_LOONGARCH64_H +#define _CPU_FEATURES_LOONGARCH64_H + +#include + +#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL) +#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) +#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) + +#endif /* _CPU_FEATURES_LOONGARCH64_H */ +