diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index d2a4415a4c08..7f2ae5872151 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -158,4 +158,37 @@ .previous \ 97: + +/* + * Extract unaligned word that is split between two registers w0 and w1 + * into r regardless of machine endianness. SAR must be loaded with the + * starting bit of the word (see __ssa8). + */ + + .macro __src_b r, w0, w1 +#ifdef __XTENSA_EB__ + src \r, \w0, \w1 +#else + src \r, \w1, \w0 +#endif + .endm + +/* + * Load 2 lowest address bits of r into SAR for __src_b to extract unaligned + * word starting at r from two registers loaded from consecutive aligned + * addresses covering r regardless of machine endianness. + * + * r 0 1 2 3 + * LE SAR 0 8 16 24 + * BE SAR 32 24 16 8 + */ + + .macro __ssa8 r +#ifdef __XTENSA_EB__ + ssa8b \r +#else + ssa8l \r +#endif + .endm + #endif /* _XTENSA_ASMMACRO_H */ diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S index 24b3189d7841..9301452e521e 100644 --- a/arch/xtensa/kernel/align.S +++ b/arch/xtensa/kernel/align.S @@ -19,6 +19,7 @@ #include #include #include +#include #include #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION @@ -66,8 +67,6 @@ #define INSN_T 24 #define INSN_OP1 16 -.macro __src_b r, w0, w1; src \r, \w0, \w1; .endm -.macro __ssa8 r; ssa8b \r; .endm .macro __ssa8r r; ssa8l \r; .endm .macro __sh r, s; srl \r, \s; .endm .macro __sl r, s; sll \r, \s; .endm @@ -81,8 +80,6 @@ #define INSN_T 4 #define INSN_OP1 12 -.macro __src_b r, w0, w1; src \r, \w1, \w0; .endm -.macro __ssa8 r; ssa8l \r; .endm .macro __ssa8r r; ssa8b \r; .endm .macro __sh r, s; sll \r, \s; .endm .macro __sl r, s; srl \r, \s; .endm diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S index b1c219acabe7..9bda748a1e3e 100644 --- a/arch/xtensa/lib/memcopy.S +++ b/arch/xtensa/lib/memcopy.S @@ -10,22 +10,7 @@ */ #include - - .macro src_b r, w0, w1 -#ifdef __XTENSA_EB__ - src \r, \w0, \w1 -#else - src \r, \w1, \w0 -#endif - .endm - - .macro ssa8 r -#ifdef __XTENSA_EB__ - ssa8b \r -#else - ssa8l \r -#endif - .endm +#include /* * void *memcpy(void *dst, const void *src, size_t len); @@ -209,7 +194,7 @@ memcpy: .Lsrcunaligned: _beqz a4, .Ldone # avoid loading anything for zero-length copies # copy 16 bytes per iteration for word-aligned dst and unaligned src - ssa8 a3 # set shift amount from byte offset + __ssa8 a3 # set shift amount from byte offset /* set to 1 when running on ISS (simulator) with the lint or ferret client, or 0 to save a few cycles */ @@ -229,16 +214,16 @@ memcpy: .Loop2: l32i a7, a3, 4 l32i a8, a3, 8 - src_b a6, a6, a7 + __src_b a6, a6, a7 s32i a6, a5, 0 l32i a9, a3, 12 - src_b a7, a7, a8 + __src_b a7, a7, a8 s32i a7, a5, 4 l32i a6, a3, 16 - src_b a8, a8, a9 + __src_b a8, a8, a9 s32i a8, a5, 8 addi a3, a3, 16 - src_b a9, a9, a6 + __src_b a9, a9, a6 s32i a9, a5, 12 addi a5, a5, 16 #if !XCHAL_HAVE_LOOPS @@ -249,10 +234,10 @@ memcpy: # copy 8 bytes l32i a7, a3, 4 l32i a8, a3, 8 - src_b a6, a6, a7 + __src_b a6, a6, a7 s32i a6, a5, 0 addi a3, a3, 8 - src_b a7, a7, a8 + __src_b a7, a7, a8 s32i a7, a5, 4 addi a5, a5, 8 mov a6, a8 @@ -261,7 +246,7 @@ memcpy: # copy 4 bytes l32i a7, a3, 4 addi a3, a3, 4 - src_b a6, a6, a7 + __src_b a6, a6, a7 s32i a6, a5, 0 addi a5, a5, 4 mov a6, a7 @@ -485,7 +470,7 @@ memmove: .Lbacksrcunaligned: _beqz a4, .Lbackdone # avoid loading anything for zero-length copies # copy 16 bytes per iteration for word-aligned dst and unaligned src - ssa8 a3 # set shift amount from byte offset + __ssa8 a3 # set shift amount from byte offset #define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS with * the lint or ferret client, or 0 * to save a few cycles */ @@ -506,15 +491,15 @@ memmove: l32i a7, a3, 12 l32i a8, a3, 8 addi a5, a5, -16 - src_b a6, a7, a6 + __src_b a6, a7, a6 s32i a6, a5, 12 l32i a9, a3, 4 - src_b a7, a8, a7 + __src_b a7, a8, a7 s32i a7, a5, 8 l32i a6, a3, 0 - src_b a8, a9, a8 + __src_b a8, a9, a8 s32i a8, a5, 4 - src_b a9, a6, a9 + __src_b a9, a6, a9 s32i a9, a5, 0 #if !XCHAL_HAVE_LOOPS bne a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start @@ -526,9 +511,9 @@ memmove: l32i a7, a3, 4 l32i a8, a3, 0 addi a5, a5, -8 - src_b a6, a7, a6 + __src_b a6, a7, a6 s32i a6, a5, 4 - src_b a7, a8, a7 + __src_b a7, a8, a7 s32i a7, a5, 0 mov a6, a8 .Lback12: @@ -537,7 +522,7 @@ memmove: addi a3, a3, -4 l32i a7, a3, 0 addi a5, a5, -4 - src_b a6, a7, a6 + __src_b a6, a7, a6 s32i a6, a5, 0 mov a6, a7 .Lback13: diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S index 4172b73b0364..0959b6e71f11 100644 --- a/arch/xtensa/lib/usercopy.S +++ b/arch/xtensa/lib/usercopy.S @@ -56,14 +56,6 @@ #include #include -#ifdef __XTENSA_EB__ -#define ALIGN(R, W0, W1) src R, W0, W1 -#define SSA8(R) ssa8b R -#else -#define ALIGN(R, W0, W1) src R, W1, W0 -#define SSA8(R) ssa8l R -#endif - .text .align 4 .global __xtensa_copy_user @@ -81,7 +73,7 @@ __xtensa_copy_user: # per iteration movi a8, 3 # if source is also aligned, bnone a3, a8, .Laligned # then use word copy - SSA8( a3) # set shift amount from byte offset + __ssa8 a3 # set shift amount from byte offset bnez a4, .Lsrcunaligned movi a2, 0 # return success for len==0 retw @@ -220,16 +212,16 @@ EX(10f) l32i a6, a3, 0 # load first word .Loop2: EX(10f) l32i a7, a3, 4 EX(10f) l32i a8, a3, 8 - ALIGN( a6, a6, a7) + __src_b a6, a6, a7 EX(10f) s32i a6, a5, 0 EX(10f) l32i a9, a3, 12 - ALIGN( a7, a7, a8) + __src_b a7, a7, a8 EX(10f) s32i a7, a5, 4 EX(10f) l32i a6, a3, 16 - ALIGN( a8, a8, a9) + __src_b a8, a8, a9 EX(10f) s32i a8, a5, 8 addi a3, a3, 16 - ALIGN( a9, a9, a6) + __src_b a9, a9, a6 EX(10f) s32i a9, a5, 12 addi a5, a5, 16 #if !XCHAL_HAVE_LOOPS @@ -240,10 +232,10 @@ EX(10f) s32i a9, a5, 12 # copy 8 bytes EX(10f) l32i a7, a3, 4 EX(10f) l32i a8, a3, 8 - ALIGN( a6, a6, a7) + __src_b a6, a6, a7 EX(10f) s32i a6, a5, 0 addi a3, a3, 8 - ALIGN( a7, a7, a8) + __src_b a7, a7, a8 EX(10f) s32i a7, a5, 4 addi a5, a5, 8 mov a6, a8 @@ -252,7 +244,7 @@ EX(10f) s32i a7, a5, 4 # copy 4 bytes EX(10f) l32i a7, a3, 4 addi a3, a3, 4 - ALIGN( a6, a6, a7) + __src_b a6, a6, a7 EX(10f) s32i a6, a5, 0 addi a5, a5, 4 mov a6, a7