Fix issues in x86 memset-sse2.S/memset-sse2-rep.S
This commit is contained in:
parent 7ca890b88e
commit cc50f1a4b4
ChangeLog | 15 +++++++++++++++
ChangeLog
@@ -1,3 +1,18 @@
+2010-02-24  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/i386/i686/multiarch/memset-sse2-rep.S: Remove redundant
+	punpcklbw.
+	Use unsigned conditional jumps.
+	(128bytesormore_nt): Renamed to ...
+	(128bytesormore_endof_L1): This.
+	Use add instead of lea if possible.
+	Correct unwind info.
+	* sysdeps/i386/i686/multiarch/memset-sse2.S: Remove redundant
+	punpcklbw.
+	Use unsigned conditional jumps.
+	Use add instead of lea if possible.
+	Correct unwind info.
+
 2010-02-24  Ulrich Drepper  <drepper@redhat.com>
 
 	[BZ #11319]
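A note on the "Use unsigned conditional jumps" items above: the byte count handled by these routines is an unsigned size, so after a cmp the branch has to test the unsigned flags (jae/jb look at CF) rather than the signed ones (jge/jl look at SF and OF). A minimal C sketch of the pitfall, using illustrative names only:

    #include <stdio.h>
    #include <stdint.h>

    int
    main (void)
    {
      /* A block length with the top bit set, which memset () may legally
         receive on a 32-bit machine.  */
      uint32_t len = 0x80000000u;	/* 2 GiB */

      /* Unsigned view: what cmp + jae/jb test.  2 GiB is >= 128, so the
         large-block path is correctly taken.  */
      printf ("unsigned: len >= 128 -> %d\n", len >= 128u);

      /* Signed view: what cmp + jge/jl would test.  On a two's-complement
         target the same bit pattern is negative, so the large-block path
         would wrongly be skipped.  */
      printf ("signed:   len >= 128 -> %d\n", (int32_t) len >= 128);
      return 0;
    }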
sysdeps/i386/i686/multiarch/memset-sse2-rep.S
@@ -243,7 +243,6 @@ L(32bytesormore):
 	pxor	%xmm0, %xmm0
 #else
 	movd	%eax, %xmm0
-	punpcklbw %xmm0, %xmm0
 	pshufd	$0, %xmm0, %xmm0
 #endif
 	testl	$0xf, %edx
@@ -261,7 +260,7 @@ L(not_aligned_16):
 	ALIGN (4)
 L(aligned_16):
 	cmp	$128, %ecx
-	jge	L(128bytesormore)
+	jae	L(128bytesormore)
 
 L(aligned_16_less128bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
@@ -293,7 +292,7 @@ L(128bytesormore):
  * fast string will prefetch and combine data efficiently.
  */
 	cmp	%edi, %ecx
-	jae	L(128bytesormore_nt)
+	jae	L(128bytesormore_endof_L1)
 	subl	$128, %ecx
 L(128bytesormore_normal):
 	sub	$128, %ecx
@@ -306,7 +305,7 @@ L(128bytesormore_normal):
 	movdqa	%xmm0, 0x60(%edx)
 	movdqa	%xmm0, 0x70(%edx)
 	lea	128(%edx), %edx
-	jl	L(128bytesless_normal)
+	jb	L(128bytesless_normal)
 
 
 	sub	$128, %ecx
@@ -319,15 +318,16 @@ L(128bytesormore_normal):
 	movdqa	%xmm0, 0x60(%edx)
 	movdqa	%xmm0, 0x70(%edx)
 	lea	128(%edx), %edx
-	jge	L(128bytesormore_normal)
+	jae	L(128bytesormore_normal)
 
 L(128bytesless_normal):
 	POP (%edi)
-	lea	128(%ecx), %ecx
+	add	$128, %ecx
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
 
+	CFI_PUSH (%edi)
 	ALIGN (4)
-L(128bytesormore_nt):
+L(128bytesormore_endof_L1):
 	mov	%edx, %edi
 	mov	%ecx, %edx
 	shr	$2, %ecx
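On the "Remove redundant punpcklbw" item: the hunks above suggest that %eax already holds the fill byte replicated into all four byte positions by the time the SSE2 broadcast runs (an assumption based on the surrounding code, not visible in these hunks), so movd followed by pshufd $0 is enough, and the dropped punpcklbw %xmm0, %xmm0 only interleaved bytes that were already identical. A rough sketch with SSE2 intrinsics:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>
    #include <emmintrin.h>

    int
    main (void)
    {
      unsigned char c = 0x5a;
      uint32_t pattern = c * 0x01010101u;	/* byte replicated to 32 bits */

      /* movd + pshufd $0: broadcast the 32-bit pattern across the register.  */
      __m128i v = _mm_shuffle_epi32 (_mm_cvtsi32_si128 ((int) pattern), 0);

      /* Same thing with the removed punpcklbw %xmm0, %xmm0 in between: it
         only interleaves bytes that are already all equal to c, so the final
         pshufd result is identical.  */
      __m128i t = _mm_cvtsi32_si128 ((int) pattern);
      __m128i w = _mm_shuffle_epi32 (_mm_unpacklo_epi8 (t, t), 0);

      unsigned char a[16], b[16];
      _mm_storeu_si128 ((__m128i *) a, v);
      _mm_storeu_si128 ((__m128i *) b, w);
      printf ("identical broadcasts: %d\n", memcmp (a, b, 16) == 0);
      return 0;
    }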
sysdeps/i386/i686/multiarch/memset-sse2.S
@@ -243,7 +243,6 @@ L(32bytesormore):
 	pxor	%xmm0, %xmm0
 #else
 	movd	%eax, %xmm0
-	punpcklbw %xmm0, %xmm0
 	pshufd	$0, %xmm0, %xmm0
 #endif
 	testl	$0xf, %edx
@@ -261,7 +260,7 @@ L(not_aligned_16):
 	ALIGN (4)
 L(aligned_16):
 	cmp	$128, %ecx
-	jge	L(128bytesormore)
+	jae	L(128bytesormore)
 
 L(aligned_16_less128bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
@@ -287,14 +286,17 @@ L(128bytesormore):
 
 #ifdef DATA_CACHE_SIZE
 	POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
 	cmp	$DATA_CACHE_SIZE, %ecx
 #else
 # ifdef SHARED
+# define RESTORE_EBX_STATE
 	call	__i686.get_pc_thunk.bx
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
 	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
 # else
 	POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
 	cmp	__x86_data_cache_size, %ecx
 # endif
 #endif
@@ -312,7 +314,7 @@ L(128bytesormore_normal):
 	movdqa	%xmm0, 0x60(%edx)
 	movdqa	%xmm0, 0x70(%edx)
 	lea	128(%edx), %edx
-	jl	L(128bytesless_normal)
+	jb	L(128bytesless_normal)
 
 
 	sub	$128, %ecx
@@ -325,10 +327,10 @@ L(128bytesormore_normal):
 	movdqa	%xmm0, 0x60(%edx)
 	movdqa	%xmm0, 0x70(%edx)
 	lea	128(%edx), %edx
-	jge	L(128bytesormore_normal)
+	jae	L(128bytesormore_normal)
 
 L(128bytesless_normal):
-	lea	128(%ecx), %ecx
+	add	$128, %ecx
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
 
 	ALIGN (4)
@@ -346,11 +348,12 @@ L(128bytes_L2_normal):
 	movaps	%xmm0, 0x70(%edx)
 	add	$128, %edx
 	cmp	$128, %ecx
-	jge	L(128bytes_L2_normal)
+	jae	L(128bytes_L2_normal)
 
 L(128bytesless_L2_normal):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
 
+	RESTORE_EBX_STATE
 L(128bytesormore_nt_start):
 	sub	%ebx, %ecx
 	ALIGN (4)
@@ -368,7 +371,7 @@ L(128bytesormore_shared_cache_loop):
 	movdqa	%xmm0, 0x70(%edx)
 	add	$0x80, %edx
 	cmp	$0x80, %ebx
-	jge	L(128bytesormore_shared_cache_loop)
+	jae	L(128bytesormore_shared_cache_loop)
 	cmp	$0x80, %ecx
 	jb	L(shared_cache_loop_end)
 	ALIGN (4)
@@ -384,7 +387,7 @@ L(128bytesormore_nt):
 	movntdq	%xmm0, 0x70(%edx)
 	add	$0x80, %edx
 	cmp	$0x80, %ecx
-	jge	L(128bytesormore_nt)
+	jae	L(128bytesormore_nt)
 	sfence
 L(shared_cache_loop_end):
 #if defined DATA_CACHE_SIZE || !defined SHARED
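The final hunk above belongs to the non-temporal path taken once the length reaches the shared cache size: an unrolled movntdq loop followed by sfence. A hedged C sketch of that shape; fill128_nt is an invented name, the buffer is assumed 16-byte aligned (as movntdq requires), and the jump-table tail dispatch is replaced here by a plain memset:

    #include <string.h>
    #include <emmintrin.h>

    /* Illustrative only: mirrors the movntdq loop and trailing sfence from
       the hunk above.  Assumes dst is 16-byte aligned.  */
    static void
    fill128_nt (unsigned char *dst, unsigned char c, size_t n)
    {
      __m128i v = _mm_set1_epi8 ((char) c);	/* broadcast the fill byte */
      size_t i = 0;

      /* Eight non-temporal 16-byte stores per iteration, bypassing the
         cache, like the unrolled block at L(128bytesormore_nt).  */
      for (; i + 128 <= n; i += 128)
        for (int k = 0; k < 128; k += 16)
          _mm_stream_si128 ((__m128i *) (dst + i + k), v);

      /* Order the streaming stores before whatever follows, matching the
         sfence after the loop.  */
      _mm_sfence ();

      /* The real code dispatches the remaining 0-127 bytes through a jump
         table; memset stands in for that here.  */
      memset (dst + i, c, n - i);
    }

    int
    main (void)
    {
      /* 16-byte aligned buffer for the demonstration.  */
      static unsigned char buf[4096] __attribute__ ((aligned (16)));
      fill128_nt (buf, 0x5a, sizeof buf);
      return buf[4095] == 0x5a ? 0 : 1;
    }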