mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-23 09:43:32 +08:00
Correct cacheline size to 32-bytes for ppc405 memset.S (bug 14595).
This patch also creates a version of memset.S for the ppc476 processor which uses a 128-byte cacheline size for dcbz insns.
This commit is contained in:
parent
9f45bfe790
commit
09dec6c37e
4
NEWS
4
NEWS
@ -16,8 +16,8 @@ Version 2.17
|
||||
14303, 14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417,
|
||||
14459, 14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532,
|
||||
14538, 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583,
|
||||
14587, 14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14683,
|
||||
14694, 14716, 14743, 14767, 14783.
|
||||
14587, 14595, 14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661,
|
||||
14683, 14694, 14716, 14743, 14767, 14783.
|
||||
|
||||
* Support for STT_GNU_IFUNC symbols added for s390 and s390x.
|
||||
Optimized versions of memcpy, memset, and memcmp added for System z10 and
|
||||
|
@ -1,3 +1,12 @@
|
||||
2012-09-25 Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
|
||||
Ryan S. Arnold <rsa@linux.vnet.ibm.com>
|
||||
|
||||
[BZ #14595]
|
||||
* sysdeps/powerpc/powerpc32/476/memset.S: New file copied from
|
||||
405/memset.S to preserve 128-byte cacheline size.
|
||||
* sysdeps/powerpc/powerpc32/405/memset.S (memset): Fix cacheline size
|
||||
to 32-bytes for 405, 440, and 464 processors.
|
||||
|
||||
2012-10-19 Roland McGrath <roland@hack.frob.com>
|
||||
|
||||
* sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Optimized memset implementation for PowerPC476.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
/* Optimized memset for PowerPC405,440,464 (32-byte cacheline).
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -104,7 +104,7 @@ L(use_dcbz):
|
||||
add r3,r3,r7
|
||||
|
||||
L(skip_string_loop):
|
||||
clrlwi r8,r6,25
|
||||
clrlwi r8,r6,27
|
||||
srwi. r8,r8,4
|
||||
beq L(dcbz_pre_loop)
|
||||
mtctr r8
|
||||
@ -119,14 +119,14 @@ L(word_loop):
|
||||
bdnz L(word_loop)
|
||||
|
||||
L(dcbz_pre_loop):
|
||||
srwi r6,r5,7
|
||||
srwi r6,r5,5
|
||||
mtctr r6
|
||||
addi r7,0,0
|
||||
|
||||
L(dcbz_loop):
|
||||
dcbz r3,r7
|
||||
addi r3,r3,0x80
|
||||
subi r5,r5,0x80
|
||||
addi r3,r3,0x20
|
||||
subi r5,r5,0x20
|
||||
bdnz L(dcbz_loop)
|
||||
srwi. r6,r5,4
|
||||
beq L(postword2_count_loop)
|
||||
|
154
ports/sysdeps/powerpc/powerpc32/476/memset.S
Normal file
154
ports/sysdeps/powerpc/powerpc32/476/memset.S
Normal file
@ -0,0 +1,154 @@
|
||||
/* Optimized memset for PowerPC476 (128-byte cacheline).
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* memset
|
||||
|
||||
r3:destination address and return address
|
||||
r4:source integer to copy
|
||||
r5:byte count
|
||||
r11:source byte replicated into all 32 bits of reg
|
||||
r12:saved copy of the destination address (the return value)
|
||||
|
||||
Save the destination address (the return value) in r12
|
||||
If destination is unaligned and count is greater than 255 bytes
|
||||
set 0-3 bytes to make destination aligned
|
||||
If count is greater than 255 bytes and setting memory to zero
|
||||
use dcbz to set memory when we can
|
||||
otherwise do the following
|
||||
If 16 or more bytes to set we use a 4-word (16-byte) store loop.
|
||||
Finally we set 0-15 extra bytes with string store. */
|
||||
|
||||
EALIGN (BP_SYM (memset), 5, 0) /* memset entry: r3 = dest, r4 = fill value, r5 = byte count; returns original dest in r3.  NOTE(review): EALIGN presumably aligns the entry to 2^5 bytes -- confirm in sysdep.h.  */
|
||||
rlwinm r11,r4,0,24,31 /* r11 = low byte of r4 (the fill byte).  */
|
||||
rlwimi r11,r4,8,16,23 /* Insert the fill byte into bits 16-23: low halfword now holds it twice.  */
|
||||
rlwimi r11,r11,16,0,15 /* Copy the low halfword into the high halfword: fill byte in all four bytes.  */
|
||||
addi r12,r3,0 /* r12 = original dest, restored into r3 at L(end_memset).  */
|
||||
cmpwi r5,0x00FF /* Signed compare of count against 255.  */
|
||||
ble L(preword8_count_loop) /* Small count: skip alignment and dcbz, go straight to the 16-byte store loop.  */
|
||||
cmpwi r4,0x00 /* Is the (full) fill value zero?  */
|
||||
beq L(use_dcbz) /* Zero fill of more than 255 bytes: use the dcbz path.  */
|
||||
neg r6,r3
|
||||
clrlwi. r6,r6,30 /* r6 = (-dest) & 3 = bytes needed to reach 4-byte alignment.  */
|
||||
beq L(preword8_count_loop) /* Already word aligned.  */
|
||||
addi r8,0,1 /* r8 = 1, the per-byte decrement used below.  */
|
||||
mtctr r6 /* CTR = number of leading bytes to store.  */
|
||||
subi r3,r3,1 /* Bias dest by -1 because stbu stores at r3+1 and updates r3.  */
|
||||
|
||||
|
||||
L(unaligned_bytecopy_loop): /* Store 1-3 bytes until dest is word aligned.  */
|
||||
stbu r11,0x1(r3) /* Store one fill byte at r3+1; r3 advances by 1.  */
|
||||
subf. r5,r8,r5 /* count -= 1, setting cr0.  */
|
||||
beq L(end_memset) /* Exit early if count reached zero.  */
|
||||
bdnz L(unaligned_bytecopy_loop)
|
||||
addi r3,r3,1 /* Undo the -1 bias: r3 = next byte to store.  */
|
||||
|
||||
|
||||
L(preword8_count_loop): /* Set up the 16-bytes-per-iteration store loop.  */
|
||||
srwi. r6,r5,4 /* r6 = count / 16 = number of 16-byte chunks.  */
|
||||
beq L(preword2_count_loop) /* Fewer than 16 bytes left: only the tail remains.  */
|
||||
mtctr r6
|
||||
addi r3,r3,-4 /* Bias dest by -4 because stwu stores at r3+4 and updates r3.  */
|
||||
mr r8,r11 /* r8-r10 = fill word (r11 already holds it).  */
|
||||
|
||||
mr r9,r11
|
||||
|
||||
mr r10,r11
|
||||
|
||||
|
||||
L(word8_count_loop_no_dcbt): /* Four word stores (16 bytes) per iteration.  */
|
||||
stwu r8,4(r3)
|
||||
|
||||
stwu r9,4(r3)
|
||||
|
||||
subi r5,r5,0x10 /* count -= 16.  */
|
||||
stwu r10,4(r3)
|
||||
|
||||
stwu r11,4(r3)
|
||||
|
||||
bdnz L(word8_count_loop_no_dcbt)
|
||||
addi r3,r3,4 /* Undo the -4 bias: r3 = next byte to store.  */
|
||||
|
||||
|
||||
L(preword2_count_loop): /* Tail: store the remaining 0-15 bytes.  */
|
||||
clrlwi. r7,r5,28 /* r7 = count & 15.  */
|
||||
beq L(end_memset) /* Nothing left.  */
|
||||
mr r8,r11 /* stswx reads consecutive registers r8,r9,r10,r11; fill them all.  */
|
||||
|
||||
mr r9,r11
|
||||
|
||||
mr r10,r11
|
||||
|
||||
mtxer r7 /* XER byte-count field = r7 for the string store.  */
|
||||
stswx r8,0,r3 /* Store r7 bytes starting at address r3.  */
|
||||
|
||||
|
||||
L(end_memset): /* Common exit.  */
|
||||
addi r3,r12,0 /* Return the original dest pointer.  */
|
||||
blr
|
||||
|
||||
|
||||
L(use_dcbz): /* Zero fill, count > 255: align dest, then clear whole cache blocks with dcbz.  */
|
||||
neg r6,r3 /* r6 = -dest; reused at L(skip_string_loop).  */
|
||||
clrlwi. r7,r6,28 /* r7 = (-dest) & 15 = bytes needed to reach 16-byte alignment.  */
|
||||
beq L(skip_string_loop) /* Already 16-byte aligned.  */
|
||||
mr r8,r11 /* Fill r8-r10 for the string store (r11 is zero on this path).  */
|
||||
|
||||
mr r9,r11
|
||||
|
||||
mr r10,r11
|
||||
|
||||
subf r5,r7,r5 /* count -= r7.  */
|
||||
mtxer r7 /* XER byte-count field = r7.  */
|
||||
stswx r8,0,r3 /* Store the r7 leading bytes.  */
|
||||
add r3,r3,r7 /* dest is now 16-byte aligned.  */
|
||||
|
||||
|
||||
L(skip_string_loop): /* Advance from 16-byte to 128-byte alignment in 16-byte steps.  */
|
||||
clrlwi r8,r6,25 /* r8 = (-original dest) & 127 = bytes from the original dest to a 128-byte boundary.  */
|
||||
srwi. r8,r8,4 /* r8 = number of 16-byte chunks (0-7) still needed.  */
|
||||
beq L(dcbz_pre_loop) /* Already 128-byte aligned.  */
|
||||
mtctr r8
|
||||
|
||||
|
||||
L(word_loop): /* Four word stores (16 bytes) per iteration.  */
|
||||
stw r11,0(r3)
|
||||
|
||||
subi r5,r5,0x10 /* count -= 16.  */
|
||||
stw r11,4(r3)
|
||||
|
||||
stw r11,8(r3)
|
||||
|
||||
stw r11,12(r3)
|
||||
|
||||
addi r3,r3,0x10 /* dest += 16.  */
|
||||
bdnz L(word_loop)
|
||||
|
||||
|
||||
L(dcbz_pre_loop): /* count >= 129 here: entry required > 255 and at most 15 + 7*16 bytes were used for alignment.  */
|
||||
srwi r6,r5,7 /* r6 = count / 128 = number of 128-byte blocks (non-zero, so CTR does not wrap).  */
|
||||
mtctr r6
|
||||
addi r7,0,0 /* r7 = 0, index operand for dcbz.  */
|
||||
|
||||
|
||||
L(dcbz_loop): /* One dcbz per 0x80 bytes; relies on a 128-byte cache block.  */
|
||||
dcbz r3,r7 /* Zero the data cache block containing address r3+r7.  */
|
||||
addi r3,r3,0x80 /* dest += 128.  */
|
||||
subi r5,r5,0x80 /* count -= 128.  */
|
||||
bdnz L(dcbz_loop)
|
||||
srwi. r6,r5,4 /* r6 = remaining count / 16.  */
|
||||
beq L(postword2_count_loop) /* Fewer than 16 bytes left.  */
|
||||
mtctr r6
|
||||
|
||||
|
||||
L(postword8_count_loop): /* Remaining full 16-byte chunks after the dcbz loop.  */
|
||||
stw r11,0(r3)
|
||||
|
||||
subi r5,r5,0x10 /* count -= 16.  */
|
||||
stw r11,4(r3)
|
||||
|
||||
stw r11,8(r3)
|
||||
|
||||
stw r11,12(r3)
|
||||
|
||||
addi r3,r3,0x10 /* dest += 16.  */
|
||||
bdnz L(postword8_count_loop)
|
||||
|
||||
|
||||
L(postword2_count_loop): /* Tail of the dcbz path: remaining 0-15 bytes.  */
|
||||
clrlwi. r7,r5,28 /* r7 = count & 15.  */
|
||||
beq L(end_memset)
|
||||
mr r8,r11 /* Fill r8-r10 for the string store.  */
|
||||
|
||||
mr r9,r11
|
||||
|
||||
mr r10,r11
|
||||
|
||||
mtxer r7 /* XER byte-count field = r7.  */
|
||||
stswx r8,0,r3 /* Store the final r7 bytes.  */
|
||||
b L(end_memset)
|
||||
|
||||
END (BP_SYM (memset))
|
||||
libc_hidden_builtin_def (memset)
|
Loading…
Reference in New Issue
Block a user