mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-24 10:22:41 +08:00
Update.
2001-05-22 David Mosberger <davidm@hpl.hp.com> * sysdeps/ia64/memmove.S: Increase MEMLAT from 6 to 21 for better performance. * sysdeps/ia64/memcpy.S: Likewise. * sysdeps/ia64/bcopy.S: New file. * sysdeps/ia64/bzero.S: New file (derived from memset.S). 2001-05-26 Ulrich Drepper <drepper@redhat.com> * sysdeps/ia64/fpu/libm-test-ulps: Add deltas for tanh(-0.7).
This commit is contained in:
parent
17ffa4986d
commit
995a692a48
14
ChangeLog
14
ChangeLog
@ -1,3 +1,17 @@
|
||||
2001-05-22 David Mosberger <davidm@hpl.hp.com>
|
||||
|
||||
* sysdeps/ia64/memmove.S: Increase MEMLAT from 6 to 21 for better
|
||||
performance.
|
||||
* sysdeps/ia64/memcpy.S: Likewise.
|
||||
|
||||
* sysdeps/ia64/bcopy.S: New file.
|
||||
|
||||
* sysdeps/ia64/bzero.S: New file (derived from memset.S).
|
||||
|
||||
2001-05-26 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* sysdeps/ia64/fpu/libm-test-ulps: Add deltas for tanh(-0.7).
|
||||
|
||||
2001-05-24 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* iconvdata/Makefile: Add rules for CP10007.
|
||||
|
10
sysdeps/ia64/bcopy.S
Normal file
10
sysdeps/ia64/bcopy.S
Normal file
@ -0,0 +1,10 @@
|
||||
#include <sysdep.h>
|
||||
|
||||
/* bcopy: exchange the first two input registers (in0 and in1) through
   the scratch register r8 and then branch to memmove.  in2 is passed
   through untouched.
   NOTE(review): presumably this maps bcopy (src, dest, len) onto
   memmove (dest, src, len) -- confirm against the C prototypes.  */
ENTRY(bcopy)
|
||||
.regstk 3, 0, 0, 0 // register stack frame: 3 inputs, no locals, outputs or rotating regs
|
||||
mov r8 = in0 // r8 = first argument (temporary for the swap)
|
||||
mov in0 = in1 // first argument slot now holds the old second argument
|
||||
;; // stop: end the instruction group so the read of r8 below sees the value written above
|
||||
mov in1 = r8 // second argument slot now holds the old first argument
|
||||
br.cond.sptk.many memmove // plain branch (not br.call): b0 is left unchanged for memmove
|
||||
END(bcopy)
|
94
sysdeps/ia64/bzero.S
Normal file
94
sysdeps/ia64/bzero.S
Normal file
@ -0,0 +1,94 @@
|
||||
/* Optimized version of the standard bzero() function.
|
||||
This file is part of the GNU C Library.
|
||||
Copyright (C) 2000, 2001 Free Software Foundation, Inc.
|
||||
Contributed by Dan Pop <Dan.Pop@cern.ch>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* Return: dest
|
||||
|
||||
Inputs:
|
||||
in0: dest
|
||||
in1: count
|
||||
|
||||
The algorithm is fairly straightforward: set byte by byte until we
|
||||
get to a word aligned address, then set word by word as much as
|
||||
possible; the remaining few bytes are set one by one. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#undef ret
|
||||
|
||||
#define dest in0
|
||||
#define cnt in1
|
||||
|
||||
#define save_pfs loc0
|
||||
#define ptr1 loc1
|
||||
#define ptr2 loc2
|
||||
#define tmp loc3
|
||||
#define loopcnt loc4
|
||||
#define save_lc loc5
|
||||
|
||||
/* bzero: store zero bytes to cnt bytes starting at dest.

   Register roles (see the #define aliases above):
     dest (in0)  start address       cnt (in1)   bytes still to clear
     ptr1        running store ptr   ptr2        ptr1 + 8, second store stream
     tmp         dest % 8            loopcnt     iteration count for ar.lc
     save_pfs / save_lc              saved ar.pfs / ar.lc, restored on exit

   Three paths:
     cnt == 0        -> return immediately;
     cnt <  16       -> byte loop only (.set_few);
     otherwise       -> byte loop up to an 8-byte boundary (.l1), 16 bytes
                        per iteration (.l2), at most one more 8-byte store
                        (.one_more), then a byte loop for the tail (.l3).

   Each br.cloop loop runs ar.lc + 1 times, which is why every loop count
   is decremented by one before being moved into ar.lc.  */
ENTRY(bzero)
|
||||
.prologue
|
||||
alloc save_pfs = ar.pfs, 2, 6, 0, 0 // frame: 2 inputs, 6 locals, 0 outputs, 0 rotating; old ar.pfs -> save_pfs
|
||||
.save ar.lc, save_lc // unwind directive: ar.lc is kept in save_lc
|
||||
mov save_lc = ar.lc // the loops below overwrite ar.lc, so keep the incoming value
|
||||
.body
|
||||
mov ret0 = dest // copy dest into ret0 (header comment: "Return: dest")
|
||||
and tmp = 7, dest // tmp = dest % 8, the misalignment of the start address
|
||||
cmp.eq p6, p0 = cnt, r0 // p6 = (cnt == 0)
|
||||
(p6) br.cond.spnt .restore_and_exit ;; // nothing to clear
|
||||
mov ptr1 = dest // ptr1 walks forward over the buffer
|
||||
sub loopcnt = 8, tmp // bytes up to the next 8-byte boundary (only used when tmp != 0)
|
||||
cmp.gt p6, p0 = 16, cnt // p6 = (16 > cnt), signed comparison
|
||||
(p6) br.cond.spnt .set_few;; // short buffer: clear it byte by byte
|
||||
cmp.eq p6, p0 = tmp, r0 // p6 = (dest already 8-byte aligned)
|
||||
(p6) br.cond.sptk .dest_aligned // aligned: skip the head loop
|
||||
sub cnt = cnt, loopcnt // account for the head bytes cleared by .l1
|
||||
adds loopcnt = -1, loopcnt;; // br.cloop iterates ar.lc + 1 times
|
||||
mov ar.lc = loopcnt;;
|
||||
.l1: // head loop: one byte per iteration until ptr1 is 8-byte aligned
|
||||
st1 [ptr1] = r0, 1 // *ptr1++ = 0
|
||||
br.cloop.dptk .l1 ;;
|
||||
.dest_aligned: // ptr1 is 8-byte aligned here
|
||||
adds ptr2 = 8, ptr1 // second pointer, one 8-byte word ahead of ptr1
|
||||
shr.u loopcnt = cnt, 4 ;; // loopcnt = cnt / 16
|
||||
cmp.eq p6, p0 = loopcnt, r0 // p6 = (fewer than 16 bytes remain)
|
||||
(p6) br.cond.spnt .one_more // taken with p6 still set, so the st8 at .one_more executes;
// this is only reached after the head loop consumed 1..7 of at least 16 bytes, so 9..15 remain
|
||||
and cnt = 0xf, cnt // compute the remaining cnt
|
||||
adds loopcnt = -1, loopcnt;; // br.cloop iterates ar.lc + 1 times
|
||||
mov ar.lc = loopcnt;;
|
||||
.l2: // bulk loop: 16 bytes per iteration through two interleaved pointers
|
||||
st8 [ptr1] = r0, 16 // clear bytes 0..7 of the 16-byte chunk, advance by 16
|
||||
st8 [ptr2] = r0, 16 // clear bytes 8..15 of the 16-byte chunk, advance by 16
|
||||
br.cloop.dptk .l2
|
||||
cmp.le p6, p0 = 8, cnt ;; // on loop exit: p6 = (8 <= cnt), one more full word fits
|
||||
.one_more: // p6 decides whether one more 8-byte store is done
|
||||
(p6) st8 [ptr1] = r0, 8 // clear one more 8-byte word
|
||||
(p6) adds cnt = -8, cnt ;; // and account for it
|
||||
cmp.eq p6, p0 = cnt, r0 // p6 = (no tail bytes left)
|
||||
(p6) br.cond.spnt .restore_and_exit
|
||||
.set_few: // cnt is non-zero here on every path that reaches this label
|
||||
adds loopcnt = -1, cnt;; // br.cloop iterates ar.lc + 1 times
|
||||
mov ar.lc = loopcnt;;
|
||||
.l3: // tail loop: one byte per iteration
|
||||
st1 [ptr1] = r0, 1 // *ptr1++ = 0
|
||||
br.cloop.dptk .l3 ;;
|
||||
.restore_and_exit:
|
||||
mov ar.lc = save_lc // restore the loop counter saved in the prologue
|
||||
mov ar.pfs = save_pfs // restore ar.pfs saved by alloc
|
||||
br.ret.sptk.many b0 // return through b0
|
||||
END(bzero)
|
@ -974,6 +974,13 @@ double: 1
|
||||
idouble: 1
|
||||
ildouble: 521
|
||||
ldouble: 521
|
||||
Test "tanh (-0.7) == -0.60436777711716349631":
|
||||
float: 1
|
||||
ifloat: 1
|
||||
double: 1
|
||||
idouble: 1
|
||||
ildouble: 521
|
||||
ldouble: 521
|
||||
|
||||
# tgamma
|
||||
Test "tgamma (-0.5) == -2 sqrt (pi)":
|
||||
|
@ -68,10 +68,12 @@
|
||||
br.ctop.sptk .loop##shift ; \
|
||||
br.cond.sptk .cpyfew ; /* deal with the remaining bytes */
|
||||
|
||||
#define MEMLAT 21
|
||||
#define Nrot (((2*MEMLAT+3) + 7) & ~7)
|
||||
|
||||
ENTRY(memcpy)
|
||||
.prologue
|
||||
alloc r2 = ar.pfs, 3, 16 - 3, 0, 16
|
||||
#include "softpipe.h"
|
||||
alloc r2 = ar.pfs, 3, Nrot - 3, 0, Nrot
|
||||
.rotr r[MEMLAT + 2], q[MEMLAT + 1]
|
||||
.rotp p[MEMLAT + 2]
|
||||
mov ret0 = in0 // return value = dest
|
||||
|
@ -26,7 +26,7 @@
|
||||
in2: byte count
|
||||
|
||||
The core of the function is the memcpy implementation used in memcpy.S.
|
||||
When bytes have to be copied backwards, only the easy case, when
|
||||
When bytes have to be copied backwards, only the easy case, when
|
||||
all arguments are multiples of 8, is optimised.
|
||||
|
||||
In this form, it assumes little endian mode. For big endian mode,
|
||||
@ -67,10 +67,12 @@
|
||||
br.ctop.sptk .loop##shift ; \
|
||||
br.cond.sptk .cpyfew ; /* deal with the remaining bytes */
|
||||
|
||||
#define MEMLAT 21
|
||||
#define Nrot (((2*MEMLAT+3) + 7) & ~7)
|
||||
|
||||
ENTRY(memmove)
|
||||
.prologue
|
||||
alloc r2 = ar.pfs, 3, 29, 0, 32
|
||||
#include "softpipe.h"
|
||||
alloc r2 = ar.pfs, 3, Nrot - 3, 0, Nrot
|
||||
.rotr r[MEMLAT + 2], q[MEMLAT + 1]
|
||||
.rotp p[MEMLAT + 2]
|
||||
mov ret0 = in0 // return value = dest
|
||||
@ -90,7 +92,7 @@ ENTRY(memmove)
|
||||
and tmp4 = 7, tmp3 // tmp4 = (dest | src | len) & 7
|
||||
cmp.le p6, p0 = dest, src // if dest <= src it's always safe
|
||||
(p6) br.cond.spnt .forward // to copy forward
|
||||
add tmp3 = src, len;;
|
||||
add tmp3 = src, len;;
|
||||
cmp.lt p6, p0 = dest, tmp3 // if dest > src && dest < src + len
|
||||
(p6) br.cond.spnt .backward // we have to copy backward
|
||||
|
||||
@ -113,7 +115,7 @@ ENTRY(memmove)
|
||||
(p6) br.cond.spnt .restore_and_exit;;// the one-word special case
|
||||
adds adest = 8, dest // set adest one word ahead of dest
|
||||
adds asrc = 8, src ;; // set asrc one word ahead of src
|
||||
nop.b 0 // get the "golden" alignment for
|
||||
nop.b 0 // get the "golden" alignment for
|
||||
nop.b 0 // the next loop
|
||||
.l0:
|
||||
(p[0]) ld8 r[0] = [src], 16
|
||||
@ -139,8 +141,8 @@ ENTRY(memmove)
|
||||
.l1: // copy -dest % 8 bytes
|
||||
ld1 value = [src], 1 // value = *src++
|
||||
;;
|
||||
st1 [dest] = value, 1 // *dest++ = value
|
||||
br.cloop.dptk .l1
|
||||
st1 [dest] = value, 1 // *dest++ = value
|
||||
br.cloop.dptk .l1
|
||||
.dest_aligned:
|
||||
and sh1 = 7, src // sh1 = src % 8
|
||||
and tmp2 = -8, len // tmp2 = len & -OPSIZ
|
||||
@ -148,7 +150,7 @@ ENTRY(memmove)
|
||||
shr.u loopcnt = len, 3 // loopcnt = len / 8
|
||||
and len = 7, len;; // len = len % 8
|
||||
adds loopcnt = -1, loopcnt // --loopcnt
|
||||
addl tmp4 = @ltoff(.table), gp
|
||||
addl tmp4 = @ltoff(.table), gp
|
||||
addl tmp3 = @ltoff(.loop56), gp
|
||||
mov ar.ec = MEMLAT + 1 // set EC
|
||||
mov pr.rot = 1 << 16;; // set rotating predicates
|
||||
@ -174,7 +176,7 @@ ENTRY(memmove)
|
||||
LOOP(40)
|
||||
LOOP(48)
|
||||
LOOP(56)
|
||||
|
||||
|
||||
.src_aligned:
|
||||
.l3:
|
||||
(p[0]) ld8 r[0] = [src], 8
|
||||
@ -220,7 +222,7 @@ ENTRY(memmove)
|
||||
adds src = -1, src // src points to the last byte
|
||||
adds dest = -1, dest // dest points to the last byte
|
||||
adds loopcnt = -1, len;; // loopcnt = len - 1
|
||||
mov ar.lc = loopcnt;; // set the loop counter
|
||||
mov ar.lc = loopcnt;; // set the loop counter
|
||||
.l6:
|
||||
(p[0]) ld1 r[0] = [src], -1
|
||||
(p[MEMLAT]) st1 [dest] = r[MEMLAT], -1
|
||||
|
Loading…
Reference in New Issue
Block a user