mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-23 17:53:37 +08:00
Moved alpha to ports repository.
2008-11-25 Roland McGrath <roland@redhat.com> * sysdeps/alpha, sysdeps/unix/bsd/osf/alpha, sysdeps/unix/bsd/Attic/osf1/alpha, sysdeps/unix/sysv/linux/alpha, sysdeps/unix/sysv/linux/alpha/alpha, sysdeps/unix/alpha, sysdeps/mach/alpha, sysdeps/mach/hurd/alpha: Subdirectories moved to ports repository. * configure.in (base_machine): Remove alpha case.
This commit is contained in:
parent
f1092afe15
commit
c0439b95b8
@ -1,3 +1,12 @@
|
||||
2008-11-25 Roland McGrath <roland@redhat.com>
|
||||
|
||||
* sysdeps/alpha, sysdeps/unix/bsd/osf/alpha,
|
||||
sysdeps/unix/bsd/Attic/osf1/alpha, sysdeps/unix/sysv/linux/alpha,
|
||||
sysdeps/unix/sysv/linux/alpha/alpha, sysdeps/unix/alpha,
|
||||
sysdeps/mach/alpha, sysdeps/mach/hurd/alpha:
|
||||
Subdirectories moved to ports repository.
|
||||
* configure.in (base_machine): Remove alpha case.
|
||||
|
||||
2008-11-13 Ryan S. Arnold <rsa@us.ibm.com>
|
||||
|
||||
[BZ #6411]
|
||||
|
@ -484,7 +484,6 @@ changequote(,)dnl
|
||||
# base_machine, we don't change it.
|
||||
test -n "$base_machine" || case "$machine" in
|
||||
a29k | am29000) base_machine=a29k machine=a29k ;;
|
||||
alpha*) base_machine=alpha machine=alpha/$machine ;;
|
||||
c3[012]) base_machine=cx0 machine=cx0/c30 ;;
|
||||
c4[04]) base_machine=cx0 machine=cx0/c40 ;;
|
||||
i[34567]86) base_machine=i386 machine=i386/$machine ;;
|
||||
|
@ -1,3 +1,9 @@
|
||||
2008-11-25 Roland McGrath <roland@redhat.com>
|
||||
|
||||
* sysdeps/alpha, sysdeps/unix/sysv/linux/alpha:
|
||||
Subdirectories moved to ports repository as
|
||||
sysdeps/.../nptl subdirectories.
|
||||
|
||||
2008-11-12 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
[BZ #7008]
|
||||
|
@ -1,21 +0,0 @@
|
||||
# Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
# This file is part of the GNU C Library.
|
||||
#
|
||||
# The GNU C Library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# The GNU C Library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with the GNU C Library; if not, write to the Free
|
||||
# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
# 02111-1307 USA.
|
||||
|
||||
ifeq ($(subdir),csu)
|
||||
gen-as-const-headers += tcb-offsets.sym
|
||||
endif
|
@ -1,89 +0,0 @@
|
||||
/* Special .init and .fini section support for Alpha. NPTL version.
|
||||
Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* This file is compiled into assembly code which is then munged by a sed
|
||||
script into two files: crti.s and crtn.s.
|
||||
|
||||
* crti.s puts a function prologue at the beginning of the .init and .fini
|
||||
sections and defines global symbols for those addresses, so they can be
|
||||
called as functions.
|
||||
|
||||
* crtn.s puts the corresponding function epilogues in the .init and .fini
|
||||
sections.
|
||||
|
||||
This differs from what would be generated by the generic code in that
|
||||
we save and restore the GP within the function. In order for linker
|
||||
relaxation to work, the value in the GP register on exit from a function
|
||||
must be valid for the function entry point. Normally, a function is
|
||||
contained within one object file and this is not an issue, provided
|
||||
that the function reloads the gp after making any function calls.
|
||||
However, _init and _fini are constructed from pieces of many object
|
||||
files, all of which may have different GP values. So we must reload
|
||||
the GP value from crti.o in crtn.o. */
|
||||
|
||||
__asm__ (" \n\
|
||||
#include \"defs.h\" \n\
|
||||
\n\
|
||||
/*@HEADER_ENDS*/ \n\
|
||||
\n\
|
||||
/*@_init_PROLOG_BEGINS*/ \n\
|
||||
.section .init, \"ax\", @progbits \n\
|
||||
.globl _init \n\
|
||||
.type _init,@function \n\
|
||||
.usepv _init,std \n\
|
||||
_init: \n\
|
||||
ldgp $29, 0($27) \n\
|
||||
subq $30, 16, $30 \n\
|
||||
stq $26, 0($30) \n\
|
||||
stq $29, 8($30) \n\
|
||||
bsr $26, __pthread_initialize_minimal_internal !samegp \n\
|
||||
.align 3 \n\
|
||||
/*@_init_PROLOG_ENDS*/ \n\
|
||||
\n\
|
||||
/*@_init_EPILOG_BEGINS*/ \n\
|
||||
.section .init, \"ax\", @progbits \n\
|
||||
ldq $26, 0($30) \n\
|
||||
ldq $29, 8($30) \n\
|
||||
addq $30, 16, $30 \n\
|
||||
ret \n\
|
||||
/*@_init_EPILOG_ENDS*/ \n\
|
||||
\n\
|
||||
/*@_fini_PROLOG_BEGINS*/ \n\
|
||||
.section .fini, \"ax\", @progbits \n\
|
||||
.globl _fini \n\
|
||||
.type _fini,@function \n\
|
||||
.usepv _fini,std \n\
|
||||
_fini: \n\
|
||||
ldgp $29, 0($27) \n\
|
||||
subq $30, 16, $30 \n\
|
||||
stq $26, 0($30) \n\
|
||||
stq $29, 8($30) \n\
|
||||
.align 3 \n\
|
||||
/*@_fini_PROLOG_ENDS*/ \n\
|
||||
\n\
|
||||
/*@_fini_EPILOG_BEGINS*/ \n\
|
||||
.section .fini, \"ax\", @progbits \n\
|
||||
ldq $26, 0($30) \n\
|
||||
ldq $29, 8($30) \n\
|
||||
addq $30, 16, $30 \n\
|
||||
ret \n\
|
||||
/*@_fini_EPILOG_ENDS*/ \n\
|
||||
\n\
|
||||
/*@TRAILER_BEGINS*/ \n\
|
||||
");
|
@ -1,45 +0,0 @@
|
||||
/* Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson <rth@twiddle.net>, 2003.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
|
||||
.text
|
||||
.align 4
|
||||
|
||||
/* pthread_spin_lock: spin until the lock word at 0($16) can be changed from zero to 1, then return 0 in $0.  $16 is presumably the pthread_spinlock_t pointer (first integer argument register in the Alpha calling convention) -- confirm against the C prototype.  Uses $1 and $2 as scratch.  */
.globl pthread_spin_lock
|
||||
.ent pthread_spin_lock
|
||||
pthread_spin_lock:
|
||||
.frame $sp, 0, $26, 0
|
||||
.prologue 0
|
||||
|
||||
0: ldl_l $1, 0($16) /* load-locked read of the current lock word */
|
||||
lda $2, 1 /* $2 = 1, the value stored to mark the lock taken */
|
||||
lda $0, 0 /* preset the return value to 0 */
|
||||
bne $1, 1f /* lock word nonzero: go wait in the read-only loop */
|
||||
|
||||
stl_c $2, 0($16) /* store-conditional; $2 is overwritten with the success flag */
|
||||
beq $2, 1f /* flag is zero (store did not happen): wait, then retry */
|
||||
mb /* memory barrier before returning with the lock held */
|
||||
ret
|
||||
|
||||
1: ldl $1, 0($16) /* plain (non-locked) load while waiting */
|
||||
bne $1, 1b /* keep polling until the lock word reads zero */
|
||||
unop /* no-op; presumably padding for instruction alignment */
|
||||
br 0b /* lock word looked free: restart the locked sequence */
|
||||
|
||||
.end pthread_spin_lock
|
@ -1,46 +0,0 @@
|
||||
/* Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson <rth@twiddle.net>, 2003.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
|
||||
#define _ERRNO_H 1
|
||||
#include <bits/errno.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
|
||||
/* pthread_spin_trylock: one attempt to change the lock word at 0($16) from zero to 1.  Returns 0 in $0 on success, or EBUSY if the lock word was already nonzero.  Only a failed store-conditional causes a retry.  $16 is presumably the pthread_spinlock_t pointer (first integer argument register) -- confirm against the C prototype.  Uses $1 and $2 as scratch.  */
.globl pthread_spin_trylock
|
||||
.ent pthread_spin_trylock
|
||||
pthread_spin_trylock:
|
||||
.frame $sp, 0, $26, 0
|
||||
.prologue 0
|
||||
|
||||
0: ldl_l $1, 0($16) /* load-locked read of the current lock word */
|
||||
lda $2, 1 /* $2 = 1, the value stored to mark the lock taken */
|
||||
lda $0, EBUSY /* assume failure: return value defaults to EBUSY */
|
||||
bne $1, 1f /* lock word nonzero: return EBUSY without storing */
|
||||
|
||||
stl_c $2, 0($16) /* store-conditional; $2 is overwritten with the success flag */
|
||||
beq $2, 2f /* flag is zero (store did not happen): retry from the top */
|
||||
mb /* memory barrier before returning with the lock held */
|
||||
lda $0, 0 /* lock acquired: return 0 */
|
||||
|
||||
1: ret
|
||||
2: br 0b /* out-of-line retry branch taken after a failed stl_c */
|
||||
|
||||
.end pthread_spin_trylock
|
@ -1,38 +0,0 @@
|
||||
/* Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Default stack size. */
|
||||
#define ARCH_STACK_DEFAULT_SIZE (4 * 1024 * 1024)
|
||||
|
||||
/* Required stack pointer alignment at beginning. The ABI requires 16. */
|
||||
#define STACK_ALIGN 16
|
||||
|
||||
/* Minimal stack size after allocating thread descriptor and guard size. */
|
||||
#define MINIMAL_REST_STACK 4096
|
||||
|
||||
/* Alignment requirement for TCB. */
|
||||
#define TCB_ALIGNMENT 16
|
||||
|
||||
/* Location of current stack frame. */
|
||||
#define CURRENT_STACK_FRAME __builtin_frame_address (0)
|
||||
|
||||
/* XXX Until we have a better place keep the definitions here. */
|
||||
|
||||
/* While there is no such syscall. */
|
||||
#define __exit_thread_inline(val) \
|
||||
INLINE_SYSCALL (exit, 1, (val))
|
@ -1,14 +0,0 @@
|
||||
#include <sysdep.h>
|
||||
#include <tls.h>
|
||||
|
||||
--
|
||||
|
||||
-- Abuse tls.h macros to derive offsets relative to the thread register.
|
||||
-- # define __builtin_thread_pointer() ((void *) 0)
|
||||
-- # define thread_offsetof(mem) ((void *) &THREAD_SELF->mem - (void *) 0)
|
||||
-- Ho hum, this doesn't work in gcc4, so Know Things about THREAD_SELF
|
||||
#define thread_offsetof(mem) (long)(offsetof(struct pthread, mem) - sizeof(struct pthread))
|
||||
|
||||
MULTIPLE_THREADS_OFFSET thread_offsetof (header.multiple_threads)
|
||||
PID_OFFSET thread_offsetof (pid)
|
||||
TID_OFFSET thread_offsetof (tid)
|
@ -1,149 +0,0 @@
|
||||
/* Definition for thread-local data handling. NPTL/Alpha version.
|
||||
Copyright (C) 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#ifndef _TLS_H
|
||||
#define _TLS_H 1
|
||||
|
||||
# include <dl-sysdep.h>
|
||||
|
||||
#ifndef __ASSEMBLER__
|
||||
# include <stdbool.h>
|
||||
# include <stddef.h>
|
||||
# include <stdint.h>
|
||||
|
||||
/* Type for the dtv. */
|
||||
typedef union dtv
|
||||
{
|
||||
size_t counter;
|
||||
struct
|
||||
{
|
||||
void *val;
|
||||
bool is_static;
|
||||
} pointer;
|
||||
} dtv_t;
|
||||
|
||||
#else /* __ASSEMBLER__ */
|
||||
# include <tcb-offsets.h>
|
||||
#endif /* __ASSEMBLER__ */
|
||||
|
||||
|
||||
/* We require TLS support in the tools. */
|
||||
#ifndef HAVE_TLS_SUPPORT
|
||||
# error "TLS support is required."
|
||||
#endif
|
||||
|
||||
#ifndef __ASSEMBLER__
|
||||
|
||||
/* Get system call information. */
|
||||
# include <sysdep.h>
|
||||
|
||||
/* The TP points to the start of the thread blocks. */
|
||||
# define TLS_DTV_AT_TP 1
|
||||
|
||||
/* Get the thread descriptor definition. */
|
||||
# include <nptl/descr.h>
|
||||
|
||||
typedef struct
|
||||
{
|
||||
dtv_t *dtv;
|
||||
void *__private;
|
||||
} tcbhead_t;
|
||||
|
||||
/* This is the size of the initial TCB. */
|
||||
# define TLS_INIT_TCB_SIZE sizeof (tcbhead_t)
|
||||
|
||||
/* Alignment requirements for the initial TCB. */
|
||||
# define TLS_INIT_TCB_ALIGN 16
|
||||
|
||||
/* This is the size of the TCB. */
|
||||
# define TLS_TCB_SIZE sizeof (tcbhead_t)
|
||||
|
||||
/* This is the size we need before TCB. */
|
||||
# define TLS_PRE_TCB_SIZE sizeof (struct pthread)
|
||||
|
||||
/* Alignment requirements for the TCB. */
|
||||
# define TLS_TCB_ALIGN 16
|
||||
|
||||
/* Install the dtv pointer. The pointer passed is to the element with
|
||||
index -1 which contains the length. */
|
||||
# define INSTALL_DTV(tcbp, dtvp) \
|
||||
(((tcbhead_t *) (tcbp))->dtv = (dtvp) + 1)
|
||||
|
||||
/* Install new dtv for current thread. */
|
||||
# define INSTALL_NEW_DTV(dtv) \
|
||||
(THREAD_DTV() = (dtv))
|
||||
|
||||
/* Return dtv of given thread descriptor. */
|
||||
# define GET_DTV(tcbp) \
|
||||
(((tcbhead_t *) (tcbp))->dtv)
|
||||
|
||||
/* Code to initially initialize the thread pointer. This might need
|
||||
special attention since 'errno' is not yet available and if the
|
||||
operation can cause a failure 'errno' must not be touched. */
|
||||
# define TLS_INIT_TP(tcbp, secondcall) \
|
||||
(__builtin_set_thread_pointer ((void *)(tcbp)), NULL)
|
||||
|
||||
/* Return the address of the dtv for the current thread. */
|
||||
# define THREAD_DTV() \
|
||||
(((tcbhead_t *) __builtin_thread_pointer ())->dtv)
|
||||
|
||||
/* Return the thread descriptor for the current thread. */
|
||||
# define THREAD_SELF \
|
||||
((struct pthread *)__builtin_thread_pointer () - 1)
|
||||
|
||||
/* Magic for libthread_db to know how to do THREAD_SELF. */
|
||||
# define DB_THREAD_SELF \
|
||||
REGISTER (64, 64, 32 * 8, -sizeof (struct pthread))
|
||||
|
||||
/* Access to data in the thread descriptor is easy. */
|
||||
#define THREAD_GETMEM(descr, member) \
|
||||
descr->member
|
||||
#define THREAD_GETMEM_NC(descr, member, idx) \
|
||||
descr->member[idx]
|
||||
#define THREAD_SETMEM(descr, member, value) \
|
||||
descr->member = (value)
|
||||
#define THREAD_SETMEM_NC(descr, member, idx, value) \
|
||||
descr->member[idx] = (value)
|
||||
|
||||
/* Get and set the global scope generation counter in struct pthread. */
|
||||
#define THREAD_GSCOPE_FLAG_UNUSED 0
|
||||
#define THREAD_GSCOPE_FLAG_USED 1
|
||||
#define THREAD_GSCOPE_FLAG_WAIT 2
|
||||
#define THREAD_GSCOPE_RESET_FLAG() \
|
||||
do \
|
||||
{ int __res \
|
||||
= atomic_exchange_rel (&THREAD_SELF->header.gscope_flag, \
|
||||
THREAD_GSCOPE_FLAG_UNUSED); \
|
||||
if (__res == THREAD_GSCOPE_FLAG_WAIT) \
|
||||
lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \
|
||||
} \
|
||||
while (0)
|
||||
#define THREAD_GSCOPE_SET_FLAG() \
|
||||
do \
|
||||
{ \
|
||||
THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED; \
|
||||
atomic_write_barrier (); \
|
||||
} \
|
||||
while (0)
|
||||
#define THREAD_GSCOPE_WAIT() \
|
||||
GL(dl_wait_lookup_done) ()
|
||||
|
||||
#endif /* __ASSEMBLER__ */
|
||||
|
||||
#endif /* tls.h */
|
@ -1,2 +0,0 @@
|
||||
# pull in __syscall_error routine, __sigprocmask, __syscall_rt_sigaction
|
||||
libpthread-routines += ptw-sysdep ptw-sigprocmask ptw-rt_sigaction
|
@ -1,13 +0,0 @@
|
||||
libpthread {
|
||||
GLIBC_2.3.3 {
|
||||
# Changed PTHREAD_STACK_MIN.
|
||||
pthread_attr_setstack; pthread_attr_setstacksize;
|
||||
}
|
||||
}
|
||||
librt {
|
||||
GLIBC_2.3.3 {
|
||||
# Changed timer_t.
|
||||
timer_create; timer_delete; timer_getoverrun; timer_gettime;
|
||||
timer_settime;
|
||||
}
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
#include <shlib-compat.h>
|
||||
|
||||
#define aio_cancel64 XXX
|
||||
#include <aio.h>
|
||||
#undef aio_cancel64
|
||||
#include <errno.h>
|
||||
|
||||
extern __typeof (aio_cancel) __new_aio_cancel;
|
||||
extern __typeof (aio_cancel) __old_aio_cancel;
|
||||
|
||||
#define aio_cancel __new_aio_cancel
|
||||
|
||||
#include <sysdeps/pthread/aio_cancel.c>
|
||||
|
||||
#undef aio_cancel
|
||||
strong_alias (__new_aio_cancel, __new_aio_cancel64);
|
||||
versioned_symbol (librt, __new_aio_cancel, aio_cancel, GLIBC_2_3);
|
||||
versioned_symbol (librt, __new_aio_cancel64, aio_cancel64, GLIBC_2_3);
|
||||
|
||||
#if SHLIB_COMPAT (librt, GLIBC_2_1, GLIBC_2_3)
|
||||
|
||||
#undef ECANCELED
|
||||
#define aio_cancel __old_aio_cancel
|
||||
#define ECANCELED 125
|
||||
|
||||
#include <sysdeps/pthread/aio_cancel.c>
|
||||
|
||||
#undef aio_cancel
|
||||
strong_alias (__old_aio_cancel, __old_aio_cancel64);
|
||||
compat_symbol (librt, __old_aio_cancel, aio_cancel, GLIBC_2_1);
|
||||
compat_symbol (librt, __old_aio_cancel64, aio_cancel64, GLIBC_2_1);
|
||||
|
||||
#endif
|
@ -1,100 +0,0 @@
|
||||
/* Minimum guaranteed maximum values for system limits. Linux/Alpha version.
|
||||
Copyright (C) 1993-1998,2000,2002-2004,2008 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* The kernel header pollutes the namespace with the NR_OPEN symbol
|
||||
and defines LINK_MAX although filesystems have different maxima. A
|
||||
similar thing is true for OPEN_MAX: the limit can be changed at
|
||||
runtime and therefore the macro must not be defined. Remove this
|
||||
after including the header if necessary. */
|
||||
#ifndef NR_OPEN
|
||||
# define __undef_NR_OPEN
|
||||
#endif
|
||||
#ifndef LINK_MAX
|
||||
# define __undef_LINK_MAX
|
||||
#endif
|
||||
#ifndef OPEN_MAX
|
||||
# define __undef_OPEN_MAX
|
||||
#endif
|
||||
#ifndef ARG_MAX
|
||||
# define __undef_ARG_MAX
|
||||
#endif
|
||||
|
||||
/* The kernel sources contain a file with all the needed information. */
|
||||
#include <linux/limits.h>
|
||||
|
||||
/* Have to remove NR_OPEN? */
|
||||
#ifdef __undef_NR_OPEN
|
||||
# undef NR_OPEN
|
||||
# undef __undef_NR_OPEN
|
||||
#endif
|
||||
/* Have to remove LINK_MAX? */
|
||||
#ifdef __undef_LINK_MAX
|
||||
# undef LINK_MAX
|
||||
# undef __undef_LINK_MAX
|
||||
#endif
|
||||
/* Have to remove OPEN_MAX? */
|
||||
#ifdef __undef_OPEN_MAX
|
||||
# undef OPEN_MAX
|
||||
# undef __undef_OPEN_MAX
|
||||
#endif
|
||||
/* Have to remove ARG_MAX? */
|
||||
#ifdef __undef_ARG_MAX
|
||||
# undef ARG_MAX
|
||||
# undef __undef_ARG_MAX
|
||||
#endif
|
||||
|
||||
/* The number of data keys per process. */
|
||||
#define _POSIX_THREAD_KEYS_MAX 128
|
||||
/* This is the value this implementation supports. */
|
||||
#define PTHREAD_KEYS_MAX 1024
|
||||
|
||||
/* Controlling the iterations of destructors for thread-specific data. */
|
||||
#define _POSIX_THREAD_DESTRUCTOR_ITERATIONS 4
|
||||
/* Number of iterations this implementation does. */
|
||||
#define PTHREAD_DESTRUCTOR_ITERATIONS _POSIX_THREAD_DESTRUCTOR_ITERATIONS
|
||||
|
||||
/* The number of threads per process. */
|
||||
#define _POSIX_THREAD_THREADS_MAX 64
|
||||
/* We have no predefined limit on the number of threads. */
|
||||
#undef PTHREAD_THREADS_MAX
|
||||
|
||||
/* Maximum amount by which a process can decrease its asynchronous I/O
|
||||
priority level. */
|
||||
#define AIO_PRIO_DELTA_MAX 20
|
||||
|
||||
/* Minimum size for a thread. We are free to choose a reasonable value. */
|
||||
#define PTHREAD_STACK_MIN 24576
|
||||
|
||||
/* Maximum number of timer expiration overruns. */
|
||||
#define DELAYTIMER_MAX 2147483647
|
||||
|
||||
/* Maximum tty name length. */
|
||||
#define TTY_NAME_MAX 32
|
||||
|
||||
/* Maximum login name length. This is arbitrary. */
|
||||
#define LOGIN_NAME_MAX 256
|
||||
|
||||
/* Maximum host name length. */
|
||||
#define HOST_NAME_MAX 64
|
||||
|
||||
/* Maximum message queue priority level. */
|
||||
#define MQ_PRIO_MAX 32768
|
||||
|
||||
/* Maximum value the semaphore can have. */
|
||||
#define SEM_VALUE_MAX (2147483647)
|
@ -1,168 +0,0 @@
|
||||
/* Machine-specific pthread type layouts. Alpha version.
|
||||
Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#ifndef _BITS_PTHREADTYPES_H
|
||||
#define _BITS_PTHREADTYPES_H 1
|
||||
|
||||
#define __SIZEOF_PTHREAD_ATTR_T 56
|
||||
#define __SIZEOF_PTHREAD_MUTEX_T 40
|
||||
#define __SIZEOF_PTHREAD_MUTEXATTR_T 4
|
||||
#define __SIZEOF_PTHREAD_COND_T 48
|
||||
#define __SIZEOF_PTHREAD_CONDATTR_T 4
|
||||
#define __SIZEOF_PTHREAD_RWLOCK_T 56
|
||||
#define __SIZEOF_PTHREAD_RWLOCKATTR_T 8
|
||||
#define __SIZEOF_PTHREAD_BARRIER_T 32
|
||||
#define __SIZEOF_PTHREAD_BARRIERATTR_T 4
|
||||
|
||||
|
||||
/* Thread identifiers. The structure of the attribute type is
|
||||
deliberately not exposed. */
|
||||
typedef unsigned long int pthread_t;
|
||||
|
||||
|
||||
typedef union
|
||||
{
|
||||
char __size[__SIZEOF_PTHREAD_ATTR_T];
|
||||
long int __align;
|
||||
} pthread_attr_t;
|
||||
|
||||
|
||||
typedef struct __pthread_internal_list
|
||||
{
|
||||
struct __pthread_internal_list *__prev;
|
||||
struct __pthread_internal_list *__next;
|
||||
} __pthread_list_t;
|
||||
|
||||
|
||||
/* Data structures for mutex handling. The structure of the attribute
|
||||
type is deliberately not exposed. */
|
||||
typedef union
|
||||
{
|
||||
struct __pthread_mutex_s
|
||||
{
|
||||
int __lock;
|
||||
unsigned int __count;
|
||||
int __owner;
|
||||
unsigned int __nusers;
|
||||
/* KIND must stay at this position in the structure to maintain
|
||||
binary compatibility. */
|
||||
int __kind;
|
||||
int __spins;
|
||||
__pthread_list_t __list;
|
||||
#define __PTHREAD_MUTEX_HAVE_PREV 1
|
||||
} __data;
|
||||
char __size[__SIZEOF_PTHREAD_MUTEX_T];
|
||||
long int __align;
|
||||
} pthread_mutex_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
char __size[__SIZEOF_PTHREAD_MUTEXATTR_T];
|
||||
int __align;
|
||||
} pthread_mutexattr_t;
|
||||
|
||||
|
||||
/* Data structure for conditional variable handling. The structure of
|
||||
the attribute type is deliberately not exposed. */
|
||||
typedef union
|
||||
{
|
||||
struct
|
||||
{
|
||||
int __lock;
|
||||
unsigned int __futex;
|
||||
__extension__ unsigned long long int __total_seq;
|
||||
__extension__ unsigned long long int __wakeup_seq;
|
||||
__extension__ unsigned long long int __woken_seq;
|
||||
void *__mutex;
|
||||
unsigned int __nwaiters;
|
||||
unsigned int __broadcast_seq;
|
||||
} __data;
|
||||
char __size[__SIZEOF_PTHREAD_COND_T];
|
||||
__extension__ long long int __align;
|
||||
} pthread_cond_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
char __size[__SIZEOF_PTHREAD_CONDATTR_T];
|
||||
int __align;
|
||||
} pthread_condattr_t;
|
||||
|
||||
|
||||
/* Keys for thread-specific data */
|
||||
typedef unsigned int pthread_key_t;
|
||||
|
||||
|
||||
/* Once-only execution */
|
||||
typedef int pthread_once_t;
|
||||
|
||||
|
||||
#if defined __USE_UNIX98 || defined __USE_XOPEN2K
|
||||
/* Data structure for read-write lock variable handling. The
|
||||
structure of the attribute type is deliberately not exposed. */
|
||||
typedef union
|
||||
{
|
||||
struct
|
||||
{
|
||||
int __lock;
|
||||
unsigned int __nr_readers;
|
||||
unsigned int __readers_wakeup;
|
||||
unsigned int __writer_wakeup;
|
||||
unsigned int __nr_readers_queued;
|
||||
unsigned int __nr_writers_queued;
|
||||
int __writer;
|
||||
int __shared;
|
||||
unsigned long int __pad1;
|
||||
unsigned long int __pad2;
|
||||
/* FLAGS must stay at this position in the structure to maintain
|
||||
binary compatibility. */
|
||||
unsigned int __flags;
|
||||
} __data;
|
||||
char __size[__SIZEOF_PTHREAD_RWLOCK_T];
|
||||
long int __align;
|
||||
} pthread_rwlock_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];
|
||||
long int __align;
|
||||
} pthread_rwlockattr_t;
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __USE_XOPEN2K
|
||||
/* POSIX spinlock data type. */
|
||||
typedef volatile int pthread_spinlock_t;
|
||||
|
||||
/* POSIX barriers data type. The structure of the type is
|
||||
deliberately not exposed. */
|
||||
typedef union
|
||||
{
|
||||
char __size[__SIZEOF_PTHREAD_BARRIER_T];
|
||||
long int __align;
|
||||
} pthread_barrier_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
char __size[__SIZEOF_PTHREAD_BARRIERATTR_T];
|
||||
int __align;
|
||||
} pthread_barrierattr_t;
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* bits/pthreadtypes.h */
|
@ -1,34 +0,0 @@
|
||||
/* Machine-specific POSIX semaphore type layouts. Alpha version.
|
||||
Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#ifndef _SEMAPHORE_H
|
||||
# error "Never use <bits/semaphore.h> directly; include <semaphore.h> instead."
|
||||
#endif
|
||||
|
||||
# define __SIZEOF_SEM_T 32
|
||||
|
||||
/* Value returned if `sem_open' failed. */
|
||||
#define SEM_FAILED ((sem_t *) 0)
|
||||
|
||||
|
||||
typedef union
|
||||
{
|
||||
char __size[__SIZEOF_SEM_T];
|
||||
long int __align;
|
||||
} sem_t;
|
@ -1,9 +0,0 @@
|
||||
/* We want an #include_next, but we are the main source file.
|
||||
So, #include ourselves and in that incarnation we can use #include_next. */
|
||||
#ifndef INCLUDED_SELF
|
||||
# define INCLUDED_SELF
|
||||
# include <clone.S>
|
||||
#else
|
||||
# define RESET_PID
|
||||
# include_next <clone.S>
|
||||
#endif
|
@ -1,23 +0,0 @@
|
||||
/* Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Value passed to 'clone' for initialization of the thread register. */
|
||||
#define TLS_VALUE (pd + 1)
|
||||
|
||||
/* Get the real implementation. */
|
||||
#include <nptl/sysdeps/pthread/createthread.c>
|
@ -1,30 +0,0 @@
|
||||
/* Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <sysdep.h>
|
||||
#include <tls.h>
|
||||
|
||||
|
||||
#define ARCH_FORK() \
|
||||
INLINE_SYSCALL (clone, 5, \
|
||||
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD, \
|
||||
NULL, NULL, &THREAD_SELF->tid, NULL)
|
||||
|
||||
#include "../fork.c"
|
@ -1,280 +0,0 @@
|
||||
/* Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#ifndef _LOWLEVELLOCK_H
|
||||
#define _LOWLEVELLOCK_H 1
|
||||
|
||||
#include <time.h>
|
||||
#include <sys/param.h>
|
||||
#include <bits/pthreadtypes.h>
|
||||
#include <atomic.h>
|
||||
#include <sysdep.h>
|
||||
#include <kernel-features.h>
|
||||
|
||||
|
||||
#define __NR_futex 394
|
||||
#define FUTEX_WAIT 0
|
||||
#define FUTEX_WAKE 1
|
||||
#define FUTEX_REQUEUE 3
|
||||
#define FUTEX_CMP_REQUEUE 4
|
||||
#define FUTEX_WAKE_OP 5
|
||||
#define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE ((4 << 24) | 1)
|
||||
#define FUTEX_LOCK_PI 6
|
||||
#define FUTEX_UNLOCK_PI 7
|
||||
#define FUTEX_TRYLOCK_PI 8
|
||||
#define FUTEX_PRIVATE_FLAG 128
|
||||
|
||||
/* Values for 'private' parameter of locking macros. Yes, the
|
||||
definition seems to be backwards. But it is not. The bit will be
|
||||
reversed before passing to the system call. */
|
||||
#define LLL_PRIVATE 0
|
||||
#define LLL_SHARED FUTEX_PRIVATE_FLAG
|
||||
|
||||
|
||||
#if !defined NOT_IN_libc || defined IS_IN_rtld
|
||||
/* In libc.so or ld.so all futexes are private. */
|
||||
# ifdef __ASSUME_PRIVATE_FUTEX
|
||||
# define __lll_private_flag(fl, private) \
|
||||
((fl) | FUTEX_PRIVATE_FLAG)
|
||||
# else
|
||||
# define __lll_private_flag(fl, private) \
|
||||
((fl) | THREAD_GETMEM (THREAD_SELF, header.private_futex))
|
||||
# endif
|
||||
#else
|
||||
# ifdef __ASSUME_PRIVATE_FUTEX
|
||||
# define __lll_private_flag(fl, private) \
|
||||
(((fl) | FUTEX_PRIVATE_FLAG) ^ (private))
|
||||
# else
|
||||
# define __lll_private_flag(fl, private) \
|
||||
(__builtin_constant_p (private) \
|
||||
? ((private) == 0 \
|
||||
? ((fl) | THREAD_GETMEM (THREAD_SELF, header.private_futex)) \
|
||||
: (fl)) \
|
||||
: ((fl) | (((private) ^ FUTEX_PRIVATE_FLAG) \
|
||||
& THREAD_GETMEM (THREAD_SELF, header.private_futex))))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/* Untimed variant: expands to lll_futex_timed_wait with a NULL timespec,
   forwarding FUTEXP, VAL and PRIVATE unchanged. */
#define lll_futex_wait(futexp, val, private) \
|
||||
lll_futex_timed_wait (futexp, val, NULL, private)
|
||||
|
||||
/* Issue a four-argument futex system call on FUTEXP with operation
   FUTEX_WAIT (run through __lll_private_flag together with PRIVATE),
   passing VAL and TIMESPEC. The statement expression evaluates to
   -__ret when INTERNAL_SYSCALL_ERROR_P reports an error and to __ret
   otherwise. */
#define lll_futex_timed_wait(futexp, val, timespec, private) \
|
||||
({ \
|
||||
INTERNAL_SYSCALL_DECL (__err); \
|
||||
long int __ret; \
|
||||
__ret = INTERNAL_SYSCALL (futex, __err, 4, (futexp), \
|
||||
__lll_private_flag (FUTEX_WAIT, private), \
|
||||
(val), (timespec)); \
|
||||
INTERNAL_SYSCALL_ERROR_P (__ret, __err)? -__ret : __ret; \
|
||||
})
|
||||
|
||||
/* Issue a four-argument futex system call on FUTEXP with operation
   FUTEX_WAKE (run through __lll_private_flag together with PRIVATE),
   passing NR and a trailing 0. The statement expression evaluates to
   -__ret when INTERNAL_SYSCALL_ERROR_P reports an error and to __ret
   otherwise. */
#define lll_futex_wake(futexp, nr, private) \
|
||||
({ \
|
||||
INTERNAL_SYSCALL_DECL (__err); \
|
||||
long int __ret; \
|
||||
__ret = INTERNAL_SYSCALL (futex, __err, 4, (futexp), \
|
||||
__lll_private_flag (FUTEX_WAKE, private), \
|
||||
(nr), 0); \
|
||||
INTERNAL_SYSCALL_ERROR_P (__ret, __err)? -__ret : __ret; \
|
||||
})
|
||||
|
||||
/* Mark the robust futex FUTEXV as abandoned: OR FUTEX_OWNER_DIED into it
   with atomic_or, then call lll_futex_wake with a count of 1.
   FUTEXV must be an lvalue because its address is taken. */
#define lll_robust_dead(futexv, private) \
|
||||
do \
|
||||
{ \
|
||||
int *__futexp = &(futexv); \
|
||||
atomic_or (__futexp, FUTEX_OWNER_DIED); \
|
||||
lll_futex_wake (__futexp, 1, private); \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
/* Returns non-zero if error happened, zero if success. */
|
||||
#define lll_futex_requeue(futexp, nr_wake, nr_move, mutex, val, private) \
|
||||
({ \
|
||||
INTERNAL_SYSCALL_DECL (__err); \
|
||||
long int __ret; \
|
||||
__ret = INTERNAL_SYSCALL (futex, __err, 6, (futexp), \
|
||||
__lll_private_flag (FUTEX_CMP_REQUEUE, private),\
|
||||
(nr_wake), (nr_move), (mutex), (val)); \
|
||||
INTERNAL_SYSCALL_ERROR_P (__ret, __err); \
|
||||
})
|
||||
|
||||
/* Returns non-zero if error happened, zero if success. */
|
||||
#define lll_futex_wake_unlock(futexp, nr_wake, nr_wake2, futexp2, private) \
|
||||
({ \
|
||||
INTERNAL_SYSCALL_DECL (__err); \
|
||||
long int __ret; \
|
||||
__ret = INTERNAL_SYSCALL (futex, __err, 6, (futexp), \
|
||||
__lll_private_flag (FUTEX_WAKE_OP, private), \
|
||||
(nr_wake), (nr_wake2), (futexp2), \
|
||||
FUTEX_OP_CLEAR_WAKE_IF_GT_ONE); \
|
||||
INTERNAL_SYSCALL_ERROR_P (__ret, __err); \
|
||||
})
|
||||
|
||||
|
||||
|
||||
|
||||
/* Non-blocking lock attempt: compare-and-exchange *FUTEX from 0 to 1.
   Yields 0 when atomic_compare_and_exchange_val_acq returned 0 and a
   nonzero value otherwise. */
static inline int __attribute__((always_inline))
|
||||
__lll_trylock(int *futex)
|
||||
{
|
||||
int __seen = atomic_compare_and_exchange_val_acq (futex, 1, 0);
|
||||
return __seen != 0;
|
||||
}
|
||||
#define lll_trylock(lock) __lll_trylock (&(lock))
|
||||
|
||||
|
||||
/* Like __lll_trylock, but the value installed on success is 2 instead
   of 1. Yields 0 when atomic_compare_and_exchange_val_acq returned 0
   and a nonzero value otherwise. */
static inline int __attribute__((always_inline))
|
||||
__lll_cond_trylock(int *futex)
|
||||
{
|
||||
int __seen = atomic_compare_and_exchange_val_acq (futex, 2, 0);
|
||||
return __seen != 0;
|
||||
}
|
||||
#define lll_cond_trylock(lock) __lll_cond_trylock (&(lock))
|
||||
|
||||
|
||||
/* Robust-lock variant of the trylock: compare-and-exchange *FUTEX from
   0 to ID. Yields 0 when atomic_compare_and_exchange_val_acq returned
   0 and a nonzero value otherwise. */
static inline int __attribute__((always_inline))
|
||||
__lll_robust_trylock(int *futex, int id)
|
||||
{
|
||||
int __seen = atomic_compare_and_exchange_val_acq (futex, id, 0);
|
||||
return __seen != 0;
|
||||
}
|
||||
#define lll_robust_trylock(lock, id) \
|
||||
__lll_robust_trylock (&(lock), id)
|
||||
|
||||
extern void __lll_lock_wait_private (int *futex) attribute_hidden;
|
||||
extern void __lll_lock_wait (int *futex, int private) attribute_hidden;
|
||||
extern int __lll_robust_lock_wait (int *futex, int private) attribute_hidden;
|
||||
|
||||
/* Acquire the lock at FUTEX. The uncontended case is a single
   compare-and-exchange of 0 -> 1; otherwise control passes to one of the
   out-of-line wait routines, chosen by whether PRIVATE is a compile-time
   constant equal to LLL_PRIVATE. */
static inline void __attribute__((always_inline))
|
||||
__lll_lock(int *futex, int private)
|
||||
{
|
||||
/* Fast path: the exchange reported success, nothing else to do. */
if (atomic_compare_and_exchange_bool_acq (futex, 1, 0) == 0)
|
||||
return;
|
||||
|
|
||||
if (!__builtin_constant_p (private) || private != LLL_PRIVATE)
|
||||
__lll_lock_wait (futex, private);
|
||||
else
|
||||
__lll_lock_wait_private (futex);
|
||||
}
|
||||
#define lll_lock(futex, private) __lll_lock (&(futex), private)
|
||||
|
||||
|
||||
/* Acquire the robust lock at FUTEX on behalf of ID. Returns 0 when the
   0 -> ID compare-and-exchange reports success; otherwise returns
   whatever __lll_robust_lock_wait returns. */
static inline int __attribute__ ((always_inline))
|
||||
__lll_robust_lock (int *futex, int id, int private)
|
||||
{
|
||||
if (atomic_compare_and_exchange_bool_acq (futex, id, 0) == 0)
|
||||
return 0;
|
||||
|
|
||||
return __lll_robust_lock_wait (futex, private);
|
||||
}
|
||||
#define lll_robust_lock(futex, id, private) \
|
||||
__lll_robust_lock (&(futex), id, private)
|
||||
|
||||
|
||||
/* Acquire the lock at FUTEX, installing 2 rather than 1 on the
   uncontended path. If the 0 -> 2 compare-and-exchange does not report
   success, hand over to __lll_lock_wait. */
static inline void __attribute__ ((always_inline))
|
||||
__lll_cond_lock (int *futex, int private)
|
||||
{
|
||||
if (atomic_compare_and_exchange_bool_acq (futex, 2, 0) == 0)
|
||||
return;
|
||||
|
|
||||
__lll_lock_wait (futex, private);
|
||||
}
|
||||
#define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
|
||||
|
||||
|
||||
#define lll_robust_cond_lock(futex, id, private) \
|
||||
__lll_robust_lock (&(futex), (id) | FUTEX_WAITERS, private)
|
||||
|
||||
|
||||
extern int __lll_timedlock_wait (int *futex, const struct timespec *,
|
||||
int private) attribute_hidden;
|
||||
extern int __lll_robust_timedlock_wait (int *futex, const struct timespec *,
|
||||
int private) attribute_hidden;
|
||||
|
||||
/* Acquire the lock at FUTEX, giving up according to ABSTIME. Returns 0
   when the 0 -> 1 compare-and-exchange reports success; otherwise
   returns the result of __lll_timedlock_wait. */
static inline int __attribute__ ((always_inline))
|
||||
__lll_timedlock (int *futex, const struct timespec *abstime, int private)
|
||||
{
|
||||
return (atomic_compare_and_exchange_bool_acq (futex, 1, 0) != 0
|
||||
? __lll_timedlock_wait (futex, abstime, private)
|
||||
: 0);
|
||||
}
|
||||
#define lll_timedlock(futex, abstime, private) \
|
||||
__lll_timedlock (&(futex), abstime, private)
|
||||
|
||||
|
||||
/* Timed acquisition of the robust lock at FUTEX on behalf of ID.
   Returns 0 when the 0 -> ID compare-and-exchange reports success;
   otherwise returns the result of __lll_robust_timedlock_wait. */
static inline int __attribute__ ((always_inline))
|
||||
__lll_robust_timedlock (int *futex, const struct timespec *abstime,
|
||||
int id, int private)
|
||||
{
|
||||
if (atomic_compare_and_exchange_bool_acq (futex, id, 0) == 0)
|
||||
return 0;
|
||||
|
|
||||
return __lll_robust_timedlock_wait (futex, abstime, private);
|
||||
}
|
||||
#define lll_robust_timedlock(futex, abstime, id, private) \
|
||||
__lll_robust_timedlock (&(futex), abstime, id, private)
|
||||
|
||||
|
||||
/* Release the lock: atomic_exchange_rel stores 0 into *FUTEX and yields
   the previous value. Only when that previous value was greater than 1
   (marked as the unlikely case) is lll_futex_wake called, with a count
   of 1. The whole expression is cast to void. */
#define __lll_unlock(futex, private) \
|
||||
(void) \
|
||||
({ int *__futex = (futex); \
|
||||
int __oldval = atomic_exchange_rel (__futex, 0); \
|
||||
if (__builtin_expect (__oldval > 1, 0)) \
|
||||
lll_futex_wake (__futex, 1, private); \
|
||||
})
|
||||
#define lll_unlock(futex, private) __lll_unlock(&(futex), private)
|
||||
|
||||
|
||||
/* Release a robust lock: atomic_exchange_rel stores 0 into *FUTEX and
   yields the previous value. lll_futex_wake (count 1) is called only
   when the previous value had FUTEX_WAITERS set, which is marked as the
   unlikely case. The whole expression is cast to void. */
#define __lll_robust_unlock(futex, private) \
|
||||
(void) \
|
||||
({ int *__futex = (futex); \
|
||||
int __oldval = atomic_exchange_rel (__futex, 0); \
|
||||
if (__builtin_expect (__oldval & FUTEX_WAITERS, 0)) \
|
||||
lll_futex_wake (__futex, 1, private); \
|
||||
})
|
||||
#define lll_robust_unlock(futex, private) \
|
||||
__lll_robust_unlock(&(futex), private)
|
||||
|
||||
|
||||
#define lll_islocked(futex) \
|
||||
(futex != 0)
|
||||
|
||||
/* Initializers for lock. */
|
||||
#define LLL_LOCK_INITIALIZER (0)
|
||||
#define LLL_LOCK_INITIALIZER_LOCKED (1)
|
||||
|
||||
|
||||
/* The kernel notifies a process which uses CLONE_CLEARTID via futex
|
||||
wakeup when the clone terminates. The memory location contains the
|
||||
thread ID while the clone is running and is reset to zero
|
||||
afterwards. */
|
||||
#define lll_wait_tid(tid) \
|
||||
do { \
|
||||
__typeof (tid) __tid; \
|
||||
while ((__tid = (tid)) != 0) \
|
||||
lll_futex_wait (&(tid), __tid, LLL_SHARED); \
|
||||
} while (0)
|
||||
|
||||
extern int __lll_timedwait_tid (int *, const struct timespec *)
|
||||
attribute_hidden;
|
||||
|
||||
/* Timed wait on TID. Evaluates to 0 without calling anything when TID
   is already 0; otherwise evaluates to the result of
   __lll_timedwait_tid (&TID, ABSTIME). TID must be an lvalue. */
#define lll_timedwait_tid(tid, abstime) \
|
||||
({ \
|
||||
int __res = 0; \
|
||||
if ((tid) != 0) \
|
||||
__res = __lll_timedwait_tid (&(tid), (abstime)); \
|
||||
__res; \
|
||||
})
|
||||
|
||||
#endif /* lowlevellock.h */
|
@ -1,43 +0,0 @@
|
||||
/* Copyright (C) 2003, 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <tcb-offsets.h>
|
||||
|
||||
/* Instructions to run ahead of the vfork system call; presumably expanded
   by the PSEUDO macro used below (confirm in sysdep.h). After this
   sequence a2 holds the cached pid that was loaded, a1 holds the value
   rduniq left in v0, and the pid slot at PID_OFFSET holds the negated
   pid. */
#undef PSEUDO_PREPARE_ARGS
|
||||
#define PSEUDO_PREPARE_ARGS \
|
||||
/* Load the current cached pid value across the vfork. */ \
|
||||
rduniq; \
|
||||
ldl a2, PID_OFFSET(v0); \
|
||||
mov v0, a1; \
|
||||
/* Write back its negation, to indicate that the pid value is \
|
||||
uninitialized in the child, and in the window between \
|
||||
here and the point at which we restore the value. */ \
|
||||
negl a2, t0; \
|
||||
stl t0, PID_OFFSET(v0);
|
||||
|
||||
PSEUDO (__vfork, vfork, 0)
|
||||
|
||||
/* If we're back in the parent, restore the saved pid. */
|
||||
beq v0, 1f
|
||||
stl a2, PID_OFFSET(a1)
|
||||
1: ret
|
||||
|
||||
PSEUDO_END (__vfork)
|
||||
|
||||
weak_alias (__vfork, vfork)
|
@ -1,96 +0,0 @@
|
||||
/* Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "pthreadP.h"
|
||||
#include <lowlevellock.h>
|
||||
|
||||
|
||||
unsigned long int __fork_generation attribute_hidden;
|
||||
|
||||
/* Cleanup handler registered by __pthread_once around the call to the
   init routine. ARG is the pthread_once_t being initialized: reset it
   to 0 and wake up to INT_MAX waiters on it. */
static void
|
||||
clear_once_control (void *arg)
|
||||
{
|
||||
pthread_once_t *ctl = (pthread_once_t *) arg;
|
||||
|
|
||||
/* Reset first, then wake. */
*ctl = 0;
|
||||
lll_futex_wake (ctl, INT_MAX, LLL_PRIVATE);
|
||||
}
|
||||
|
||||
/* Run INIT_ROUTINE once for ONCE_CONTROL.

   State kept in *ONCE_CONTROL, as used by the checks below: bit value 2
   set means initialization has finished; bit value 1 set means an
   initializer was started, and the value stored while it runs is
   __fork_generation | 1. A thread that finds an in-progress value of
   the current generation waits on the futex and retries; any other
   thread runs INIT_ROUTINE itself. Always returns 0. */
int
|
||||
__pthread_once (pthread_once_t *once_control, void (*init_routine) (void))
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
int oldval;
|
||||
int newval;
|
||||
int tmp;
|
||||
|
|
||||
/* Pseudo code:
|
||||
newval = __fork_generation | 1;
|
||||
oldval = *once_control;
|
||||
if ((oldval & 2) == 0)
|
||||
*once_control = newval;
|
||||
Do this atomically.
|
||||
*/
|
||||
newval = __fork_generation | 1;
|
||||
/* Load-locked/store-conditional loop: %0 = oldval, %1 = tmp,
   %2 = *once_control, %3 = newval. The store is skipped (branch to 2)
   when bit value 2 is already set, and the sequence restarts at 1 when
   stl_c leaves 0 in tmp. A memory barrier (mb) ends both paths. */
__asm __volatile (
|
||||
"1: ldl_l %0, %2\n"
|
||||
" and %0, 2, %1\n"
|
||||
" bne %1, 2f\n"
|
||||
" mov %3, %1\n"
|
||||
" stl_c %1, %2\n"
|
||||
" beq %1, 1b\n"
|
||||
"2: mb"
|
||||
: "=&r" (oldval), "=&r" (tmp), "=m" (*once_control)
|
||||
: "r" (newval), "m" (*once_control));
|
||||
|
|
||||
/* Check if the initializer has already been done. */
|
||||
if ((oldval & 2) != 0)
|
||||
return 0;
|
||||
|
|
||||
/* If no other thread had started the initializer (bit value 1 was
   clear), this thread runs it. */
|
||||
if ((oldval & 1) == 0)
|
||||
break;
|
||||
|
|
||||
/* Check whether the initializer execution was interrupted by a fork. */
|
||||
if (oldval != newval)
|
||||
break;
|
||||
|
|
||||
/* Same generation, some other thread was faster. Wait. */
|
||||
lll_futex_wait (once_control, oldval, LLL_PRIVATE);
|
||||
}
|
||||
|
|
||||
/* This thread is the first here. Do the initialization.
|
||||
Register a cleanup handler so that in case the thread gets
|
||||
interrupted the initialization can be restarted. */
|
||||
pthread_cleanup_push (clear_once_control, once_control);
|
||||
|
|
||||
init_routine ();
|
||||
|
|
||||
pthread_cleanup_pop (0);
|
||||
|
|
||||
/* Add one to *once_control to take the bottom 2 bits from 01 to 10. */
|
||||
atomic_increment (once_control);
|
||||
|
|
||||
/* Wake up all other threads. */
|
||||
lll_futex_wake (once_control, INT_MAX, LLL_PRIVATE);
|
||||
|
|
||||
return 0;
|
||||
}
|
||||
weak_alias (__pthread_once, pthread_once)
|
||||
strong_alias (__pthread_once, __pthread_once_internal)
|
@ -1,5 +0,0 @@
|
||||
/* ??? This is an ass-backwards way to do this. We should simply define
|
||||
the acquire/release semantics of atomic_exchange_and_add. And even if
|
||||
we don't do this, we should be using atomic_full_barrier or otherwise. */
|
||||
#define __lll_rel_instr "mb"
|
||||
#include "../sem_post.c"
|
@ -1,177 +0,0 @@
|
||||
/* Copyright (C) 2003, 2006 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <tls.h>
|
||||
#ifndef __ASSEMBLER__
|
||||
# include <nptl/pthreadP.h>
|
||||
#endif
|
||||
|
||||
#if !defined NOT_IN_libc || defined IS_IN_libpthread || defined IS_IN_librt
|
||||
|
||||
# ifdef PROF
|
||||
# define PSEUDO_PROF \
|
||||
.set noat; \
|
||||
lda AT, _mcount; \
|
||||
jsr AT, (AT), _mcount; \
|
||||
.set at
|
||||
# else
|
||||
# define PSEUDO_PROF
|
||||
# endif
|
||||
|
||||
/* ??? Assumes that nothing comes between PSEUDO and PSEUDO_END
|
||||
besides "ret". */
|
||||
|
||||
# undef PSEUDO
|
||||
# define PSEUDO(name, syscall_name, args) \
|
||||
.globl name; \
|
||||
.align 4; \
|
||||
.type name, @function; \
|
||||
.usepv name, std; \
|
||||
cfi_startproc; \
|
||||
__LABEL(name) \
|
||||
ldgp gp, 0(pv); \
|
||||
PSEUDO_PROF; \
|
||||
PSEUDO_PREPARE_ARGS \
|
||||
SINGLE_THREAD_P(t0); \
|
||||
bne t0, $pseudo_cancel; \
|
||||
lda v0, SYS_ify(syscall_name); \
|
||||
call_pal PAL_callsys; \
|
||||
bne a3, SYSCALL_ERROR_LABEL; \
|
||||
__LABEL($pseudo_ret) \
|
||||
.subsection 2; \
|
||||
cfi_startproc; \
|
||||
__LABEL($pseudo_cancel) \
|
||||
subq sp, 64, sp; \
|
||||
cfi_def_cfa_offset(64); \
|
||||
stq ra, 0(sp); \
|
||||
cfi_offset(ra, -64); \
|
||||
SAVE_ARGS_##args; \
|
||||
CENABLE; \
|
||||
LOAD_ARGS_##args; \
|
||||
/* Save the CENABLE return value in RA. That register \
|
||||
is preserved across syscall and the real return \
|
||||
address is saved on the stack. */ \
|
||||
mov v0, ra; \
|
||||
lda v0, SYS_ify(syscall_name); \
|
||||
call_pal PAL_callsys; \
|
||||
stq v0, 8(sp); \
|
||||
mov ra, a0; \
|
||||
bne a3, $multi_error; \
|
||||
CDISABLE; \
|
||||
ldq ra, 0(sp); \
|
||||
ldq v0, 8(sp); \
|
||||
addq sp, 64, sp; \
|
||||
cfi_remember_state; \
|
||||
cfi_restore(ra); \
|
||||
cfi_def_cfa_offset(0); \
|
||||
ret; \
|
||||
cfi_restore_state; \
|
||||
__LABEL($multi_error) \
|
||||
CDISABLE; \
|
||||
ldq ra, 0(sp); \
|
||||
ldq v0, 8(sp); \
|
||||
addq sp, 64, sp; \
|
||||
cfi_restore(ra); \
|
||||
cfi_def_cfa_offset(0); \
|
||||
__LABEL($syscall_error) \
|
||||
SYSCALL_ERROR_HANDLER; \
|
||||
cfi_endproc; \
|
||||
.previous
|
||||
|
||||
# undef PSEUDO_END
|
||||
# define PSEUDO_END(sym) \
|
||||
cfi_endproc; \
|
||||
.subsection 2; \
|
||||
.size sym, .-sym
|
||||
|
||||
# define SAVE_ARGS_0 /* Nothing. */
|
||||
# define SAVE_ARGS_1 SAVE_ARGS_0; stq a0, 8(sp)
|
||||
# define SAVE_ARGS_2 SAVE_ARGS_1; stq a1, 16(sp)
|
||||
# define SAVE_ARGS_3 SAVE_ARGS_2; stq a2, 24(sp)
|
||||
# define SAVE_ARGS_4 SAVE_ARGS_3; stq a3, 32(sp)
|
||||
# define SAVE_ARGS_5 SAVE_ARGS_4; stq a4, 40(sp)
|
||||
# define SAVE_ARGS_6 SAVE_ARGS_5; stq a5, 48(sp)
|
||||
|
||||
# define LOAD_ARGS_0 /* Nothing. */
|
||||
# define LOAD_ARGS_1 LOAD_ARGS_0; ldq a0, 8(sp)
|
||||
# define LOAD_ARGS_2 LOAD_ARGS_1; ldq a1, 16(sp)
|
||||
# define LOAD_ARGS_3 LOAD_ARGS_2; ldq a2, 24(sp)
|
||||
# define LOAD_ARGS_4 LOAD_ARGS_3; ldq a3, 32(sp)
|
||||
# define LOAD_ARGS_5 LOAD_ARGS_4; ldq a4, 40(sp)
|
||||
# define LOAD_ARGS_6 LOAD_ARGS_5; ldq a5, 48(sp)
|
||||
|
||||
# ifdef IS_IN_libpthread
|
||||
# define __local_enable_asynccancel __pthread_enable_asynccancel
|
||||
# define __local_disable_asynccancel __pthread_disable_asynccancel
|
||||
# define __local_multiple_threads __pthread_multiple_threads
|
||||
# elif !defined NOT_IN_libc
|
||||
# define __local_enable_asynccancel __libc_enable_asynccancel
|
||||
# define __local_disable_asynccancel __libc_disable_asynccancel
|
||||
# define __local_multiple_threads __libc_multiple_threads
|
||||
# elif defined IS_IN_librt
|
||||
# define __local_enable_asynccancel __librt_enable_asynccancel
|
||||
# define __local_disable_asynccancel __librt_disable_asynccancel
|
||||
# else
|
||||
# error Unsupported library
|
||||
# endif
|
||||
|
||||
# ifdef PIC
|
||||
# define CENABLE bsr ra, __local_enable_asynccancel !samegp
|
||||
# define CDISABLE bsr ra, __local_disable_asynccancel !samegp
|
||||
# else
|
||||
/* Non-PIC: call the cancellation helper through jsr, which leaves the
   return address in ra, then recompute gp from that return address.
   The operands of ldgp were previously swapped (ldgp ra, 0(gp)), which
   overwrote ra and left gp unreloaded. ra itself is dead after the
   call: PSEUDO either overwrites it or reloads it from the stack. */
# define CENABLE jsr ra, __local_enable_asynccancel; ldgp gp, 0(ra)
|
||||
# define CDISABLE jsr ra, __local_disable_asynccancel; ldgp gp, 0(ra)
|
||||
# endif
|
||||
|
||||
# if defined IS_IN_libpthread || !defined NOT_IN_libc
|
||||
# ifndef __ASSEMBLER__
|
||||
extern int __local_multiple_threads attribute_hidden;
|
||||
# define SINGLE_THREAD_P \
|
||||
__builtin_expect (__local_multiple_threads == 0, 1)
|
||||
# elif defined(PIC)
|
||||
# define SINGLE_THREAD_P(reg) ldl reg, __local_multiple_threads(gp) !gprel
|
||||
# else
|
||||
# define SINGLE_THREAD_P(reg) \
|
||||
ldah reg, __local_multiple_threads(gp) !gprelhigh; \
|
||||
ldl reg, __local_multiple_threads(reg) !gprellow
|
||||
# endif
|
||||
# else
|
||||
# ifndef __ASSEMBLER__
|
||||
# define SINGLE_THREAD_P \
|
||||
__builtin_expect (THREAD_GETMEM (THREAD_SELF, \
|
||||
header.multiple_threads) == 0, 1)
|
||||
# else
|
||||
# define SINGLE_THREAD_P(reg) \
|
||||
call_pal PAL_rduniq; \
|
||||
ldl reg, MULTIPLE_THREADS_OFFSET($0)
|
||||
# endif
|
||||
# endif
|
||||
|
||||
#else
|
||||
|
||||
# define SINGLE_THREAD_P (1)
|
||||
# define NO_CANCELLATION 1
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef __ASSEMBLER__
|
||||
# define RTLD_SINGLE_THREAD_P \
|
||||
__builtin_expect (THREAD_GETMEM (THREAD_SELF, \
|
||||
header.multiple_threads) == 0, 1)
|
||||
#endif
|
@ -1 +0,0 @@
|
||||
#include "../x86_64/timer_create.c"
|
@ -1 +0,0 @@
|
||||
#include "../x86_64/timer_delete.c"
|
@ -1 +0,0 @@
|
||||
#include "../x86_64/timer_getoverr.c"
|
@ -1 +0,0 @@
|
||||
#include "../x86_64/timer_gettime.c"
|
@ -1 +0,0 @@
|
||||
#include "../x86_64/timer_settime.c"
|
@ -1,46 +0,0 @@
|
||||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <tcb-offsets.h>
|
||||
|
||||
/* Instructions to run ahead of the vfork system call; presumably expanded
   by the PSEUDO macro used below (confirm in sysdep.h). After this
   sequence a2 holds the cached pid that was loaded and a1 holds the
   value rduniq left in v0. The pid slot at PID_OFFSET holds the negated
   pid when the cached pid was nonzero, otherwise the constant loaded by
   ldah (INT_MIN per the comment below). */
#undef PSEUDO_PREPARE_ARGS
|
||||
#define PSEUDO_PREPARE_ARGS \
|
||||
/* Load the current cached pid value across the vfork. */ \
|
||||
rduniq; \
|
||||
ldl a2, PID_OFFSET(v0); \
|
||||
mov v0, a1; \
|
||||
/* If the cached value is initialized (nonzero), then write \
|
||||
back its negation, or INT_MIN, to indicate that the pid \
|
||||
value is uninitialized in the child, and in the window \
|
||||
between here and the point at which we restore the value. */ \
|
||||
ldah t0, -0x8000; \
|
||||
negl a2, t1; \
|
||||
cmovne a2, t1, t0; \
|
||||
stl t0, PID_OFFSET(v0);
|
||||
|
||||
PSEUDO (__vfork, vfork, 0)
|
||||
|
||||
/* If we're back in the parent, restore the saved pid. */
|
||||
beq v0, 1f
|
||||
stl a2, PID_OFFSET(a1)
|
||||
1: ret
|
||||
|
||||
PSEUDO_END (__vfork)
|
||||
libc_hidden_def (__vfork)
|
||||
weak_alias (__vfork, vfork)
|
@ -1,6 +0,0 @@
|
||||
wordsize-64
|
||||
# Alpha uses IEEE 754 single, double and quad precision floating point.
|
||||
ieee754/ldbl-128
|
||||
ieee754/dbl-64
|
||||
ieee754/flt-32
|
||||
alpha/soft-fp
|
@ -1,48 +0,0 @@
|
||||
# Copyright (C) 1993, 94, 95, 96, 97, 99 Free Software Foundation, Inc.
|
||||
# This file is part of the GNU C Library.
|
||||
# Contributed by Brendan Kehoe (brendan@zen.org).
|
||||
|
||||
# The GNU C Library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
# The GNU C Library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with the GNU C Library; if not, write to the Free
|
||||
# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
# 02111-1307 USA.
|
||||
|
||||
ifeq ($(subdir),db2)
|
||||
CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_ALPHA=1
|
||||
endif
|
||||
|
||||
ifeq ($(subdir),gmon)
|
||||
sysdep_routines += _mcount
|
||||
endif
|
||||
|
||||
ifeq ($(subdir),gnulib)
|
||||
sysdep_routines += divl divlu divq divqu reml remlu remq remqu
|
||||
endif
|
||||
|
||||
ifeq ($(subdir),string)
|
||||
sysdep_routines += stxcpy stxncpy
|
||||
endif
|
||||
|
||||
ifeq ($(subdir),elf)
|
||||
# The ld.so startup code cannot use literals until it self-relocates.
|
||||
CFLAGS-rtld.c = -mbuild-constants
|
||||
endif
|
||||
|
||||
# Build everything with full IEEE math support, and with dynamic rounding;
|
||||
# there are a number of math routines that are defined to work with the
|
||||
# "current" rounding mode, and it's easiest to set this with all of them.
|
||||
sysdep-CFLAGS += -mieee -mfp-rounding-mode=d
|
||||
|
||||
# libc.so requires about 16k for the small data area, which is well
|
||||
# below the 64k maximum.
|
||||
pic-ccflag = -fpic
|
@ -1 +0,0 @@
|
||||
soft-fp
|
@ -1,13 +0,0 @@
|
||||
libc {
|
||||
GLIBC_2.0 {
|
||||
# functions with special/multiple interfaces
|
||||
__divqu; __remqu; __divqs; __remqs; __divlu; __remlu; __divls;
|
||||
__remls; __divl; __reml; __divq; __remq; __divqu; __remqu;
|
||||
}
|
||||
}
|
||||
libm {
|
||||
GLIBC_2.0 {
|
||||
# used in inline functions.
|
||||
__atan2;
|
||||
}
|
||||
}
|
@ -1,64 +0,0 @@
|
||||
/* Copyright (C) 1992, 1994, 1997, 2006 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#define __ASSEMBLY__
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <jmpbuf-offsets.h>
|
||||
|
||||
|
||||
/* __longjmp: restore the register state saved in a jump buffer and
   return to the saved PC.
   In: a0 = base address of the saved-register array (indexed by the JB_*
   slot numbers, 8 bytes per slot)
   a1 = value to hand back
   Out: v0 = a1, or 1 when a1 is 0
   Restores: s0-s5, ra, fp, sp, $f2-$f9
   Scratch: t0 (saved sp), t1 (used by PTR_DEMANGLE when defined) */
ENTRY(__longjmp)
|
||||
#ifdef PROF
|
||||
ldgp gp, 0(pv)
|
||||
.set noat
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.set at
|
||||
.prologue 1
|
||||
#else
|
||||
.prologue 0
|
||||
#endif
|
||||
|
|
||||
mov a1, v0 /* return value candidate */
|
||||
ldq s0, JB_S0*8(a0)
|
||||
ldq s1, JB_S1*8(a0)
|
||||
ldq s2, JB_S2*8(a0)
|
||||
ldq s3, JB_S3*8(a0)
|
||||
ldq s4, JB_S4*8(a0)
|
||||
ldq s5, JB_S5*8(a0)
|
||||
ldq ra, JB_PC*8(a0) /* saved PC becomes the return address */
|
||||
ldq fp, JB_FP*8(a0)
|
||||
ldq t0, JB_SP*8(a0) /* saved sp is staged in t0 until the end */
|
||||
ldt $f2, JB_F2*8(a0)
|
||||
ldt $f3, JB_F3*8(a0)
|
||||
ldt $f4, JB_F4*8(a0)
|
||||
ldt $f5, JB_F5*8(a0)
|
||||
ldt $f6, JB_F6*8(a0)
|
||||
ldt $f7, JB_F7*8(a0)
|
||||
ldt $f8, JB_F8*8(a0)
|
||||
ldt $f9, JB_F9*8(a0)
|
||||
#ifdef PTR_DEMANGLE
|
||||
/* Undo pointer mangling on the saved PC, SP and FP. */
PTR_DEMANGLE(ra, t1)
|
||||
PTR_DEMANGLE2(t0, t1)
|
||||
PTR_DEMANGLE2(fp, t1)
|
||||
#endif
|
||||
cmoveq v0, 1, v0 /* never hand back 0: substitute 1 */
|
||||
mov t0, sp /* switch to the restored stack pointer */
|
||||
ret
|
||||
|
|
||||
END(__longjmp)
|
@ -1,107 +0,0 @@
|
||||
/* Machine-specific calling sequence for `mcount' profiling function. alpha
|
||||
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
|
||||
Contributed by David Mosberger (davidm@cs.arizona.edu).
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Assembly stub to invoke _mcount(). Compiler generated code calls
|
||||
this stub after executing a function's prologue and without saving any
|
||||
registers. It is therefore necessary to preserve a0..a5 as they may
|
||||
contain function arguments. To work correctly with frameless
|
||||
functions, it is also necessary to preserve ra. Finally, division
|
||||
routines are invoked with a special calling convention and the
|
||||
compiler treats those calls as if they were instructions. In
|
||||
particular, it doesn't save any of the temporary registers (caller
|
||||
saved registers). It is therefore necessary to preserve all
|
||||
caller-saved registers as well.
|
||||
|
||||
Upon entering _mcount, register $at holds the return address and ra
|
||||
holds the return address of the function's caller (selfpc and frompc,
|
||||
respectively in gmon.c language...). */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
||||
LEAF(_mcount, 0xb0)
|
||||
.prologue 0
|
||||
|
||||
subq sp, 0xb0, sp
|
||||
stq a0, 0x00(sp)
|
||||
mov ra, a0 # a0 = caller-pc
|
||||
stq a1, 0x08(sp)
|
||||
mov $at, a1 # a1 = self-pc
|
||||
stq $at, 0x10(sp)
|
||||
|
||||
stq a2, 0x18(sp)
|
||||
stq a3, 0x20(sp)
|
||||
stq a4, 0x28(sp)
|
||||
stq a5, 0x30(sp)
|
||||
stq ra, 0x38(sp)
|
||||
stq gp, 0x40(sp)
|
||||
|
||||
br gp, 1f
|
||||
1: ldgp gp, 0(gp)
|
||||
|
||||
stq t0, 0x48(sp)
|
||||
stq t1, 0x50(sp)
|
||||
stq t2, 0x58(sp)
|
||||
stq t3, 0x60(sp)
|
||||
stq t4, 0x68(sp)
|
||||
stq t5, 0x70(sp)
|
||||
stq t6, 0x78(sp)
|
||||
|
||||
stq t7, 0x80(sp)
|
||||
stq t8, 0x88(sp)
|
||||
stq t9, 0x90(sp)
|
||||
stq t10, 0x98(sp)
|
||||
stq t11, 0xa0(sp)
|
||||
stq v0, 0xa8(sp)
|
||||
|
||||
jsr ra, __mcount
|
||||
|
||||
ldq a0, 0x00(sp)
|
||||
ldq a1, 0x08(sp)
|
||||
ldq $at, 0x10(sp) # restore self-pc
|
||||
ldq a2, 0x18(sp)
|
||||
ldq a3, 0x20(sp)
|
||||
ldq a4, 0x28(sp)
|
||||
ldq a5, 0x30(sp)
|
||||
ldq ra, 0x38(sp)
|
||||
ldq gp, 0x40(sp)
|
||||
mov $at, pv # make pv point to return address
|
||||
ldq t0, 0x48(sp) # this is important under OSF/1 to
|
||||
ldq t1, 0x50(sp) # ensure that the code that we return
|
||||
ldq t2, 0x58(sp) # can correctly compute its gp
|
||||
ldq t3, 0x60(sp)
|
||||
ldq t4, 0x68(sp)
|
||||
ldq t5, 0x70(sp)
|
||||
ldq t6, 0x78(sp)
|
||||
ldq t7, 0x80(sp)
|
||||
ldq t8, 0x88(sp)
|
||||
ldq t9, 0x90(sp)
|
||||
ldq t10, 0x98(sp)
|
||||
ldq t11, 0xa0(sp)
|
||||
ldq v0, 0xa8(sp)
|
||||
|
||||
addq sp, 0xb0, sp
|
||||
ret zero,($at),1
|
||||
|
||||
END(_mcount)
|
||||
|
||||
weak_alias (_mcount, mcount)
|
@ -1,120 +0,0 @@
|
||||
# Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
|
||||
# store sum in a third limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
# MA 02111-1307, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $16
|
||||
# s1_ptr $17
|
||||
# s2_ptr $18
|
||||
# size $19
|
||||
|
||||
# __mpn_add_n: add the size-limb vectors at s1_ptr and s2_ptr, store the
# sum at res_ptr and leave the carry out (0 or 1) in $0.
#
# Register use:
#   $3, $4   current s1 / s2 limb (loaded one step ahead of use)
#   $5, $6   next s1 / s2 limb
#   $0       running carry
#   $1       carry from adding the running carry to the s2 limb
#   $2       limbs handled one at a time before the 4-way loop
#   $19      limbs left for the 4-way loop (always a multiple of 4)
#
# The first limb pair is loaded up front and the last pair is summed at
# .Lend, so the two loops together run size-1 steps.
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
.globl __mpn_add_n
|
||||
.ent __mpn_add_n
|
||||
__mpn_add_n:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
ldq $3,0($17) # $3 = first s1 limb
|
||||
ldq $4,0($18) # $4 = first s2 limb
|
||||
|
||||
subq $19,1,$19 # size-1 steps before the final limb at .Lend
|
||||
and $19,4-1,$2 # number of limbs in first loop
|
||||
bis $31,$31,$0 # carry = 0
|
||||
beq $2,.L0 # if multiple of 4 limbs, skip first loop
|
||||
|
||||
subq $19,$2,$19 # $19 = limbs left for the 4-way loop
|
||||
|
||||
.Loop0: subq $2,1,$2
|
||||
ldq $5,8($17) # load next s1 limb
|
||||
addq $4,$0,$4 # s2 limb + carry in
|
||||
ldq $6,8($18) # load next s2 limb
|
||||
cmpult $4,$0,$1 # $1 = carry out of that add
|
||||
addq $3,$4,$4 # + s1 limb
|
||||
cmpult $4,$3,$0 # $0 = carry out of that add
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0 # merge the two carries
|
||||
|
||||
addq $17,8,$17
|
||||
addq $18,8,$18
|
||||
bis $5,$5,$3 # next limbs become the current ones
|
||||
bis $6,$6,$4
|
||||
addq $16,8,$16
|
||||
bne $2,.Loop0
|
||||
|
||||
.L0: beq $19,.Lend # nothing left for the 4-way loop
|
||||
|
||||
.align 3
|
||||
.Loop: subq $19,4,$19
|
||||
|
||||
ldq $5,8($17) # limb 0: sum $3/$4, preload $5/$6
|
||||
addq $4,$0,$4
|
||||
ldq $6,8($18)
|
||||
cmpult $4,$0,$1
|
||||
addq $3,$4,$4
|
||||
cmpult $4,$3,$0
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0
|
||||
|
||||
ldq $3,16($17) # limb 1: sum $5/$6, preload $3/$4
|
||||
addq $6,$0,$6
|
||||
ldq $4,16($18)
|
||||
cmpult $6,$0,$1
|
||||
addq $5,$6,$6
|
||||
cmpult $6,$5,$0
|
||||
stq $6,8($16)
|
||||
or $0,$1,$0
|
||||
|
||||
ldq $5,24($17) # limb 2: sum $3/$4, preload $5/$6
|
||||
addq $4,$0,$4
|
||||
ldq $6,24($18)
|
||||
cmpult $4,$0,$1
|
||||
addq $3,$4,$4
|
||||
cmpult $4,$3,$0
|
||||
stq $4,16($16)
|
||||
or $0,$1,$0
|
||||
|
||||
ldq $3,32($17) # limb 3: sum $5/$6, preload $3/$4
|
||||
addq $6,$0,$6
|
||||
ldq $4,32($18)
|
||||
cmpult $6,$0,$1
|
||||
addq $5,$6,$6
|
||||
cmpult $6,$5,$0
|
||||
stq $6,24($16)
|
||||
or $0,$1,$0
|
||||
|
||||
addq $17,32,$17
|
||||
addq $18,32,$18
|
||||
addq $16,32,$16
|
||||
bne $19,.Loop
|
||||
|
||||
.Lend: addq $4,$0,$4 # final limb, already loaded in $3/$4
|
||||
cmpult $4,$0,$1
|
||||
addq $3,$4,$4
|
||||
cmpult $4,$3,$0
|
||||
stq $4,0($16)
|
||||
or $0,$1,$0 # $0 = carry out
|
||||
ret $31,($26),1
|
||||
|
||||
.end __mpn_add_n
|
@ -1,92 +0,0 @@
|
||||
# Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||
# the result to a second limb vector.
|
||||
|
||||
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
# MA 02111-1307, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r16
|
||||
# s1_ptr r17
|
||||
# size r18
|
||||
# s2_limb r19
|
||||
|
||||
# This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
|
||||
|
||||
# __mpn_addmul_1: multiply the size-limb vector at s1_ptr by s2_limb, add
# the product into the vector at res_ptr in place, and leave the carry
# limb in $0.
#
# Register use:
#   $2   current s1 limb
#   $3   low half of the product, then the value stored to *res_ptr
#   $4   carry bit from the first step, afterwards high half of the product
#   $5   *res_ptr, then the carry bit from adding it
#   $0   carry limb carried into the next step / returned
#
# Sizes 1 and 2 leave through .Lend1 and .Lend2; larger sizes run .Loop
# once per limb beyond the second and then fall into .Lend2.
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
.globl __mpn_addmul_1
|
||||
.ent __mpn_addmul_1 2
|
||||
__mpn_addmul_1:
|
||||
.frame $30,0,$26
|
||||
|
||||
ldq $2,0($17) # $2 = s1_limb
|
||||
addq $17,8,$17 # s1_ptr++
|
||||
subq $18,1,$18 # size--
|
||||
mulq $2,$19,$3 # $3 = prod_low
|
||||
ldq $5,0($16) # $5 = *res_ptr
|
||||
umulh $2,$19,$0 # $0 = prod_high
|
||||
beq $18,.Lend1 # jump if size was == 1
|
||||
ldq $2,0($17) # $2 = s1_limb
|
||||
addq $17,8,$17 # s1_ptr++
|
||||
subq $18,1,$18 # size--
|
||||
addq $5,$3,$3 # $3 = *res_ptr + prod_low
|
||||
cmpult $3,$5,$4 # $4 = carry out of that add
|
||||
stq $3,0($16)
|
||||
addq $16,8,$16 # res_ptr++
|
||||
beq $18,.Lend2 # jump if size was == 2
|
||||
|
||||
.align 3
|
||||
.Loop: mulq $2,$19,$3 # $3 = prod_low
|
||||
ldq $5,0($16) # $5 = *res_ptr
|
||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||
subq $18,1,$18 # size--
|
||||
umulh $2,$19,$4 # $4 = cy_limb
|
||||
ldq $2,0($17) # $2 = s1_limb
|
||||
addq $17,8,$17 # s1_ptr++
|
||||
addq $3,$0,$3 # $3 = cy_limb + prod_low
|
||||
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
|
||||
addq $5,$3,$3 # add *res_ptr
|
||||
cmpult $3,$5,$5 # $5 = carry out of that add
|
||||
stq $3,0($16)
|
||||
addq $16,8,$16 # res_ptr++
|
||||
addq $5,$0,$0 # combine carries
|
||||
bne $18,.Loop
|
||||
|
||||
.Lend2: mulq $2,$19,$3 # $3 = prod_low
|
||||
ldq $5,0($16) # $5 = *res_ptr
|
||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||
umulh $2,$19,$4 # $4 = cy_limb
|
||||
addq $3,$0,$3 # $3 = cy_limb + prod_low
|
||||
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
|
||||
addq $5,$3,$3 # add *res_ptr
|
||||
cmpult $3,$5,$5 # $5 = carry out of that add
|
||||
stq $3,0($16)
|
||||
addq $5,$0,$0 # combine carries
|
||||
addq $4,$0,$0 # cy_limb = prod_high + cy
|
||||
ret $31,($26),1
|
||||
.Lend1: addq $5,$3,$3 # size == 1: $3 = *res_ptr + prod_low
|
||||
cmpult $3,$5,$5 # $5 = carry out of that add
|
||||
stq $3,0($16)
|
||||
addq $0,$5,$0 # return prod_high + carry
|
||||
ret $31,($26),1
|
||||
|
||||
.end __mpn_addmul_1
|
@ -1,148 +0,0 @@
|
||||
# Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
|
||||
# store sum in a third limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
# MA 02111-1307, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $16
|
||||
# s1_ptr $17
|
||||
# s2_ptr $18
|
||||
# size $19
|
||||
|
||||
# __mpn_add_n, software-pipelined variant: add the size-limb vectors at
# s1_ptr and s2_ptr, store the sum at res_ptr and return the carry in $0.
#
# Register use:
#   $0-$3     s2 limbs of the current group of four
#   $4-$7     s1 limbs of the current group of four
#   $20-$23   the four sums waiting to be stored
#   $25       running carry
#   $28       s2 limb plus running carry ("cy add")
#   $8        carry out of the cy add
#   $19       loop counter, kept 4 (then 8) below the limbs remaining
#
# Structure: a prologue and .Loop handle groups of 4 limbs, .Lend1 drains
# the pipeline, then .Loop0/.Lend0 handle the remaining 1-3 limbs.
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
.globl __mpn_add_n
|
||||
.ent __mpn_add_n
|
||||
__mpn_add_n:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
or $31,$31,$25 # clear cy
|
||||
subq $19,4,$19 # decr loop cnt
|
||||
blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
|
||||
# Start software pipeline for 1st loop
|
||||
ldq $0,0($18)
|
||||
|
||||
ldq $1,8($18)
|
||||
ldq $4,0($17)
|
||||
|
||||
ldq $5,8($17)
|
||||
addq $17,32,$17 # update s1_ptr
|
||||
ldq $2,16($18)
|
||||
|
||||
addq $0,$4,$20 # 1st main add
|
||||
ldq $3,24($18)
|
||||
|
||||
subq $19,4,$19 # decr loop cnt
|
||||
ldq $6,-16($17)
|
||||
|
||||
cmpult $20,$0,$25 # compute cy from last add
|
||||
ldq $7,-8($17)
|
||||
|
||||
addq $1,$25,$28 # cy add
|
||||
addq $18,32,$18 # update s2_ptr
|
||||
addq $5,$28,$21 # 2nd main add
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
blt $19,.Lend1 # if less than 4 limbs remain, jump
|
||||
# 1st loop handles groups of 4 limbs in a software pipeline
|
||||
.align 4
|
||||
.Loop: cmpult $21,$28,$25 # compute cy from last add
|
||||
ldq $0,0($18)
|
||||
|
||||
or $8,$25,$25 # combine cy from the two adds
|
||||
ldq $1,8($18)
|
||||
|
||||
addq $2,$25,$28 # cy add
|
||||
ldq $4,0($17)
|
||||
|
||||
addq $28,$6,$22 # 3rd main add
|
||||
ldq $5,8($17)
|
||||
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
cmpult $22,$28,$25 # compute cy from last add
|
||||
stq $20,0($16)
|
||||
|
||||
or $8,$25,$25 # combine cy from the two adds
|
||||
stq $21,8($16)
|
||||
|
||||
addq $3,$25,$28 # cy add
|
||||
addq $28,$7,$23 # 4th main add
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
cmpult $23,$28,$25 # compute cy from last add
|
||||
addq $17,32,$17 # update s1_ptr
|
||||
or $8,$25,$25 # combine cy from the two adds
|
||||
addq $16,32,$16 # update res_ptr
|
||||
addq $0,$25,$28 # cy add
|
||||
ldq $2,16($18)
|
||||
|
||||
addq $4,$28,$20 # 1st main add
|
||||
ldq $3,24($18)
|
||||
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
ldq $6,-16($17)
|
||||
|
||||
cmpult $20,$28,$25 # compute cy from last add
|
||||
ldq $7,-8($17)
|
||||
|
||||
or $8,$25,$25 # combine cy from the two adds
|
||||
subq $19,4,$19 # decr loop cnt
|
||||
stq $22,-16($16)
|
||||
|
||||
addq $1,$25,$28 # cy add
|
||||
stq $23,-8($16)
|
||||
|
||||
addq $5,$28,$21 # 2nd main add
|
||||
addq $18,32,$18 # update s2_ptr
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
bge $19,.Loop
|
||||
# Finish software pipeline for 1st loop
|
||||
.Lend1: cmpult $21,$28,$25 # compute cy from last add
|
||||
or $8,$25,$25 # combine cy from the two adds
|
||||
addq $2,$25,$28 # cy add
|
||||
addq $28,$6,$22 # 3rd main add
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
cmpult $22,$28,$25 # compute cy from last add
|
||||
stq $20,0($16)
|
||||
|
||||
or $8,$25,$25 # combine cy from the two adds
|
||||
stq $21,8($16)
|
||||
|
||||
addq $3,$25,$28 # cy add
|
||||
addq $28,$7,$23 # 4th main add
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
cmpult $23,$28,$25 # compute cy from last add
|
||||
or $8,$25,$25 # combine cy from the two adds
|
||||
addq $16,32,$16 # update res_ptr
|
||||
stq $22,-16($16)
|
||||
|
||||
stq $23,-8($16)
|
||||
|
||||
.Lend2: addq $19,4,$19 # restore loop cnt
|
||||
beq $19,.Lret
|
||||
# Start software pipeline for 2nd loop
|
||||
ldq $0,0($18)
|
||||
|
||||
ldq $4,0($17)
|
||||
|
||||
subq $19,1,$19 # the last limb is summed at .Lend0
|
||||
beq $19,.Lend0
|
||||
# 2nd loop handles remaining 1-3 limbs
|
||||
.align 4
|
||||
.Loop0: addq $0,$25,$28 # cy add
|
||||
ldq $0,8($18)
|
||||
|
||||
addq $4,$28,$20 # main add
|
||||
ldq $4,8($17)
|
||||
|
||||
addq $18,8,$18
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
addq $17,8,$17
|
||||
stq $20,0($16)
|
||||
|
||||
cmpult $20,$28,$25 # compute cy from last add
|
||||
subq $19,1,$19 # decr loop cnt
|
||||
or $8,$25,$25 # combine cy from the two adds
|
||||
addq $16,8,$16
|
||||
bne $19,.Loop0
|
||||
.Lend0: addq $0,$25,$28 # cy add
|
||||
addq $4,$28,$20 # main add
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
cmpult $20,$28,$25 # compute cy from last add
|
||||
stq $20,0($16)
|
||||
|
||||
or $8,$25,$25 # combine cy from the two adds
|
||||
|
||||
.Lret: or $25,$31,$0 # return cy
|
||||
ret $31,($26),1
|
||||
.end __mpn_add_n
|
@ -1,174 +0,0 @@
|
||||
# Alpha EV5 __mpn_lshift -- Shift a limb vector left by cnt bits.
|
||||
|
||||
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
# MA 02111-1307, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r16
|
||||
# s1_ptr r17
|
||||
# size r18
|
||||
# cnt r19
|
||||
|
||||
# This code runs at 3.25 cycles/limb on the EV5.
|
||||
|
||||
# __mpn_lshift: shift the size-limb vector at s1_ptr left by cnt bits,
# store the result at res_ptr and return in $0 the bits shifted out of
# the most significant limb.  Limbs are processed from the high end of
# the vectors towards the low end.
#
# Register use:
#   $19       cnt (left-shift count)
#   $20       -cnt, used as the complementary right-shift count (the shift
#             instructions use only the low 6 bits of the count)
#   $1-$4     source limbs in flight
#   $21-$24   those limbs shifted left by cnt
#   $5-$8     the next lower limbs shifted right, then OR-ed with the
#             matching left-shifted part to form a result limb
#   $28       limbs handled singly in .Loop0: (size-1) mod 4
#   $18       limbs left for the 4-way pipelined code
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
.globl __mpn_lshift
|
||||
.ent __mpn_lshift
|
||||
__mpn_lshift:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
s8addq $18,$17,$17 # make r17 point at end of s1
|
||||
ldq $4,-8($17) # load first limb
|
||||
subq $31,$19,$20 # $20 = -cnt
|
||||
s8addq $18,$16,$16 # make r16 point at end of RES
|
||||
subq $18,1,$18 # size-1 limbs still to load
|
||||
and $18,4-1,$28 # number of limbs in first loop
|
||||
srl $4,$20,$0 # compute function result
|
||||
|
||||
beq $28,.L0 # nothing for the single-limb loop
|
||||
subq $18,$28,$18 # $18 = limbs left for the 4-way code
|
||||
|
||||
.align 3
|
||||
.Loop0: ldq $3,-16($17) # next lower limb
|
||||
subq $16,8,$16
|
||||
sll $4,$19,$5 # high part from the current limb
|
||||
subq $17,8,$17
|
||||
subq $28,1,$28
|
||||
srl $3,$20,$6 # low part from the next lower limb
|
||||
or $3,$3,$4 # next limb becomes the current one
|
||||
or $5,$6,$8
|
||||
stq $8,0($16)
|
||||
bne $28,.Loop0
|
||||
|
||||
.L0: sll $4,$19,$24 # $24 = high part of the next result limb
|
||||
beq $18,.Lend
|
||||
# warm up phase 1
|
||||
ldq $1,-16($17)
|
||||
subq $18,4,$18
|
||||
ldq $2,-24($17)
|
||||
ldq $3,-32($17)
|
||||
ldq $4,-40($17)
|
||||
beq $18,.Lend1
|
||||
# warm up phase 2
|
||||
srl $1,$20,$7
|
||||
sll $1,$19,$21
|
||||
srl $2,$20,$8
|
||||
ldq $1,-48($17)
|
||||
sll $2,$19,$22
|
||||
ldq $2,-56($17)
|
||||
srl $3,$20,$5
|
||||
or $7,$24,$7
|
||||
sll $3,$19,$23
|
||||
or $8,$21,$8
|
||||
srl $4,$20,$6
|
||||
ldq $3,-64($17)
|
||||
sll $4,$19,$24
|
||||
ldq $4,-72($17)
|
||||
subq $18,4,$18
|
||||
beq $18,.Lend2
|
||||
.align 4
|
||||
# main loop
|
||||
.Loop: stq $7,-8($16)
|
||||
or $5,$22,$5
|
||||
stq $8,-16($16)
|
||||
or $6,$23,$6
|
||||
|
||||
srl $1,$20,$7
|
||||
subq $18,4,$18
|
||||
sll $1,$19,$21
|
||||
unop # no-op filler; presumably a disabled prefetch: ldq $31,-96($17)
|
||||
|
||||
srl $2,$20,$8
|
||||
ldq $1,-80($17)
|
||||
sll $2,$19,$22
|
||||
ldq $2,-88($17)
|
||||
|
||||
stq $5,-24($16)
|
||||
or $7,$24,$7
|
||||
stq $6,-32($16)
|
||||
or $8,$21,$8
|
||||
|
||||
srl $3,$20,$5
|
||||
unop # no-op filler; presumably a disabled prefetch: ldq $31,-96($17)
|
||||
sll $3,$19,$23
|
||||
subq $16,32,$16
|
||||
|
||||
srl $4,$20,$6
|
||||
ldq $3,-96($17)
|
||||
sll $4,$19,$24
|
||||
ldq $4,-104($17)
|
||||
|
||||
subq $17,32,$17
|
||||
bne $18,.Loop
|
||||
# cool down phase 2/1
|
||||
.Lend2: stq $7,-8($16)
|
||||
or $5,$22,$5
|
||||
stq $8,-16($16)
|
||||
or $6,$23,$6
|
||||
srl $1,$20,$7
|
||||
sll $1,$19,$21
|
||||
srl $2,$20,$8
|
||||
sll $2,$19,$22
|
||||
stq $5,-24($16)
|
||||
or $7,$24,$7
|
||||
stq $6,-32($16)
|
||||
or $8,$21,$8
|
||||
srl $3,$20,$5
|
||||
sll $3,$19,$23
|
||||
srl $4,$20,$6
|
||||
sll $4,$19,$24
|
||||
# cool down phase 2/2
|
||||
stq $7,-40($16)
|
||||
or $5,$22,$5
|
||||
stq $8,-48($16)
|
||||
or $6,$23,$6
|
||||
stq $5,-56($16)
|
||||
stq $6,-64($16)
|
||||
# cool down phase 2/3
|
||||
stq $24,-72($16) # lowest result limb: last source limb << cnt
|
||||
ret $31,($26),1
|
||||
|
||||
# cool down phase 1/1
|
||||
.Lend1: srl $1,$20,$7
|
||||
sll $1,$19,$21
|
||||
srl $2,$20,$8
|
||||
sll $2,$19,$22
|
||||
srl $3,$20,$5
|
||||
or $7,$24,$7
|
||||
sll $3,$19,$23
|
||||
or $8,$21,$8
|
||||
srl $4,$20,$6
|
||||
sll $4,$19,$24
|
||||
# cool down phase 1/2
|
||||
stq $7,-8($16)
|
||||
or $5,$22,$5
|
||||
stq $8,-16($16)
|
||||
or $6,$23,$6
|
||||
stq $5,-24($16)
|
||||
stq $6,-32($16)
|
||||
stq $24,-40($16) # lowest result limb: last source limb << cnt
|
||||
ret $31,($26),1
|
||||
|
||||
.Lend: stq $24,-8($16) # lowest result limb: last source limb << cnt
|
||||
ret $31,($26),1
|
||||
.end __mpn_lshift
|
@ -1,172 +0,0 @@
|
||||
# Alpha EV5 __mpn_rshift -- Shift a limb vector right by cnt bits.
|
||||
|
||||
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
# MA 02111-1307, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr r16
|
||||
# s1_ptr r17
|
||||
# size r18
|
||||
# cnt r19
|
||||
|
||||
# This code runs at 3.25 cycles/limb on the EV5.
|
||||
|
||||
# __mpn_rshift: shift the size-limb vector at s1_ptr right by cnt bits,
# store the result at res_ptr and return in $0 the bits shifted out of
# the least significant limb (positioned at the top of the word).  Limbs
# are processed from the low end of the vectors towards the high end.
#
# Register use:
#   $19       cnt (right-shift count)
#   $20       -cnt, used as the complementary left-shift count (the shift
#             instructions use only the low 6 bits of the count)
#   $1-$4     source limbs in flight
#   $21-$24   those limbs shifted right by cnt
#   $5-$8     the next higher limbs shifted left, then OR-ed with the
#             matching right-shifted part to form a result limb
#   $28       limbs handled singly in .Loop0: (size-1) mod 4
#   $18       limbs left for the 4-way pipelined code
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
.globl __mpn_rshift
|
||||
.ent __mpn_rshift
|
||||
__mpn_rshift:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
ldq $4,0($17) # load first limb
|
||||
subq $31,$19,$20 # $20 = -cnt
|
||||
subq $18,1,$18 # size-1 limbs still to load
|
||||
and $18,4-1,$28 # number of limbs in first loop
|
||||
sll $4,$20,$0 # compute function result
|
||||
|
||||
beq $28,.L0 # nothing for the single-limb loop
|
||||
subq $18,$28,$18 # $18 = limbs left for the 4-way code
|
||||
|
||||
.align 3
|
||||
.Loop0: ldq $3,8($17) # next higher limb
|
||||
addq $16,8,$16
|
||||
srl $4,$19,$5 # low part from the current limb
|
||||
addq $17,8,$17
|
||||
subq $28,1,$28
|
||||
sll $3,$20,$6 # high part from the next higher limb
|
||||
or $3,$3,$4 # next limb becomes the current one
|
||||
or $5,$6,$8
|
||||
stq $8,-8($16)
|
||||
bne $28,.Loop0
|
||||
|
||||
.L0: srl $4,$19,$24 # $24 = low part of the next result limb
|
||||
beq $18,.Lend
|
||||
# warm up phase 1
|
||||
ldq $1,8($17)
|
||||
subq $18,4,$18
|
||||
ldq $2,16($17)
|
||||
ldq $3,24($17)
|
||||
ldq $4,32($17)
|
||||
beq $18,.Lend1
|
||||
# warm up phase 2
|
||||
sll $1,$20,$7
|
||||
srl $1,$19,$21
|
||||
sll $2,$20,$8
|
||||
ldq $1,40($17)
|
||||
srl $2,$19,$22
|
||||
ldq $2,48($17)
|
||||
sll $3,$20,$5
|
||||
or $7,$24,$7
|
||||
srl $3,$19,$23
|
||||
or $8,$21,$8
|
||||
sll $4,$20,$6
|
||||
ldq $3,56($17)
|
||||
srl $4,$19,$24
|
||||
ldq $4,64($17)
|
||||
subq $18,4,$18
|
||||
beq $18,.Lend2
|
||||
.align 4
|
||||
# main loop
|
||||
.Loop: stq $7,0($16)
|
||||
or $5,$22,$5
|
||||
stq $8,8($16)
|
||||
or $6,$23,$6
|
||||
|
||||
sll $1,$20,$7
|
||||
subq $18,4,$18
|
||||
srl $1,$19,$21
|
||||
unop # no-op filler; the old text "ldq $31,-96($17)" has the lshift offset -- NOTE(review): confirm
|
||||
|
||||
sll $2,$20,$8
|
||||
ldq $1,72($17)
|
||||
srl $2,$19,$22
|
||||
ldq $2,80($17)
|
||||
|
||||
stq $5,16($16)
|
||||
or $7,$24,$7
|
||||
stq $6,24($16)
|
||||
or $8,$21,$8
|
||||
|
||||
sll $3,$20,$5
|
||||
unop # no-op filler; the old text "ldq $31,-96($17)" has the lshift offset -- NOTE(review): confirm
|
||||
srl $3,$19,$23
|
||||
addq $16,32,$16
|
||||
|
||||
sll $4,$20,$6
|
||||
ldq $3,88($17)
|
||||
srl $4,$19,$24
|
||||
ldq $4,96($17)
|
||||
|
||||
addq $17,32,$17
|
||||
bne $18,.Loop
|
||||
# cool down phase 2/1
|
||||
.Lend2: stq $7,0($16)
|
||||
or $5,$22,$5
|
||||
stq $8,8($16)
|
||||
or $6,$23,$6
|
||||
sll $1,$20,$7
|
||||
srl $1,$19,$21
|
||||
sll $2,$20,$8
|
||||
srl $2,$19,$22
|
||||
stq $5,16($16)
|
||||
or $7,$24,$7
|
||||
stq $6,24($16)
|
||||
or $8,$21,$8
|
||||
sll $3,$20,$5
|
||||
srl $3,$19,$23
|
||||
sll $4,$20,$6
|
||||
srl $4,$19,$24
|
||||
# cool down phase 2/2
|
||||
stq $7,32($16)
|
||||
or $5,$22,$5
|
||||
stq $8,40($16)
|
||||
or $6,$23,$6
|
||||
stq $5,48($16)
|
||||
stq $6,56($16)
|
||||
# cool down phase 2/3
|
||||
stq $24,64($16) # highest result limb: last source limb >> cnt
|
||||
ret $31,($26),1
|
||||
|
||||
# cool down phase 1/1
|
||||
.Lend1: sll $1,$20,$7
|
||||
srl $1,$19,$21
|
||||
sll $2,$20,$8
|
||||
srl $2,$19,$22
|
||||
sll $3,$20,$5
|
||||
or $7,$24,$7
|
||||
srl $3,$19,$23
|
||||
or $8,$21,$8
|
||||
sll $4,$20,$6
|
||||
srl $4,$19,$24
|
||||
# cool down phase 1/2
|
||||
stq $7,0($16)
|
||||
or $5,$22,$5
|
||||
stq $8,8($16)
|
||||
or $6,$23,$6
|
||||
stq $5,16($16)
|
||||
stq $6,24($16)
|
||||
stq $24,32($16) # highest result limb: last source limb >> cnt
|
||||
ret $31,($26),1
|
||||
|
||||
.Lend: stq $24,0($16) # highest result limb: last source limb >> cnt
|
||||
ret $31,($26),1
|
||||
.end __mpn_rshift
|
@ -1,149 +0,0 @@
|
||||
# Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
# store difference in a third limb vector.
|
||||
|
||||
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||
|
||||
# This file is part of the GNU MP Library.
|
||||
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
# option) any later version.
|
||||
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
# License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
# MA 02111-1307, USA.
|
||||
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $16
|
||||
# s1_ptr $17
|
||||
# s2_ptr $18
|
||||
# size $19
|
||||
|
||||
# __mpn_sub_n, software-pipelined: subtract the size-limb vector at
# s2_ptr from the one at s1_ptr, store the difference at res_ptr and
# return the borrow in $0.
#
# Register use:
#   $0-$3     s2 limbs of the current group of four
#   $4-$7     s1 limbs of the current group of four
#   $20-$23   the four differences waiting to be stored
#   $25       running borrow (called "cy" in the comments)
#   $28       s2 limb plus running borrow ("cy add")
#   $8        carry out of the cy add
#   $19       loop counter, kept 4 (then 8) below the limbs remaining
#
# Each limb is computed as s1 - (s2 + cy); the borrow is the OR of the
# carry out of the cy add and of (s1 < difference).
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
.align 3
|
||||
.globl __mpn_sub_n
|
||||
.ent __mpn_sub_n
|
||||
__mpn_sub_n:
|
||||
.frame $30,0,$26,0
|
||||
|
||||
or $31,$31,$25 # clear cy
|
||||
subq $19,4,$19 # decr loop cnt
|
||||
blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
|
||||
# Start software pipeline for 1st loop
|
||||
ldq $0,0($18)
|
||||
|
||||
ldq $1,8($18)
|
||||
ldq $4,0($17)
|
||||
|
||||
ldq $5,8($17)
|
||||
addq $17,32,$17 # update s1_ptr
|
||||
ldq $2,16($18)
|
||||
|
||||
subq $4,$0,$20 # 1st main sub
|
||||
ldq $3,24($18)
|
||||
|
||||
subq $19,4,$19 # decr loop cnt
|
||||
ldq $6,-16($17)
|
||||
|
||||
cmpult $4,$20,$25 # compute cy from last sub
|
||||
ldq $7,-8($17)
|
||||
|
||||
addq $1,$25,$28 # cy add
|
||||
addq $18,32,$18 # update s2_ptr
|
||||
subq $5,$28,$21 # 2nd main sub
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
blt $19,.Lend1 # if less than 4 limbs remain, jump
|
||||
# 1st loop handles groups of 4 limbs in a software pipeline
|
||||
.align 4
|
||||
.Loop: cmpult $5,$21,$25 # compute cy (borrow) from last sub
|
||||
ldq $0,0($18)
|
||||
|
||||
or $8,$25,$25 # combine cy from the add and the sub
|
||||
ldq $1,8($18)
|
||||
|
||||
addq $2,$25,$28 # cy add
|
||||
ldq $4,0($17)
|
||||
|
||||
subq $6,$28,$22 # 3rd main sub
|
||||
ldq $5,8($17)
|
||||
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
cmpult $6,$22,$25 # compute cy (borrow) from last sub
|
||||
stq $20,0($16)
|
||||
|
||||
or $8,$25,$25 # combine cy from the add and the sub
|
||||
stq $21,8($16)
|
||||
|
||||
addq $3,$25,$28 # cy add
|
||||
subq $7,$28,$23 # 4th main sub
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
cmpult $7,$23,$25 # compute cy (borrow) from last sub
|
||||
addq $17,32,$17 # update s1_ptr
|
||||
or $8,$25,$25 # combine cy from the add and the sub
|
||||
addq $16,32,$16 # update res_ptr
|
||||
addq $0,$25,$28 # cy add
|
||||
ldq $2,16($18)
|
||||
|
||||
subq $4,$28,$20 # 1st main sub
|
||||
ldq $3,24($18)
|
||||
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
ldq $6,-16($17)
|
||||
|
||||
cmpult $4,$20,$25 # compute cy (borrow) from last sub
|
||||
ldq $7,-8($17)
|
||||
|
||||
or $8,$25,$25 # combine cy from the add and the sub
|
||||
subq $19,4,$19 # decr loop cnt
|
||||
stq $22,-16($16)
|
||||
|
||||
addq $1,$25,$28 # cy add
|
||||
stq $23,-8($16)
|
||||
|
||||
subq $5,$28,$21 # 2nd main sub
|
||||
addq $18,32,$18 # update s2_ptr
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
bge $19,.Loop
|
||||
# Finish software pipeline for 1st loop
|
||||
.Lend1: cmpult $5,$21,$25 # compute cy (borrow) from last sub
|
||||
or $8,$25,$25 # combine cy from the add and the sub
|
||||
addq $2,$25,$28 # cy add
|
||||
subq $6,$28,$22 # 3rd main sub
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
cmpult $6,$22,$25 # compute cy (borrow) from last sub
|
||||
stq $20,0($16)
|
||||
|
||||
or $8,$25,$25 # combine cy from the add and the sub
|
||||
stq $21,8($16)
|
||||
|
||||
addq $3,$25,$28 # cy add
|
||||
subq $7,$28,$23 # 4th main sub
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
cmpult $7,$23,$25 # compute cy (borrow) from last sub
|
||||
or $8,$25,$25 # combine cy from the add and the sub
|
||||
addq $16,32,$16 # update res_ptr
|
||||
stq $22,-16($16)
|
||||
|
||||
stq $23,-8($16)
|
||||
|
||||
.Lend2: addq $19,4,$19 # restore loop cnt
|
||||
beq $19,.Lret
|
||||
# Start software pipeline for 2nd loop
|
||||
ldq $0,0($18)
|
||||
|
||||
ldq $4,0($17)
|
||||
|
||||
subq $19,1,$19 # the last limb is handled at .Lend0
|
||||
beq $19,.Lend0
|
||||
# 2nd loop handles remaining 1-3 limbs
|
||||
.align 4
|
||||
.Loop0: addq $0,$25,$28 # cy add
|
||||
ldq $0,8($18)
|
||||
|
||||
subq $4,$28,$20 # main sub
|
||||
ldq $1,8($17) # next s1 limb goes to $1: $4 is still needed below
|
||||
addq $18,8,$18
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
addq $17,8,$17
|
||||
stq $20,0($16)
|
||||
|
||||
cmpult $4,$20,$25 # compute cy (borrow) from last sub
|
||||
subq $19,1,$19 # decr loop cnt
|
||||
or $8,$25,$25 # combine cy from the add and the sub
|
||||
addq $16,8,$16
|
||||
or $1,$31,$4 # next s1 limb becomes the current one
|
||||
bne $19,.Loop0
|
||||
.Lend0: addq $0,$25,$28 # cy add
|
||||
subq $4,$28,$20 # main sub
|
||||
cmpult $28,$25,$8 # compute cy from last add
|
||||
cmpult $4,$20,$25 # compute cy (borrow) from last sub
|
||||
stq $20,0($16)
|
||||
|
||||
or $8,$25,$25 # combine cy from the add and the sub
|
||||
|
||||
.Lret: or $25,$31,$0 # return cy
|
||||
ret $31,($26),1
|
||||
.end __mpn_sub_n
|
@ -1 +0,0 @@
|
||||
alpha/alphaev5
|
@ -1,479 +0,0 @@
|
||||
# Alpha ev6 mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||
# the result to a second limb vector.
|
||||
#
|
||||
# Copyright (C) 2000 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of the GNU MP Library.
|
||||
#
|
||||
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published
|
||||
# by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
# your option) any later version.
|
||||
#
|
||||
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
# License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
# MA 02111-1307, USA.
|
||||
|
||||
# INPUT PARAMETERS
|
||||
# res_ptr $16
|
||||
# s1_ptr $17
|
||||
# size $18
|
||||
# s2_limb $19
|
||||
#
|
||||
# This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and
|
||||
# exactly 3.625 cycles/limb on EV6...
|
||||
#
|
||||
# This code was written in close cooperation with ev6 pipeline expert
|
||||
# Steve Root (root@toober.hlo.dec.com). Any errors are tege's fault, though.
|
||||
#
|
||||
# Register usages for unrolled loop:
|
||||
# 0-3 mul's
|
||||
# 4-7 acc's
|
||||
# 8-15 mul results
|
||||
# 20,21 carry's
|
||||
# 22,23 save for stores
|
||||
#
|
||||
# Sustains 8 mul-adds in 29 cycles in the unrolled inner loop.
|
||||
#
|
||||
# The stores can issue a cycle late so we have paired no-op's to 'catch'
|
||||
# them, so that further disturbance to the schedule is damped.
|
||||
#
|
||||
# We couldn't pair the loads, because the entangled schedule of the
|
||||
# carry's has to happen on one side {0} of the machine. Note, the total
|
||||
# use of U0, and the total use of L0 (after attending to the stores).
|
||||
# which is part of the reason why....
|
||||
#
|
||||
# This is a great schedule for the d_cache, a poor schedule for the
|
||||
# b_cache. The lockup on U0 means that any stall can't be recovered
|
||||
# from. Consider a ldq in L1. say that load gets stalled because it
|
||||
# collides with a fill from the b_Cache. On the next cycle, this load
|
||||
# gets priority. It first looks at L0, and goes there. The instruction
|
||||
# we intended for L0 gets to look at L1, which is NOT where we want
|
||||
# it. It either stalls 1, because it can't go in L0, or goes there, and
|
||||
# causes a further instruction to stall.
|
||||
#
|
||||
# So for b_cache, we're likely going to want to put one or more cycles
|
||||
# back into the code! And, of course, put in prefetches. For the
|
||||
# accumulator, lds, intent to modify. For the multiplier, you might
|
||||
# want ldq, evict next, if you're not wanting to use it again soon. Use
|
||||
# 256 ahead of present pointer value. At a place where we have an mt
|
||||
# followed by a bookkeeping, put the bookkeeping in upper, and the
|
||||
# prefetch into lower.
|
||||
#
|
||||
# Note, the usage of physical registers per cycle is smoothed off, as
|
||||
# much as possible.
|
||||
#
|
||||
# Note, the ldq's and stq's are at the end of the quadpacks. note, we'd
|
||||
# like not to have a ldq or stq precede a conditional branch in a
|
||||
# quadpack. The conditional branch moves the retire pointer one cycle
|
||||
# later.
|
||||
#
|
||||
# Optimization notes:
|
||||
# Callee-saves regs: $9 $10 $11 $12 $13 $14 $15 $26 ?$27?
|
||||
# Reserved regs: $29 $30 $31
|
||||
# Free caller-saves regs in unrolled code: $24 $25 $28
|
||||
# We should swap some of the callee-saves regs for some of the free
|
||||
# caller-saves regs, saving some overhead cycles.
|
||||
# Most importantly, we should write fast code for the 0-7 case.
|
||||
# The code we use there is for the 21164, and runs at 7 cycles/limb
|
||||
# on the 21264. Should not be hard, if we write specialized code for
|
||||
# 1-7 limbs (the one for 0 limbs should be straightforward). We then just
|
||||
# need a jump table indexed by the low 3 bits of the count argument.
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
|
||||
# __mpn_addmul_1 -- entry point and short-operand path.
# On entry: $16 = res_ptr, $17 = s1_ptr, $18 = size, $19 = s2_limb.
# Each s1 limb is multiplied by s2_limb and added into the res limb at the
# same index; the final carry limb is left in $0 for the caller.
.globl __mpn_addmul_1
|
||||
.ent __mpn_addmul_1
|
||||
__mpn_addmul_1:
|
||||
.frame $30,0,$26,0
|
||||
.prologue 0
|
||||
|
||||
cmpult $18, 8, $1 # $1 = 1 when size < 8
|
||||
beq $1, $Large # size >= 8: go to the unrolled code
|
||||
|
||||
# Fewer than 8 limbs: one limb per trip, with the next s1 limb loaded
# while the current product is still being added.
# NOTE(review): size == 0 is not special-cased on this path; confirm that
# callers never pass 0.
ldq $2, 0($17) # $2 = s1_limb
|
||||
addq $17, 8, $17 # s1_ptr++
|
||||
subq $18, 1, $18 # size--
|
||||
mulq $2, $19, $3 # $3 = prod_low
|
||||
ldq $5, 0($16) # $5 = *res_ptr
|
||||
umulh $2, $19, $0 # $0 = prod_high
|
||||
beq $18, $Lend0b # jump if size was == 1
|
||||
ldq $2, 0($17) # $2 = s1_limb
|
||||
addq $17, 8, $17 # s1_ptr++
|
||||
subq $18, 1, $18 # size--
|
||||
addq $5, $3, $3 # $3 = *res_ptr + prod_low
|
||||
cmpult $3, $5, $4 # $4 = carry out of that add
|
||||
stq $3, 0($16) # store the result limb
|
||||
addq $16, 8, $16 # res_ptr++
|
||||
beq $18, $Lend0a # jump if size was == 2
|
||||
|
||||
.align 3
|
||||
# On every trip $0 + $4 together form the carry limb going into this limb.
$Loop0: mulq $2, $19, $3 # $3 = prod_low
|
||||
ldq $5, 0($16) # $5 = *res_ptr
|
||||
addq $4, $0, $0 # cy_limb = cy_limb + 'cy'
|
||||
subq $18, 1, $18 # size--
|
||||
umulh $2, $19, $4 # $4 = cy_limb
|
||||
ldq $2, 0($17) # $2 = s1_limb
|
||||
addq $17, 8, $17 # s1_ptr++
|
||||
addq $3, $0, $3 # $3 = cy_limb + prod_low
|
||||
cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low)
|
||||
addq $5, $3, $3 # add in *res_ptr
|
||||
cmpult $3, $5, $5 # $5 = carry from that add
|
||||
stq $3, 0($16) # store the result limb
|
||||
addq $16, 8, $16 # res_ptr++
|
||||
addq $5, $0, $0 # combine carries
|
||||
bne $18, $Loop0
|
||||
# Last limb: same arithmetic as the loop body, without loading a further limb.
$Lend0a:
|
||||
mulq $2, $19, $3 # $3 = prod_low
|
||||
ldq $5, 0($16) # $5 = *res_ptr
|
||||
addq $4, $0, $0 # cy_limb = cy_limb + 'cy'
|
||||
umulh $2, $19, $4 # $4 = cy_limb
|
||||
addq $3, $0, $3 # $3 = cy_limb + prod_low
|
||||
cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low)
|
||||
addq $5, $3, $3 # add in *res_ptr
|
||||
cmpult $3, $5, $5 # $5 = carry from that add
|
||||
stq $3, 0($16) # store the result limb
|
||||
addq $5, $0, $0 # combine carries
|
||||
addq $4, $0, $0 # cy_limb = prod_high + cy
|
||||
ret $31, ($26), 1
|
||||
# size was exactly 1: finish the single multiply-add started above.
$Lend0b:
|
||||
addq $5, $3, $3 # $3 = *res_ptr + prod_low
|
||||
cmpult $3, $5, $5 # $5 = carry out of that add
|
||||
stq $3, 0($16) # store the result limb
|
||||
addq $0, $5, $0 # return prod_high + carry
|
||||
ret $31, ($26), 1
|
||||
|
||||
# Long-operand path (size >= 8).  Saves $9-$15 in a 240-byte stack frame,
# then handles the (size mod 8) leftover limbs one limb at a time, leaving
# the running carry limb in $0 before reaching $Lunroll.
$Large:
|
||||
lda $30, -240($30) # allocate the frame
|
||||
stq $9, 8($30) # save $9-$15; they are reloaded before the final ret
|
||||
stq $10, 16($30)
|
||||
stq $11, 24($30)
|
||||
stq $12, 32($30)
|
||||
stq $13, 40($30)
|
||||
stq $14, 48($30)
|
||||
stq $15, 56($30)
|
||||
|
||||
and $18, 7, $20 # count for the first loop, 0-7
|
||||
srl $18, 3, $18 # count for unrolled loop
|
||||
bis $31, $31, $0 # $0 = 0: carry limb when there are no leftover limbs
|
||||
beq $20, $Lunroll # size is a multiple of 8: skip the leftover loop
|
||||
ldq $2, 0($17) # $2 = s1_limb
|
||||
addq $17, 8, $17 # s1_ptr++
|
||||
subq $20, 1, $20 # size--
|
||||
mulq $2, $19, $3 # $3 = prod_low
|
||||
ldq $5, 0($16) # $5 = *res_ptr
|
||||
umulh $2, $19, $0 # $0 = prod_high
|
||||
beq $20, $Lend1b # jump if size was == 1
|
||||
ldq $2, 0($17) # $2 = s1_limb
|
||||
addq $17, 8, $17 # s1_ptr++
|
||||
subq $20, 1, $20 # size--
|
||||
addq $5, $3, $3 # $3 = *res_ptr + prod_low
|
||||
cmpult $3, $5, $4 # $4 = carry out of that add
|
||||
stq $3, 0($16) # store the result limb
|
||||
addq $16, 8, $16 # res_ptr++
|
||||
beq $20, $Lend1a # jump if size was == 2
|
||||
|
||||
.align 3
|
||||
$Loop1: mulq $2, $19, $3 # $3 = prod_low
|
||||
ldq $5, 0($16) # $5 = *res_ptr
|
||||
addq $4, $0, $0 # cy_limb = cy_limb + 'cy'
|
||||
subq $20, 1, $20 # size--
|
||||
umulh $2, $19, $4 # $4 = cy_limb
|
||||
ldq $2, 0($17) # $2 = s1_limb
|
||||
addq $17, 8, $17 # s1_ptr++
|
||||
addq $3, $0, $3 # $3 = cy_limb + prod_low
|
||||
cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low)
|
||||
addq $5, $3, $3 # add in *res_ptr
|
||||
cmpult $3, $5, $5 # $5 = carry from that add
|
||||
stq $3, 0($16) # store the result limb
|
||||
addq $16, 8, $16 # res_ptr++
|
||||
addq $5, $0, $0 # combine carries
|
||||
bne $20, $Loop1
|
||||
|
||||
# Last leftover limb; unlike the short path this also advances res_ptr,
# because the unrolled code continues from it.
$Lend1a:
|
||||
mulq $2, $19, $3 # $3 = prod_low
|
||||
ldq $5, 0($16) # $5 = *res_ptr
|
||||
addq $4, $0, $0 # cy_limb = cy_limb + 'cy'
|
||||
umulh $2, $19, $4 # $4 = cy_limb
|
||||
addq $3, $0, $3 # $3 = cy_limb + prod_low
|
||||
cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low)
|
||||
addq $5, $3, $3 # add in *res_ptr
|
||||
cmpult $3, $5, $5 # $5 = carry from that add
|
||||
stq $3, 0($16) # store the result limb
|
||||
addq $16, 8, $16 # res_ptr++
|
||||
addq $5, $0, $0 # combine carries
|
||||
addq $4, $0, $0 # cy_limb = prod_high + cy
|
||||
br $31, $Lunroll
|
||||
# Exactly one leftover limb; falls through to the code after this label.
$Lend1b:
|
||||
addq $5, $3, $3 # $3 = *res_ptr + prod_low
|
||||
cmpult $3, $5, $5 # $5 = carry out of that add
|
||||
stq $3, 0($16) # store the result limb
|
||||
addq $16, 8, $16 # res_ptr++
|
||||
addq $0, $5, $0 # $0 = prod_high + carry
|
||||
|
||||
# Start of the 8-limbs-per-trip code.  $0 holds the carry limb from the
# leftover-limb code (or 0) and is copied to $12; $18 holds size / 8.
# Both pointers are first biased by -16 and then advanced by 64 part-way
# through this startup code, so the offsets on the loads and stores below
# are relative to those adjusted pointers.
$Lunroll:
|
||||
lda $17, -16($17) # L1 bookkeeping
|
||||
lda $16, -16($16) # L1 bookkeeping
|
||||
bis $0, $31, $12 # $12 = incoming carry limb
|
||||
|
||||
# ____ UNROLLED LOOP SOFTWARE PIPELINE STARTUP ____
|
||||
|
||||
ldq $2, 16($17) # L1
|
||||
ldq $3, 24($17) # L1
|
||||
lda $18, -1($18) # L1 bookkeeping
|
||||
ldq $6, 16($16) # L1
|
||||
ldq $7, 24($16) # L1
|
||||
ldq $0, 32($17) # L1
|
||||
mulq $19, $2, $13 # U1
|
||||
ldq $1, 40($17) # L1
|
||||
umulh $19, $2, $14 # U1
|
||||
mulq $19, $3, $15 # U1
|
||||
lda $17, 64($17) # L1 bookkeeping
|
||||
ldq $4, 32($16) # L1
|
||||
ldq $5, 40($16) # L1
|
||||
umulh $19, $3, $8 # U1
|
||||
ldq $2, -16($17) # L1
|
||||
mulq $19, $0, $9 # U1
|
||||
ldq $3, -8($17) # L1
|
||||
umulh $19, $0, $10 # U1
|
||||
addq $6, $13, $6 # L0 lo + acc
|
||||
mulq $19, $1, $11 # U1
|
||||
cmpult $6, $13, $20 # L0 lo add => carry
|
||||
lda $16, 64($16) # L1 bookkeeping
|
||||
addq $6, $12, $22 # U0 hi add => answer
|
||||
cmpult $22, $12, $21 # L0 hi add => carry
|
||||
addq $14, $20, $14 # U0 hi mul + carry
|
||||
ldq $6, -16($16) # L1
|
||||
addq $7, $15, $23 # L0 lo + acc
|
||||
addq $14, $21, $14 # U0 hi mul + carry
|
||||
ldq $7, -8($16) # L1
|
||||
umulh $19, $1, $12 # U1
|
||||
cmpult $23, $15, $20 # L0 lo add => carry
|
||||
addq $23, $14, $23 # U0 hi add => answer
|
||||
ldq $0, 0($17) # L1
|
||||
mulq $19, $2, $13 # U1
|
||||
cmpult $23, $14, $21 # L0 hi add => carry
|
||||
addq $8, $20, $8 # U0 hi mul + carry
|
||||
ldq $1, 8($17) # L1
|
||||
umulh $19, $2, $14 # U1
|
||||
addq $4, $9, $4 # L0 lo + acc
|
||||
stq $22, -48($16) # L0
|
||||
stq $23, -40($16) # L1
|
||||
mulq $19, $3, $15 # U1
|
||||
addq $8, $21, $8 # U0 hi mul + carry
|
||||
cmpult $4, $9, $20 # L0 lo add => carry
|
||||
addq $4, $8, $22 # U0 hi add => answer
|
||||
ble $18, $Lend # U1 bookkeeping
|
||||
|
||||
# ____ MAIN UNROLLED LOOP ____
|
||||
.align 4
|
||||
# One trip through $Loop advances s1_ptr ($17) and res_ptr ($16) by 64 bytes
# (8 limbs) and decrements the trip count in $18 once.
# The "bis $31, $31, $31" instructions write only $31; they are the
# scheduling no-ops mentioned in the file header ("mt" and "st slosh" slots).
# The instruction order is part of the hand-tuned EV6 schedule -- keep it.
$Loop:
|
||||
bis $31, $31, $31 # U1 mt
|
||||
cmpult $22, $8, $21 # L0 hi add => carry
|
||||
addq $10, $20, $10 # U0 hi mul + carry
|
||||
ldq $4, 0($16) # L1
|
||||
|
||||
bis $31, $31, $31 # U1 mt
|
||||
addq $5, $11, $23 # L0 lo + acc
|
||||
addq $10, $21, $10 # L0 hi mul + carry
|
||||
ldq $5, 8($16) # L1
|
||||
|
||||
umulh $19, $3, $8 # U1
|
||||
cmpult $23, $11, $20 # L0 lo add => carry
|
||||
addq $23, $10, $23 # U0 hi add => answer
|
||||
ldq $2, 16($17) # L1
|
||||
|
||||
mulq $19, $0, $9 # U1
|
||||
cmpult $23, $10, $21 # L0 hi add => carry
|
||||
addq $12, $20, $12 # U0 hi mul + carry
|
||||
ldq $3, 24($17) # L1
|
||||
|
||||
umulh $19, $0, $10 # U1
|
||||
addq $6, $13, $6 # L0 lo + acc
|
||||
stq $22, -32($16) # L0
|
||||
stq $23, -24($16) # L1
|
||||
|
||||
bis $31, $31, $31 # L0 st slosh
|
||||
mulq $19, $1, $11 # U1
|
||||
bis $31, $31, $31 # L1 st slosh
|
||||
addq $12, $21, $12 # U0 hi mul + carry
|
||||
|
||||
cmpult $6, $13, $20 # L0 lo add => carry
|
||||
bis $31, $31, $31 # U1 mt
|
||||
lda $18, -1($18) # L1 bookkeeping
|
||||
addq $6, $12, $22 # U0 hi add => answer
|
||||
|
||||
bis $31, $31, $31 # U1 mt
|
||||
cmpult $22, $12, $21 # L0 hi add => carry
|
||||
addq $14, $20, $14 # U0 hi mul + carry
|
||||
ldq $6, 16($16) # L1
|
||||
|
||||
bis $31, $31, $31 # U1 mt
|
||||
addq $7, $15, $23 # L0 lo + acc
|
||||
addq $14, $21, $14 # U0 hi mul + carry
|
||||
ldq $7, 24($16) # L1
|
||||
|
||||
umulh $19, $1, $12 # U1
|
||||
cmpult $23, $15, $20 # L0 lo add => carry
|
||||
addq $23, $14, $23 # U0 hi add => answer
|
||||
ldq $0, 32($17) # L1
|
||||
|
||||
mulq $19, $2, $13 # U1
|
||||
cmpult $23, $14, $21 # L0 hi add => carry
|
||||
addq $8, $20, $8 # U0 hi mul + carry
|
||||
ldq $1, 40($17) # L1
|
||||
|
||||
umulh $19, $2, $14 # U1
|
||||
addq $4, $9, $4 # U0 lo + acc
|
||||
stq $22, -16($16) # L0
|
||||
stq $23, -8($16) # L1
|
||||
|
||||
bis $31, $31, $31 # L0 st slosh
|
||||
mulq $19, $3, $15 # U1
|
||||
bis $31, $31, $31 # L1 st slosh
|
||||
addq $8, $21, $8 # L0 hi mul + carry
|
||||
|
||||
cmpult $4, $9, $20 # L0 lo add => carry
|
||||
bis $31, $31, $31 # U1 mt
|
||||
lda $17, 64($17) # L1 bookkeeping
|
||||
addq $4, $8, $22 # U0 hi add => answer
|
||||
|
||||
bis $31, $31, $31 # U1 mt
|
||||
cmpult $22, $8, $21 # L0 hi add => carry
|
||||
addq $10, $20, $10 # U0 hi mul + carry
|
||||
ldq $4, 32($16) # L1
|
||||
|
||||
bis $31, $31, $31 # U1 mt
|
||||
addq $5, $11, $23 # L0 lo + acc
|
||||
addq $10, $21, $10 # L0 hi mul + carry
|
||||
ldq $5, 40($16) # L1
|
||||
|
||||
umulh $19, $3, $8 # U1
|
||||
cmpult $23, $11, $20 # L0 lo add => carry
|
||||
addq $23, $10, $23 # U0 hi add => answer
|
||||
ldq $2, -16($17) # L1
|
||||
|
||||
mulq $19, $0, $9 # U1
|
||||
cmpult $23, $10, $21 # L0 hi add => carry
|
||||
addq $12, $20, $12 # U0 hi mul + carry
|
||||
ldq $3, -8($17) # L1
|
||||
|
||||
umulh $19, $0, $10 # U1
|
||||
addq $6, $13, $6 # L0 lo + acc
|
||||
stq $22, 0($16) # L0
|
||||
stq $23, 8($16) # L1
|
||||
|
||||
bis $31, $31, $31 # L0 st slosh
|
||||
mulq $19, $1, $11 # U1
|
||||
bis $31, $31, $31 # L1 st slosh
|
||||
addq $12, $21, $12 # U0 hi mul + carry
|
||||
|
||||
cmpult $6, $13, $20 # L0 lo add => carry
|
||||
bis $31, $31, $31 # U1 mt
|
||||
lda $16, 64($16) # L1 bookkeeping
|
||||
addq $6, $12, $22 # U0 hi add => answer
|
||||
|
||||
bis $31, $31, $31 # U1 mt
|
||||
cmpult $22, $12, $21 # L0 hi add => carry
|
||||
addq $14, $20, $14 # U0 hi mul + carry
|
||||
ldq $6, -16($16) # L1
|
||||
|
||||
bis $31, $31, $31 # U1 mt
|
||||
addq $7, $15, $23 # L0 lo + acc
|
||||
addq $14, $21, $14 # U0 hi mul + carry
|
||||
ldq $7, -8($16) # L1
|
||||
|
||||
umulh $19, $1, $12 # U1
|
||||
cmpult $23, $15, $20 # L0 lo add => carry
|
||||
addq $23, $14, $23 # U0 hi add => answer
|
||||
ldq $0, 0($17) # L1
|
||||
|
||||
mulq $19, $2, $13 # U1
|
||||
cmpult $23, $14, $21 # L0 hi add => carry
|
||||
addq $8, $20, $8 # U0 hi mul + carry
|
||||
ldq $1, 8($17) # L1
|
||||
|
||||
umulh $19, $2, $14 # U1
|
||||
addq $4, $9, $4 # L0 lo + acc
|
||||
stq $22, -48($16) # L0
|
||||
stq $23, -40($16) # L1
|
||||
|
||||
bis $31, $31, $31 # L0 st slosh
|
||||
mulq $19, $3, $15 # U1
|
||||
bis $31, $31, $31 # L1 st slosh
|
||||
addq $8, $21, $8 # U0 hi mul + carry
|
||||
|
||||
cmpult $4, $9, $20 # L0 lo add => carry
|
||||
addq $4, $8, $22 # U0 hi add => answer
|
||||
bis $31, $31, $31 # L1 mt
|
||||
bgt $18, $Loop # U1 bookkeeping
|
||||
|
||||
# ____ UNROLLED LOOP SOFTWARE PIPELINE FINISH ____
|
||||
# Software-pipeline drain: completes the multiply-adds that are already in
# flight once the trip count has run out, stores the last result limbs and
# puts the final carry limb in $0.
$Lend:
|
||||
cmpult $22, $8, $21 # L0 hi add => carry
|
||||
addq $10, $20, $10 # U0 hi mul + carry
|
||||
ldq $4, 0($16) # L1
|
||||
addq $5, $11, $23 # L0 lo + acc
|
||||
addq $10, $21, $10 # L0 hi mul + carry
|
||||
ldq $5, 8($16) # L1
|
||||
umulh $19, $3, $8 # U1
|
||||
cmpult $23, $11, $20 # L0 lo add => carry
|
||||
addq $23, $10, $23 # U0 hi add => answer
|
||||
mulq $19, $0, $9 # U1
|
||||
cmpult $23, $10, $21 # L0 hi add => carry
|
||||
addq $12, $20, $12 # U0 hi mul + carry
|
||||
umulh $19, $0, $10 # U1
|
||||
addq $6, $13, $6 # L0 lo + acc
|
||||
stq $22, -32($16) # L0
|
||||
stq $23, -24($16) # L1
|
||||
mulq $19, $1, $11 # U1
|
||||
addq $12, $21, $12 # U0 hi mul + carry
|
||||
cmpult $6, $13, $20 # L0 lo add => carry
|
||||
addq $6, $12, $22 # U0 hi add => answer
|
||||
cmpult $22, $12, $21 # L0 hi add => carry
|
||||
addq $14, $20, $14 # U0 hi mul + carry
|
||||
addq $7, $15, $23 # L0 lo + acc
|
||||
addq $14, $21, $14 # U0 hi mul + carry
|
||||
umulh $19, $1, $12 # U1
|
||||
cmpult $23, $15, $20 # L0 lo add => carry
|
||||
addq $23, $14, $23 # U0 hi add => answer
|
||||
cmpult $23, $14, $21 # L0 hi add => carry
|
||||
addq $8, $20, $8 # U0 hi mul + carry
|
||||
addq $4, $9, $4 # U0 lo + acc
|
||||
stq $22, -16($16) # L0
|
||||
stq $23, -8($16) # L1
|
||||
bis $31, $31, $31 # L0 st slosh
|
||||
addq $8, $21, $8 # L0 hi mul + carry
|
||||
cmpult $4, $9, $20 # L0 lo add => carry
|
||||
addq $4, $8, $22 # U0 hi add => answer
|
||||
cmpult $22, $8, $21 # L0 hi add => carry
|
||||
addq $10, $20, $10 # U0 hi mul + carry
|
||||
addq $5, $11, $23 # L0 lo + acc
|
||||
addq $10, $21, $10 # L0 hi mul + carry
|
||||
cmpult $23, $11, $20 # L0 lo add => carry
|
||||
addq $23, $10, $23 # U0 hi add => answer
|
||||
cmpult $23, $10, $21 # L0 hi add => carry
|
||||
addq $12, $20, $12 # U0 hi mul + carry
|
||||
stq $22, 0($16) # L0
|
||||
stq $23, 8($16) # L1
|
||||
addq $12, $21, $0 # U0 hi mul + carry
|
||||
|
||||
# Reload $9-$15 from the stack frame, release the 240-byte frame and return.
ldq $9, 8($30)
|
||||
ldq $10, 16($30)
|
||||
ldq $11, 24($30)
|
||||
ldq $12, 32($30)
|
||||
ldq $13, 40($30)
|
||||
ldq $14, 48($30)
|
||||
ldq $15, 56($30)
|
||||
lda $30, 240($30)
|
||||
ret $31, ($26), 1
|
||||
|
||||
.end __mpn_addmul_1
|
@ -1,45 +0,0 @@
|
||||
/* Copyright (C) 2000 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
/* __ieee754_sqrt: takes its argument in $f16 and returns the square root in
   $f0, computed by a single sqrtt instruction.  */
ENTRY(__ieee754_sqrt)
|
||||
/* When PROF is defined, gp is loaded and _mcount is called first.  */
#ifdef PROF
|
||||
ldgp gp, 0(pv)
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.prologue 1
|
||||
#else
|
||||
.prologue 0
|
||||
#endif
|
||||
|
||||
.align 4
|
||||
/* The /sui form replaces /su when _IEEE_FP_INEXACT is defined.
   NOTE(review): presumably the extra "i" enables inexact-result
   signalling -- confirm against the Alpha architecture manual.  */
#ifdef _IEEE_FP_INEXACT
|
||||
sqrtt/sui $f16, $f0
|
||||
#else
|
||||
sqrtt/su $f16, $f0
|
||||
#endif
|
||||
ret
|
||||
/* NOTE(review): the two nops below look like padding after the ret --
   confirm intent.  */
nop
|
||||
nop
|
||||
|
||||
END(__ieee754_sqrt)
|
@ -1,45 +0,0 @@
|
||||
/* Copyright (C) 2000 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
/* __ieee754_sqrtf: takes its argument in $f16 and returns the square root in
   $f0, computed by a single sqrts instruction.  */
ENTRY(__ieee754_sqrtf)
|
||||
/* When PROF is defined, gp is loaded and _mcount is called first.  */
#ifdef PROF
|
||||
ldgp gp, 0(pv)
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.prologue 1
|
||||
#else
|
||||
.prologue 0
|
||||
#endif
|
||||
|
||||
.align 4
|
||||
/* The /sui form replaces /su when _IEEE_FP_INEXACT is defined.
   NOTE(review): presumably the extra "i" enables inexact-result
   signalling -- confirm against the Alpha architecture manual.  */
#ifdef _IEEE_FP_INEXACT
|
||||
sqrts/sui $f16, $f0
|
||||
#else
|
||||
sqrts/su $f16, $f0
|
||||
#endif
|
||||
ret
|
||||
/* NOTE(review): the two nops below look like padding after the ret --
   confirm intent.  */
nop
|
||||
nop
|
||||
|
||||
END(__ieee754_sqrtf)
|
@ -1,193 +0,0 @@
|
||||
/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by David Mosberger (davidm@cs.arizona.edu).
|
||||
EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
# __memchr -- $16 = start of the area, $17 = byte value, $18 = length.
# Returns in $0 the address of the first matching byte, or 0 when none of
# the $18 bytes matches.
ENTRY(__memchr)
|
||||
#ifdef PROF
|
||||
ldgp gp, 0(pv)
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.prologue 1
|
||||
#else
|
||||
.prologue 0
|
||||
#endif
|
||||
|
||||
# Hack -- if someone passes in (size_t)-1, hoping to just
|
||||
# search til the end of the address space, we will overflow
|
||||
# below when we find the address of the last byte. Given
|
||||
# that we will never have a 56-bit address space, cropping
|
||||
# the length is the easiest way to avoid trouble.
|
||||
zap $18, 0x80, $5 # U : Bound length
|
||||
beq $18, $not_found # U : length 0 => nothing to search
|
||||
ldq_u $1, 0($16) # L : load first quadword Latency=3
|
||||
and $17, 0xff, $17 # E : L L U U : 00000000000000ch
|
||||
|
||||
# The search byte is replicated into all 8 bytes of $17 over the next steps.
insbl $17, 1, $2 # U : 000000000000ch00
|
||||
cmpult $18, 9, $4 # E : small (< 1 quad) string?
|
||||
or $2, $17, $17 # E : 000000000000chch
|
||||
lda $3, -1($31) # E : U L L U : $3 = all ones
|
||||
|
||||
sll $17, 16, $2 # U : 00000000chch0000
|
||||
addq $16, $5, $5 # E : Max search address
|
||||
or $2, $17, $17 # E : 00000000chchchch
|
||||
sll $17, 32, $2 # U : U L L U : chchchch00000000
|
||||
|
||||
or $2, $17, $17 # E : chchchchchchchch
|
||||
extql $1, $16, $7 # U : $7 is upper bits
|
||||
beq $4, $first_quad # U : more than 8 bytes to search
|
||||
ldq_u $6, -1($5) # L : L U U L : eight or less bytes to search Latency=3
|
||||
|
||||
extqh $6, $16, $6 # U : 2 cycle stall for $6
|
||||
mov $16, $0 # E : $0 = base the match offset is added to
|
||||
nop # E :
|
||||
or $7, $6, $1 # E : L U L U $1 = quadword starting at $16
|
||||
|
||||
# Deal with the case where at most 8 bytes remain to be searched
|
||||
# in $1. E.g.:
|
||||
# $18 = 6
|
||||
# $1 = ????c6c5c4c3c2c1
|
||||
$last_quad:
|
||||
negq $18, $6 # E : $6 = -(bytes left), used as the shift count below
|
||||
xor $17, $1, $1 # E : bytes equal to the search byte become 0
|
||||
srl $3, $6, $6 # U : $6 = mask of $18 bits set
|
||||
cmpbge $31, $1, $2 # E : L U L U : bit i of $2 set when byte i of $1 is 0
|
||||
|
||||
nop
|
||||
nop
|
||||
and $2, $6, $2 # E : drop match bits beyond the bytes that remain
|
||||
beq $2, $not_found # U : U L U L
|
||||
|
||||
# Here $2 has one bit set per matching byte of the quad whose address is in
# $0; the index of the lowest set bit is added to $0 and returned.
$found_it:
|
||||
#if defined(__alpha_fix__) && defined(__alpha_cix__)
|
||||
/*
|
||||
* Since we are guaranteed to have set one of the bits, we don't
|
||||
* have to worry about coming back with a 0x40 out of cttz...
|
||||
*/
|
||||
cttz $2, $3 # U0 :
|
||||
addq $0, $3, $0 # E : All done
|
||||
nop # E :
|
||||
ret # L0 : L U L U
|
||||
#else
|
||||
/*
|
||||
* Slow and clunky. It can probably be improved.
|
||||
* An exercise left for others.
|
||||
*/
|
||||
negq $2, $3 # E :
|
||||
and $2, $3, $2 # E : $2 & -$2 keeps only the lowest set bit
|
||||
and $2, 0x0f, $1 # E : is that bit among bits 0-3?
|
||||
addq $0, 4, $3 # E : candidate address: 4 bytes further on
|
||||
|
||||
cmoveq $1, $3, $0 # E : Latency 2, extra map cycle
|
||||
nop # E : keep with cmov
|
||||
and $2, 0x33, $1 # E : is the bit among bits 0-1 or 4-5?
|
||||
addq $0, 2, $3 # E : U L U L : 2 cycle stall on $0
|
||||
|
||||
cmoveq $1, $3, $0 # E : Latency 2, extra map cycle
|
||||
nop # E : keep with cmov
|
||||
and $2, 0x55, $1 # E : is the bit at an even position?
|
||||
addq $0, 1, $3 # E : U L U L : 2 cycle stall on $0
|
||||
|
||||
cmoveq $1, $3, $0 # E : Latency 2, extra map cycle
|
||||
nop
|
||||
nop
|
||||
ret # L0 : L U L U
|
||||
#endif
|
||||
|
||||
# Deal with the case where $18 > 8 bytes remain to be
|
||||
# searched. $16 may not be aligned.
|
||||
.align 4
|
||||
$first_quad:
|
||||
andnot $16, 0x7, $0 # E : $0 = address of the aligned quad containing $16
|
||||
insqh $3, $16, $2 # U : $2 = 0000ffffffffffff ($16<0:2> ff)
|
||||
xor $1, $17, $1 # E : bytes equal to the search byte become 0
|
||||
or $1, $2, $1 # E : U L U L $1 = ====ffffffffffff
|
||||
|
||||
cmpbge $31, $1, $2 # E : bit i of $2 set when byte i of $1 is 0
|
||||
bne $2, $found_it # U :
|
||||
# At least one byte left to process.
|
||||
ldq $1, 8($0) # L :
|
||||
subq $5, 1, $18 # E : U L U L : $18 = address of the last byte to search
|
||||
|
||||
addq $0, 8, $0 # E : advance to the quad just loaded into $1
|
||||
# Make $18 point to last quad to be accessed (the
|
||||
# last quad may or may not be partial).
|
||||
andnot $18, 0x7, $18 # E :
|
||||
cmpult $0, $18, $2 # E : is the current quad before the last quad?
|
||||
beq $2, $final # U : U L U L
|
||||
|
||||
# At least two quads remain to be accessed.
|
||||
|
||||
subq $18, $0, $4 # E : $4 <- nr quads to be processed
|
||||
and $4, 8, $4 # E : odd number of quads?
|
||||
bne $4, $odd_quad_count # U :
|
||||
# At least three quads remain to be accessed
|
||||
mov $1, $4 # E : L U L U : move prefetched value to correct reg
|
||||
|
||||
.align 4
|
||||
# Two quads are examined per trip.  Each half tests one quad while the
# following quad is being loaded; $4 and $1 alternate between the roles of
# "current" and "prefetched" quad.  $18 is the address of the last quad.
$unrolled_loop:
|
||||
ldq $1, 8($0) # L : prefetch $1
|
||||
xor $17, $4, $2 # E : matching bytes of the current quad become 0
|
||||
cmpbge $31, $2, $2 # E : one bit per zero byte
|
||||
bne $2, $found_it # U : U L U L
|
||||
|
||||
addq $0, 8, $0 # E :
|
||||
nop # E :
|
||||
nop # E :
|
||||
nop # E :
|
||||
|
||||
$odd_quad_count:
|
||||
xor $17, $1, $2 # E : matching bytes of the current quad become 0
|
||||
ldq $4, 8($0) # L : prefetch $4
|
||||
cmpbge $31, $2, $2 # E : one bit per zero byte
|
||||
addq $0, 8, $6 # E :
|
||||
|
||||
bne $2, $found_it # U :
|
||||
cmpult $6, $18, $6 # E : is the next quad still before the last quad?
|
||||
addq $0, 8, $0 # E :
|
||||
nop # E :
|
||||
|
||||
bne $6, $unrolled_loop # U :
|
||||
mov $4, $1 # E : move prefetched value into $1
|
||||
nop # E :
|
||||
nop # E :
|
||||
|
||||
$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do
|
||||
nop # E :
|
||||
nop # E :
|
||||
bne $18, $last_quad # U :
|
||||
|
||||
$not_found:
|
||||
mov $31, $0 # E : return 0 (no match)
|
||||
nop # E :
|
||||
nop # E :
|
||||
ret # L0 :
|
||||
|
||||
END(__memchr)
|
||||
|
||||
weak_alias (__memchr, memchr)
|
||||
#if !__BOUNDED_POINTERS__
|
||||
weak_alias (__memchr, __ubp_memchr)
|
||||
#endif
|
||||
libc_hidden_builtin_def (memchr)
|
@ -1,256 +0,0 @@
|
||||
/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/*
|
||||
* Much of the information about 21264 scheduling/coding comes from:
|
||||
* Compiler Writer's Guide for the Alpha 21264
|
||||
* abbreviated as 'CWG' in other comments here
|
||||
* ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
|
||||
* Scheduling notation:
|
||||
* E - either cluster
|
||||
* U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
|
||||
* L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
|
||||
*
|
||||
* Temp usage notes:
|
||||
* $0 - destination address
|
||||
* $1,$2, - scratch
|
||||
*/
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
/*
 * memcpy: $16 = dest, $17 = src, $18 = byte count.  Returns dest in $0.
 * The count is tested with signed branches (ble/bgt), so a count that is
 * zero or negative when viewed as signed copies nothing.
 */
ENTRY(memcpy)
|
||||
.prologue 0
|
||||
|
||||
mov $16, $0 # E : copy dest to return
|
||||
ble $18, $nomoredata # U : done with the copy?
|
||||
xor $16, $17, $1 # E : are source and dest alignments the same?
|
||||
and $1, 7, $1 # E : are they the same mod 8?
|
||||
|
||||
bne $1, $misaligned # U : Nope - gotta do this the slow way
|
||||
/* source and dest are same mod 8 address */
|
||||
and $16, 7, $1 # E : Are both 0mod8?
|
||||
beq $1, $both_0mod8 # U : Yes
|
||||
nop # E :
|
||||
|
||||
/*
|
||||
* source and dest are same misalignment. move a byte at a time
|
||||
* until a 0mod8 alignment for both is reached.
|
||||
* At least one byte more to move
|
||||
*/
|
||||
|
||||
$head_align:
|
||||
ldbu $1, 0($17) # L : grab a byte
|
||||
subq $18, 1, $18 # E : count--
|
||||
addq $17, 1, $17 # E : src++
|
||||
stb $1, 0($16) # L : store the byte
|
||||
addq $16, 1, $16 # E : dest++
|
||||
and $16, 7, $1 # E : Are we at 0mod8 yet?
|
||||
ble $18, $nomoredata # U : done with the copy?
|
||||
bne $1, $head_align # U : not aligned yet, copy another byte
|
||||
|
||||
/*
 * Source and destination are both 8-byte aligned here.  Counts of 127 or
 * less skip the unrolled loop; otherwise whole quads are copied until dest
 * is 64-byte aligned, and then $unroll_body moves 64 bytes per trip as two
 * 32-byte halves.
 */
$both_0mod8:
|
||||
cmple $18, 127, $1 # E : Can we unroll the loop?
|
||||
bne $1, $no_unroll # U :
|
||||
and $16, 63, $1 # E : get mod64 alignment
|
||||
beq $1, $do_unroll # U : no single quads to fiddle
|
||||
|
||||
$single_head_quad:
|
||||
ldq $1, 0($17) # L : get 8 bytes
|
||||
subq $18, 8, $18 # E : count -= 8
|
||||
addq $17, 8, $17 # E : src += 8
|
||||
nop # E :
|
||||
|
||||
stq $1, 0($16) # L : store
|
||||
addq $16, 8, $16 # E : dest += 8
|
||||
and $16, 63, $1 # E : get mod64 alignment
|
||||
bne $1, $single_head_quad # U : still not fully aligned
|
||||
|
||||
$do_unroll:
|
||||
addq $16, 64, $7 # E : Initial (+1 trip) wh64 address
|
||||
cmple $18, 127, $1 # E : Can we go through the unrolled loop?
|
||||
bne $1, $tail_quads # U : Nope
|
||||
nop # E :
|
||||
|
||||
$unroll_body:
|
||||
wh64 ($7) # L1 : memory subsystem hint: 64 bytes at
|
||||
# ($7) are about to be over-written
|
||||
ldq $6, 0($17) # L0 : bytes 0..7
|
||||
nop # E :
|
||||
nop # E :
|
||||
|
||||
ldq $4, 8($17) # L : bytes 8..15
|
||||
ldq $5, 16($17) # L : bytes 16..23
|
||||
addq $7, 64, $7 # E : Update next wh64 address
|
||||
nop # E :
|
||||
|
||||
ldq $3, 24($17) # L : bytes 24..31
|
||||
addq $16, 64, $1 # E : fallback value for wh64
|
||||
nop # E :
|
||||
nop # E :
|
||||
|
||||
addq $17, 32, $17 # E : src += 32 bytes
|
||||
stq $6, 0($16) # L : bytes 0..7
|
||||
nop # E :
|
||||
nop # E :
|
||||
|
||||
stq $4, 8($16) # L : bytes 8..15
|
||||
stq $5, 16($16) # L : bytes 16..23
|
||||
subq $18, 192, $2 # E : At least two more trips to go?
|
||||
nop # E :
|
||||
|
||||
stq $3, 24($16) # L : bytes 24..31
|
||||
addq $16, 32, $16 # E : dest += 32 bytes
|
||||
nop # E :
|
||||
nop # E :
|
||||
|
||||
ldq $6, 0($17) # L : bytes 32..39 of this trip (src already += 32)
|
||||
ldq $4, 8($17) # L : bytes 40..47 of this trip
|
||||
cmovlt $2, $1, $7 # E : Latency 2, extra map slot - Use
|
||||
# fallback wh64 address if < 2 more trips
|
||||
nop # E :
|
||||
|
||||
ldq $5, 16($17) # L : bytes 48..55 of this trip
|
||||
ldq $3, 24($17) # L : bytes 56..63 of this trip
|
||||
addq $16, 32, $16 # E : dest += 32
|
||||
subq $18, 64, $18 # E : count -= 64
|
||||
|
||||
addq $17, 32, $17 # E : src += 32
|
||||
stq $6, -32($16) # L : bytes 32..39 of this trip
|
||||
stq $4, -24($16) # L : bytes 40..47 of this trip
|
||||
cmple $18, 63, $1 # E : At least one more trip?
|
||||
|
||||
stq $5, -16($16) # L : bytes 48..55 of this trip
|
||||
stq $3, -8($16) # L : bytes 56..63 of this trip
|
||||
nop # E :
|
||||
beq $1, $unroll_body
|
||||
|
||||
/*
 * Bytes left after (or instead of) the unrolled loop: copy whole quads while
 * at least 8 bytes remain, then finish one byte at a time.
 */
$tail_quads:
|
||||
$no_unroll:
|
||||
.align 4
|
||||
subq $18, 8, $18 # E : At least a quad left?
|
||||
blt $18, $less_than_8 # U : Nope
|
||||
nop # E :
|
||||
nop # E :
|
||||
|
||||
$move_a_quad:
|
||||
ldq $1, 0($17) # L : fetch 8
|
||||
subq $18, 8, $18 # E : count -= 8
|
||||
addq $17, 8, $17 # E : src += 8
|
||||
nop # E :
|
||||
|
||||
stq $1, 0($16) # L : store 8
|
||||
addq $16, 8, $16 # E : dest += 8
|
||||
bge $18, $move_a_quad # U : another whole quad remains
|
||||
nop # E :
|
||||
|
||||
$less_than_8:
|
||||
.align 4
|
||||
addq $18, 8, $18 # E : add back for trailing bytes
|
||||
ble $18, $nomoredata # U : All-done
|
||||
nop # E :
|
||||
nop # E :
|
||||
|
||||
/* Trailing bytes */
|
||||
$tail_bytes:
|
||||
subq $18, 1, $18 # E : count--
|
||||
ldbu $1, 0($17) # L : fetch a byte
|
||||
addq $17, 1, $17 # E : src++
|
||||
nop # E :
|
||||
|
||||
stb $1, 0($16) # L : store a byte
|
||||
addq $16, 1, $16 # E : dest++
|
||||
bgt $18, $tail_bytes # U : more to be done?
|
||||
nop # E :
|
||||
|
||||
/* branching to exit takes 3 extra cycles, so replicate exit here */
|
||||
ret $31, ($26), 1 # L0 :
|
||||
nop # E :
|
||||
nop # E :
|
||||
nop # E :
|
||||
|
||||
/*
 * Source and destination differ in alignment mod 8.  $0 keeps the original
 * dest for the return value, so $4 is the moving dest pointer on this path;
 * $16 is reused as a scratch register inside $mis_quad.
 */
$misaligned:
|
||||
mov $0, $4 # E : dest temp
|
||||
and $0, 7, $1 # E : dest alignment mod8
|
||||
beq $1, $dest_0mod8 # U : life doesnt totally suck
|
||||
nop
|
||||
|
||||
$aligndest:
|
||||
ble $18, $nomoredata # U : nothing left to copy
|
||||
ldbu $1, 0($17) # L : fetch a byte
|
||||
subq $18, 1, $18 # E : count--
|
||||
addq $17, 1, $17 # E : src++
|
||||
|
||||
stb $1, 0($4) # L : store it
|
||||
addq $4, 1, $4 # E : dest++
|
||||
and $4, 7, $1 # E : dest 0mod8 yet?
|
||||
bne $1, $aligndest # U : go until we are aligned.
|
||||
|
||||
/* Source has unknown alignment, but dest is known to be 0mod8 */
|
||||
$dest_0mod8:
|
||||
subq $18, 8, $18 # E : At least a quad left?
|
||||
blt $18, $misalign_tail # U : Nope
|
||||
ldq_u $3, 0($17) # L : seed (rotating load) of 8 bytes
|
||||
nop # E :
|
||||
|
||||
/* Each trip merges the tail of the previous source quad ($3) with the head
   of the next one ($16) into one aligned store.  */
$mis_quad:
|
||||
ldq_u $16, 8($17) # L : Fetch next 8
|
||||
extql $3, $17, $3 # U : masking
|
||||
extqh $16, $17, $1 # U : masking
|
||||
bis $3, $1, $1 # E : merged bytes to store
|
||||
|
||||
subq $18, 8, $18 # E : count -= 8
|
||||
addq $17, 8, $17 # E : src += 8
|
||||
stq $1, 0($4) # L : store 8 (aligned)
|
||||
mov $16, $3 # E : "rotate" source data
|
||||
|
||||
addq $4, 8, $4 # E : dest += 8
|
||||
bge $18, $mis_quad # U : More quads to move
|
||||
nop
|
||||
nop
|
||||
|
||||
$misalign_tail:
|
||||
addq $18, 8, $18 # E : account for tail stuff
|
||||
ble $18, $nomoredata # U : no trailing bytes
|
||||
nop
|
||||
nop
|
||||
|
||||
$misalign_byte:
|
||||
ldbu $1, 0($17) # L : fetch 1
|
||||
subq $18, 1, $18 # E : count--
|
||||
addq $17, 1, $17 # E : src++
|
||||
nop # E :
|
||||
|
||||
stb $1, 0($4) # L : store
|
||||
addq $4, 1, $4 # E : dest++
|
||||
bgt $18, $misalign_byte # U : more to go?
|
||||
nop
|
||||
|
||||
|
||||
$nomoredata:
|
||||
ret $31, ($26), 1 # L0 :
|
||||
nop # E :
|
||||
nop # E :
|
||||
nop # E :
|
||||
|
||||
END(memcpy)
|
||||
libc_hidden_builtin_def (memcpy)
|
@ -1,224 +0,0 @@
|
||||
/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson (rth@tamu.edu)
|
||||
EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
||||
/* memset, EV6 version: store one byte value over a range of memory.

   Register use, as read from the code below:
     $16  destination address on entry; copied to $0 as the return value
     $17  fill value; its low byte is replicated into all eight bytes
     $18  byte count; a count <= 0 (signed test) returns without storing

   $1-$7 are used as scratch, and $16, $17 and $18 are overwritten.  */
ENTRY(memset)
|
||||
#ifdef PROF
|
||||
ldgp gp, 0(pv)
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.prologue 1
|
||||
#else
|
||||
.prologue 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Serious stalling happens. The only way to mitigate this is to
|
||||
* undertake a major re-write to interleave the constant materialization
|
||||
* with other parts of the fall-through code. This is important, even
|
||||
* though it makes maintenance tougher.
|
||||
* Do this later.
|
||||
*/
|
||||
and $17, 255, $1 # E : 00000000000000ch
|
||||
insbl $17, 1, $2 # U : 000000000000ch00
|
||||
mov $16, $0 # E : return value
|
||||
ble $18, $end # U : zero length requested?
|
||||
|
||||
addq $18, $16, $6 # E : end address (dest + count, one past the last byte)
|
||||
or $1, $2, $17 # E : 000000000000chch
|
||||
insbl $1, 2, $3 # U : 0000000000ch0000
|
||||
insbl $1, 3, $4 # U : 00000000ch000000
|
||||
|
||||
or $3, $4, $3 # E : 00000000chch0000
|
||||
inswl $17, 4, $5 # U : 0000chch00000000
|
||||
xor $16, $6, $1 # E : will complete write be within one quadword?
|
||||
inswl $17, 6, $2 # U : chch000000000000
|
||||
|
||||
or $17, $3, $17 # E : 00000000chchchch
|
||||
or $2, $5, $2 # E : chchchch00000000
|
||||
bic $1, 7, $1 # E : fit within a single quadword?
|
||||
and $16, 7, $3 # E : Target addr misalignment
|
||||
|
||||
or $17, $2, $17 # E : chchchchchchchch
|
||||
beq $1, $within_quad # U : start and end share the same aligned quad
|
||||
nop # E :
|
||||
beq $3, $aligned # U : target is 0mod8
|
||||
|
||||
/*
|
||||
* Target address is misaligned, and won't fit within a quadword.
|
||||
* Merge the fill bytes into the first partial quadword, then advance
|
||||
* $16 to the next 8-byte boundary and shrink $18 by the bytes written.
|
||||
*/
|
||||
ldq_u $4, 0($16) # L : Fetch first partial
|
||||
mov $16, $5 # E : Save the address
|
||||
insql $17, $16, $2 # U : Insert new bytes
|
||||
subq $3, 8, $3 # E : Invert (for addressing uses)
|
||||
|
||||
addq $18, $3, $18 # E : $18 is new count ($3 is negative)
|
||||
mskql $4, $16, $4 # U : clear relevant parts of the quad
|
||||
subq $16, $3, $16 # E : $16 is new aligned destination
|
||||
or $2, $4, $1 # E : Final bytes
|
||||
|
||||
nop
|
||||
stq_u $1,0($5) # L : Store result
|
||||
nop
|
||||
nop
|
||||
|
||||
.align 4
|
||||
$aligned:
|
||||
/*
|
||||
* We are now guaranteed to be quad aligned, with at least
|
||||
* one partial quad to write.
|
||||
*/
|
||||
|
||||
sra $18, 3, $3 # U : Number of remaining quads to write
|
||||
and $18, 7, $18 # E : Number of trailing bytes to write
|
||||
mov $16, $5 # E : Save dest address
|
||||
beq $3, $no_quad # U : tail stuff only
|
||||
|
||||
/*
|
||||
* It's worth the effort to unroll this and use wh64 if possible.
|
||||
* At this point, entry values are:
|
||||
* $16 Current destination address
|
||||
* $5 A copy of $16
|
||||
* $6 The max quadword address to write to
|
||||
* $18 Number of trailer bytes
|
||||
* $3 Number of quads to write
|
||||
*/
|
||||
|
||||
and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop)
|
||||
subq $3, 16, $4 # E : Only try to unroll if > 128 bytes
|
||||
subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64)
|
||||
blt $4, $loop # U :
|
||||
|
||||
/*
|
||||
* We know we've got at least 16 quads, minimum of one trip
|
||||
* through unrolled loop. Do a quad at a time to get us 0mod64
|
||||
* aligned.
|
||||
*/
|
||||
|
||||
nop # E :
|
||||
nop # E :
|
||||
nop # E :
/* NOTE(review): $1 is ($16 & 0x3f) - 0x40 at this point, which is never
   zero, so this branch looks like it is never taken.  A destination that
   is already 0mod64 instead runs $alignmod64 eight times.  That loop is
   also the only place $4 receives its first wh64 address, so do not
   redirect this branch without setting $4 first.  */
beq $1, $bigalign # U :
|
||||
|
||||
$alignmod64:
|
||||
stq $17, 0($5) # L :
|
||||
subq $3, 1, $3 # E : For consistency later
|
||||
addq $1, 8, $1 # E : Increment towards zero for alignment
|
||||
addq $5, 8, $4 # E : Initial wh64 address (filler instruction)
|
||||
|
||||
nop
|
||||
nop
|
||||
addq $5, 8, $5 # E : Inc address
|
||||
blt $1, $alignmod64 # U :
|
||||
|
||||
$bigalign:
|
||||
/*
|
||||
* $3 - number quads left to go
|
||||
* $5 - target address (aligned 0mod64)
|
||||
* $17 - mask of stuff to store
|
||||
* Scratch registers available: $7, $2, $4, $1
|
||||
* We know that we'll be taking a minimum of one trip through.
|
||||
* CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle
|
||||
* Assumes the wh64 needs to be for 2 trips through the loop in the future.
|
||||
* The wh64 is issued for the starting destination address for trip +2
|
||||
* through the loop, and if there are fewer than two trips left, the target
|
||||
* address will be for the current trip.
|
||||
*/
|
||||
|
||||
$do_wh64:
|
||||
wh64 ($4) # L1 : memory subsystem write hint
|
||||
subq $3, 24, $2 # E : For determining future wh64 addresses
|
||||
stq $17, 0($5) # L :
|
||||
nop # E :
|
||||
|
||||
addq $5, 128, $4 # E : speculative target of next wh64
|
||||
stq $17, 8($5) # L :
|
||||
stq $17, 16($5) # L :
|
||||
addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr)
|
||||
|
||||
stq $17, 24($5) # L :
|
||||
stq $17, 32($5) # L :
|
||||
cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle (fewer than 24 quads left: use fallback)
|
||||
nop
|
||||
|
||||
stq $17, 40($5) # L :
|
||||
stq $17, 48($5) # L :
|
||||
subq $3, 16, $2 # E : Repeat the loop at least once more?
|
||||
nop
|
||||
|
||||
stq $17, 56($5) # L :
|
||||
addq $5, 64, $5 # E :
|
||||
subq $3, 8, $3 # E :
|
||||
bge $2, $do_wh64 # U :
|
||||
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
beq $3, $no_quad # U : Might have finished already
|
||||
|
||||
.align 4
|
||||
/*
|
||||
* Simple loop for trailing quadwords, or for small amounts
|
||||
* of data (where we can't use an unrolled loop and wh64)
|
||||
*/
|
||||
$loop:
|
||||
stq $17, 0($5) # L :
|
||||
subq $3, 1, $3 # E : Decrement number quads left
|
||||
addq $5, 8, $5 # E : Inc address
|
||||
bne $3, $loop # U : more?
|
||||
|
||||
$no_quad:
|
||||
/*
|
||||
* Write 0..7 trailing bytes.
|
||||
* This is a read-modify-write of the quadword at $5: the bytes at and
|
||||
* above the end address in $6 are kept, the bytes below it are filled.
|
||||
*/
|
||||
nop # E :
|
||||
beq $18, $end # U : All done?
|
||||
ldq $7, 0($5) # L :
|
||||
mskqh $7, $6, $2 # U : Mask final quad
|
||||
|
||||
insqh $17, $6, $4 # U : New bits
|
||||
or $2, $4, $1 # E : Put it all together
|
||||
stq $1, 0($5) # L : And back to memory
|
||||
ret $31,($26),1 # L0 :
|
||||
|
||||
/* The whole range lies inside one aligned quadword.  Build the quad
   with fill bytes from $16 upward, then take the original bytes back
   for everything at and above the end address in $6.  */
$within_quad:
|
||||
ldq_u $1, 0($16) # L :
|
||||
insql $17, $16, $2 # U : New bits
|
||||
mskql $1, $16, $4 # U : Clear old
|
||||
or $2, $4, $2 # E : New result
|
||||
|
||||
mskql $2, $6, $4 # U : keep the merged bytes below the end address
|
||||
mskqh $1, $6, $2 # U : keep the original bytes from the end address up
|
||||
or $2, $4, $1 # E :
|
||||
stq_u $1, 0($16) # L :
|
||||
|
||||
$end:
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
ret $31,($26),1 # L0 :
|
||||
|
||||
END(memset)
|
||||
libc_hidden_builtin_def (memset)
|
@ -1,328 +0,0 @@
|
||||
/* Copyright (C) 2000 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson (rth@tamu.edu)
|
||||
EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Copy a null-terminated string from SRC to DST.
|
||||
|
||||
This is an internal routine used by strcpy, stpcpy, and strcat.
|
||||
As such, it uses special linkage conventions to make implementation
|
||||
of these public functions more efficient.
|
||||
|
||||
On input:
|
||||
t9 = return address
|
||||
a0 = DST
|
||||
a1 = SRC
|
||||
|
||||
On output:
|
||||
t8 = bitmask (with one bit set) indicating the last byte written
|
||||
a0 = unaligned address of the last *word* written
|
||||
|
||||
Furthermore, v0, a3-a5, t11, and t12 are untouched.
|
||||
*/
|
||||
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noat
|
||||
.set noreorder
|
||||
.text
|
||||
|
||||
/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
|
||||
doesn't like putting the entry point for a procedure somewhere in the
|
||||
middle of the procedure descriptor. Work around this by putting the
|
||||
aligned copy in its own procedure descriptor */
|
||||
|
||||
|
||||
/* stxcpy_aligned: copy path used when SRC and DST have the same offset
   within a quadword.  It is entered by a branch from __stxcpy with the
   first source quadword already in t1 and a1 already advanced by 8, and
   it returns through t9.  */
.ent stxcpy_aligned
|
||||
.align 4
|
||||
stxcpy_aligned:
|
||||
.frame sp, 0, t9
|
||||
.prologue 0
|
||||
|
||||
/* On entry to this basic block:
|
||||
t0 == the first destination word for masking back in
|
||||
t1 == the first source word. */
|
||||
|
||||
/* Create the 1st output word and detect 0's in the 1st input word. */
|
||||
lda t2, -1 # E : build a mask against false zero
|
||||
mskqh t2, a1, t2 # U : detection in the src word (stall)
|
||||
mskqh t1, a1, t3 # U : source bytes from the string start upward
|
||||
ornot t1, t2, t2 # E : (stall) bytes before the string start forced non-zero
|
||||
|
||||
mskql t0, a1, t0 # U : assemble the first output word
|
||||
cmpbge zero, t2, t10 # E : bits set iff null found
|
||||
or t0, t3, t1 # E : (stall)
|
||||
bne t10, $a_eos # U : (stall)
|
||||
|
||||
/* On entry to this basic block:
|
||||
t0 == the first destination word for masking back in
|
||||
t1 == a source word not containing a null. */
|
||||
/* Nops here to separate store quads from load quads */
|
||||
|
||||
$a_loop:
|
||||
stq_u t1, 0(a0) # L :
|
||||
addq a0, 8, a0 # E :
|
||||
nop
|
||||
nop
|
||||
|
||||
ldq_u t1, 0(a1) # L : Latency=3
|
||||
addq a1, 8, a1 # E :
|
||||
cmpbge zero, t1, t10 # E : (3 cycle stall)
|
||||
beq t10, $a_loop # U : (stall for t10)
|
||||
|
||||
/* Take care of the final (partial) word store.
|
||||
On entry to this basic block we have:
|
||||
t1 == the source word containing the null
|
||||
t10 == the cmpbge mask that found it. */
|
||||
$a_eos:
|
||||
negq t10, t6 # E : find low bit set
|
||||
and t10, t6, t8 # E : (stall)
|
||||
/* For the sake of the cache, don't read a destination word
|
||||
if we're not going to need it. */
|
||||
and t8, 0x80, t6 # E : (stall) null in byte 7 means a full-word store
|
||||
bne t6, 1f # U : (stall)
|
||||
|
||||
/* We're doing a partial word store and so need to combine
|
||||
our source and original destination words. */
|
||||
ldq_u t0, 0(a0) # L : Latency=3
|
||||
subq t8, 1, t6 # E : t6 = mask of the bytes below the null
|
||||
zapnot t1, t6, t1 # U : clear src bytes >= null (stall)
|
||||
or t8, t6, t10 # E : (stall)
|
||||
|
||||
zap t0, t10, t0 # E : clear dst bytes <= null
|
||||
or t0, t1, t1 # E : (stall)
|
||||
nop
|
||||
nop
|
||||
|
||||
1: stq_u t1, 0(a0) # L :
|
||||
ret (t9) # L0 : Latency=3
|
||||
nop
|
||||
nop
|
||||
|
||||
.end stxcpy_aligned
|
||||
|
||||
.align 4
|
||||
/* __stxcpy: entry point.  Branches to stxcpy_aligned when a0 and a1
   agree in their low three bits; otherwise it takes the shift-and-merge
   path that starts at $unaligned.  Returns through t9.  */
.ent __stxcpy
|
||||
.globl __stxcpy
|
||||
__stxcpy:
|
||||
.frame sp, 0, t9
|
||||
.prologue 0
|
||||
|
||||
/* Are source and destination co-aligned? */
|
||||
xor a0, a1, t0 # E :
|
||||
unop # E :
|
||||
and t0, 7, t0 # E : (stall)
|
||||
bne t0, $unaligned # U : (stall)
|
||||
|
||||
/* We are co-aligned; take care of a partial first word. */
|
||||
ldq_u t1, 0(a1) # L : load first src word
|
||||
and a0, 7, t0 # E : take care not to load a word ...
|
||||
addq a1, 8, a1 # E :
|
||||
beq t0, stxcpy_aligned # U : ... if we won't need it (stall)
|
||||
|
||||
ldq_u t0, 0(a0) # L :
|
||||
br stxcpy_aligned # L0 : Latency=3
|
||||
nop
|
||||
nop
|
||||
|
||||
|
||||
/* The source and destination are not co-aligned. Align the destination
|
||||
and cope. We have to be very careful about not reading too much and
|
||||
causing a SEGV. */
|
||||
|
||||
.align 4
|
||||
$u_head:
|
||||
/* We know just enough now to be able to assemble the first
|
||||
full source word. We can still find a zero at the end of it
|
||||
that prevents us from outputting the whole thing.
|
||||
|
||||
On entry to this basic block:
|
||||
t0 == the first dest word, for masking back in, if needed else 0
|
||||
t1 == the low bits of the first source word
|
||||
t6 == bytemask that is -1 in dest word bytes */
|
||||
|
||||
ldq_u t2, 8(a1) # L :
|
||||
addq a1, 8, a1 # E :
|
||||
extql t1, a1, t1 # U : (stall on a1)
|
||||
extqh t2, a1, t4 # U : (stall on a1)
|
||||
|
||||
mskql t0, a0, t0 # U :
|
||||
or t1, t4, t1 # E :
|
||||
mskqh t1, a0, t1 # U : (stall on t1)
|
||||
or t0, t1, t1 # E : (stall on t1)
|
||||
|
||||
or t1, t6, t6 # E :
|
||||
cmpbge zero, t6, t10 # E : (stall)
|
||||
lda t6, -1 # E : for masking just below
|
||||
bne t10, $u_final # U : (stall)
|
||||
|
||||
mskql t6, a1, t6 # U : mask out the bits we have
|
||||
or t6, t2, t2 # E : already extracted before (stall)
|
||||
cmpbge zero, t2, t10 # E : testing eos (stall)
|
||||
bne t10, $u_late_head_exit # U : (stall)
|
||||
|
||||
/* Finally, we've got all the stupid leading edge cases taken care
|
||||
of and we can set up to enter the main loop. */
|
||||
|
||||
stq_u t1, 0(a0) # L : store first output word
|
||||
addq a0, 8, a0 # E :
|
||||
extql t2, a1, t0 # U : position ho-bits of lo word
|
||||
ldq_u t2, 8(a1) # L : read next high-order source word
|
||||
|
||||
addq a1, 8, a1 # E :
|
||||
cmpbge zero, t2, t10 # E : (stall for t2)
|
||||
nop # E :
|
||||
bne t10, $u_eos # U : (stall)
|
||||
|
||||
/* Unaligned copy main loop. In order to avoid reading too much,
|
||||
the loop is structured to detect zeros in aligned source words.
|
||||
This has, unfortunately, effectively pulled half of a loop
|
||||
iteration out into the head and half into the tail, but it does
|
||||
prevent nastiness from accumulating in the very thing we want
|
||||
to run as fast as possible.
|
||||
|
||||
On entry to this basic block:
|
||||
t0 == the shifted high-order bits from the previous source word
|
||||
t2 == the unshifted current source word
|
||||
|
||||
We further know that t2 does not contain a null terminator. */
|
||||
|
||||
.align 3
|
||||
$u_loop:
|
||||
extqh t2, a1, t1 # U : extract high bits for current word
|
||||
addq a1, 8, a1 # E : (stall)
|
||||
extql t2, a1, t3 # U : extract low bits for next time (stall)
|
||||
addq a0, 8, a0 # E :
|
||||
|
||||
or t0, t1, t1 # E : current dst word now complete
|
||||
ldq_u t2, 0(a1) # L : Latency=3 load high word for next time
|
||||
stq_u t1, -8(a0) # L : save the current word (stall)
|
||||
mov t3, t0 # E :
|
||||
|
||||
cmpbge zero, t2, t10 # E : test new word for eos
|
||||
beq t10, $u_loop # U : (stall)
|
||||
nop
|
||||
nop
|
||||
|
||||
/* We've found a zero somewhere in the source word we just read.
|
||||
If it resides in the lower half, we have one (probably partial)
|
||||
word to write out, and if it resides in the upper half, we
|
||||
have one full and one partial word left to write out.
|
||||
|
||||
On entry to this basic block:
|
||||
t0 == the shifted high-order bits from the previous source word
|
||||
t2 == the unshifted current source word. */
|
||||
$u_eos:
|
||||
extqh t2, a1, t1 # U :
|
||||
or t0, t1, t1 # E : first (partial) source word complete (stall)
|
||||
cmpbge zero, t1, t10 # E : is the null in this first bit? (stall)
|
||||
bne t10, $u_final # U : (stall)
|
||||
|
||||
$u_late_head_exit:
|
||||
stq_u t1, 0(a0) # L : the null was in the high-order bits
|
||||
addq a0, 8, a0 # E :
|
||||
extql t2, a1, t1 # U :
|
||||
cmpbge zero, t1, t10 # E : (stall)
|
||||
|
||||
/* Take care of a final (probably partial) result word.
|
||||
On entry to this basic block:
|
||||
t1 == assembled source word
|
||||
t10 == cmpbge mask that found the null. */
|
||||
$u_final:
|
||||
negq t10, t6 # E : isolate low bit set
|
||||
and t6, t10, t8 # E : (stall)
|
||||
and t8, 0x80, t6 # E : avoid dest word load if we can (stall)
|
||||
bne t6, 1f # U : (stall)
|
||||
|
||||
ldq_u t0, 0(a0) # L :
|
||||
subq t8, 1, t6 # E :
|
||||
or t6, t8, t10 # E : (stall)
|
||||
zapnot t1, t6, t1 # U : kill source bytes >= null (stall)
|
||||
|
||||
zap t0, t10, t0 # U : kill dest bytes <= null (2 cycle data stall)
|
||||
or t0, t1, t1 # E : (stall)
|
||||
nop
|
||||
nop
|
||||
|
||||
1: stq_u t1, 0(a0) # L :
|
||||
ret (t9) # L0 : Latency=3
|
||||
nop
|
||||
nop
|
||||
|
||||
/* Unaligned copy entry point. */
|
||||
.align 4
|
||||
$unaligned:
|
||||
|
||||
ldq_u t1, 0(a1) # L : load first source word
|
||||
and a0, 7, t4 # E : find dest misalignment
|
||||
and a1, 7, t5 # E : find src misalignment
|
||||
/* Conditionally load the first destination word and a bytemask
|
||||
with 0xff indicating that the destination byte is sacrosanct. */
|
||||
mov zero, t0 # E :
|
||||
|
||||
mov zero, t6 # E :
|
||||
beq t4, 1f # U :
|
||||
ldq_u t0, 0(a0) # L :
|
||||
lda t6, -1 # E :
|
||||
|
||||
mskql t6, a0, t6 # U :
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
1:
|
||||
/* After this subtraction the low three bits of a1 hold the byte shift
   between source and destination words, which is the only part of a1
   that the ext* instructions below read.  */
subq a1, t4, a1 # E : sub dest misalignment from src addr
|
||||
/* If source misalignment is larger than dest misalignment, we need
|
||||
extra startup checks to avoid SEGV. */
|
||||
cmplt t4, t5, t8 # E :
|
||||
beq t8, $u_head # U :
|
||||
lda t2, -1 # E : mask out leading garbage in source
|
||||
|
||||
mskqh t2, t5, t2 # U :
|
||||
ornot t1, t2, t3 # E : (stall)
|
||||
cmpbge zero, t3, t10 # E : is there a zero? (stall)
|
||||
beq t10, $u_head # U : (stall)
|
||||
|
||||
/* At this point we've found a zero in the first partial word of
|
||||
the source. We need to isolate the valid source data and mask
|
||||
it into the original destination data. (Incidentally, we know
|
||||
that we'll need at least one byte of that original dest word.) */
|
||||
|
||||
ldq_u t0, 0(a0) # L :
|
||||
negq t10, t6 # E : build bitmask of bytes <= zero
|
||||
and t6, t10, t8 # E : (stall)
|
||||
and a1, 7, t5 # E :
|
||||
|
||||
subq t8, 1, t6 # E :
|
||||
or t6, t8, t10 # E : (stall)
|
||||
srl t8, t5, t8 # U : adjust final null return value
|
||||
zapnot t2, t10, t2 # U : prepare source word; mirror changes (stall)
|
||||
|
||||
and t1, t2, t1 # E : to source validity mask
|
||||
extql t2, a1, t2 # U :
|
||||
extql t1, a1, t1 # U : (stall)
|
||||
andnot t0, t2, t0 # .. e1 : zero place for source to reside (stall)
|
||||
|
||||
or t0, t1, t1 # e1 : and put it there
|
||||
stq_u t1, 0(a0) # .. e0 : (stall)
|
||||
ret (t9) # e1 :
|
||||
nop
|
||||
|
||||
.end __stxcpy
|
||||
|
@ -1,403 +0,0 @@
|
||||
/* Copyright (C) 2000, 2002 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson (rth@tamu.edu)
|
||||
EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Copy no more than COUNT bytes of the null-terminated string from
|
||||
SRC to DST.
|
||||
|
||||
This is an internal routine used by strncpy, stpncpy, and strncat.
|
||||
As such, it uses special linkage conventions to make implementation
|
||||
of these public functions more efficient.
|
||||
|
||||
On input:
|
||||
t9 = return address
|
||||
a0 = DST
|
||||
a1 = SRC
|
||||
a2 = COUNT
|
||||
|
||||
Furthermore, COUNT may not be zero.
|
||||
|
||||
On output:
|
||||
t0 = last word written
|
||||
t8 = bitmask (with one bit set) indicating the last byte written
|
||||
t10 = bitmask (with one bit set) indicating the byte position of
|
||||
the end of the range specified by COUNT
|
||||
a0 = unaligned address of the last *word* written
|
||||
a2 = the number of full words left in COUNT
|
||||
|
||||
Furthermore, v0, a3-a5, t11, and t12 are untouched.
|
||||
*/
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
||||
/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
|
||||
doesn't like putting the entry point for a procedure somewhere in the
|
||||
middle of the procedure descriptor. Work around this by putting the
|
||||
aligned copy in its own procedure descriptor */
|
||||
|
||||
|
||||
/* stxncpy_aligned: copy path used when SRC and DST have the same offset
   within a quadword.  It is entered by a branch from __stxncpy with the
   first source quadword in t1, the remaining word count in a2 and the
   end-of-count byte mask in t10.  Returns through t9.  */
.ent stxncpy_aligned
|
||||
.align 4
|
||||
stxncpy_aligned:
|
||||
.frame sp, 0, t9, 0
|
||||
.prologue 0
|
||||
|
||||
/* On entry to this basic block:
|
||||
t0 == the first destination word for masking back in
|
||||
t1 == the first source word. */
|
||||
|
||||
/* Create the 1st output word and detect 0's in the 1st input word. */
|
||||
lda t2, -1 # E : build a mask against false zero
|
||||
mskqh t2, a1, t2 # U : detection in the src word (stall)
|
||||
mskqh t1, a1, t3 # U :
|
||||
ornot t1, t2, t2 # E : (stall)
|
||||
|
||||
mskql t0, a1, t0 # U : assemble the first output word
|
||||
cmpbge zero, t2, t7 # E : bits set iff null found
|
||||
or t0, t3, t0 # E : (stall)
|
||||
beq a2, $a_eoc # U : count ends inside this first word
|
||||
|
||||
bne t7, $a_eos # U :
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
|
||||
/* On entry to this basic block:
|
||||
t0 == a source word not containing a null. */
|
||||
|
||||
/*
|
||||
* nops here to:
|
||||
* separate store quads from load quads
|
||||
* limit of 1 bcond/quad to permit training
|
||||
*/
|
||||
$a_loop:
|
||||
stq_u t0, 0(a0) # L :
|
||||
addq a0, 8, a0 # E :
|
||||
subq a2, 1, a2 # E :
|
||||
nop
|
||||
|
||||
ldq_u t0, 0(a1) # L :
|
||||
addq a1, 8, a1 # E :
|
||||
cmpbge zero, t0, t7 # E :
|
||||
beq a2, $a_eoc # U :
|
||||
|
||||
beq t7, $a_loop # U :
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
|
||||
/* Take care of the final (partial) word store. At this point
|
||||
the end-of-count bit is set in t7 iff it applies.
|
||||
|
||||
On entry to this basic block we have:
|
||||
t0 == the source word containing the null
|
||||
t7 == the cmpbge mask that found it. */
|
||||
|
||||
$a_eos:
|
||||
negq t7, t8 # E : find low bit set
|
||||
and t7, t8, t8 # E : (stall)
|
||||
/* For the sake of the cache, don't read a destination word
|
||||
if we're not going to need it. */
|
||||
and t8, 0x80, t6 # E : (stall) last byte is byte 7: store the whole word
|
||||
bne t6, 1f # U : (stall)
|
||||
|
||||
/* We're doing a partial word store and so need to combine
|
||||
our source and original destination words. */
|
||||
ldq_u t1, 0(a0) # L :
|
||||
subq t8, 1, t6 # E :
|
||||
or t8, t6, t7 # E : (stall)
|
||||
zapnot t0, t7, t0 # U : clear src bytes > null (stall)
|
||||
|
||||
zap t1, t7, t1 # .. e1 : clear dst bytes <= null
|
||||
or t0, t1, t0 # e1 : (stall)
|
||||
nop
|
||||
nop
|
||||
|
||||
1: stq_u t0, 0(a0) # L :
|
||||
ret (t9) # L0 : Latency=3
|
||||
nop
|
||||
nop
|
||||
|
||||
/* Add the end-of-count bit to the eos detection bitmask. */
|
||||
$a_eoc:
|
||||
or t10, t7, t7 # E :
|
||||
br $a_eos # L0 : Latency=3
|
||||
nop
|
||||
nop
|
||||
|
||||
.end stxncpy_aligned
|
||||
|
||||
.align 4
|
||||
/* __stxncpy: entry point.  Converts COUNT in a2 into a word counter plus
   a one-bit byte mask (t10) for the last byte covered by COUNT, then
   branches to stxncpy_aligned when a0 and a1 agree in their low three
   bits, or to $unaligned otherwise.  Returns through t9.  */
.ent __stxncpy
|
||||
.globl __stxncpy
|
||||
__stxncpy:
|
||||
.frame sp, 0, t9, 0
|
||||
.prologue 0
|
||||
|
||||
/* Are source and destination co-aligned? */
|
||||
xor a0, a1, t1 # E :
|
||||
and a0, 7, t0 # E : find dest misalignment
|
||||
and t1, 7, t1 # E : (stall)
|
||||
addq a2, t0, a2 # E : bias count by dest misalignment (stall)
|
||||
|
||||
subq a2, 1, a2 # E :
|
||||
and a2, 7, t2 # E : (stall) byte index of the last counted byte in its word
|
||||
srl a2, 3, a2 # U : a2 = loop counter = (count - 1)/8 (stall)
|
||||
addq zero, 1, t10 # E :
|
||||
|
||||
sll t10, t2, t10 # U : t10 = bitmask of last count byte
|
||||
bne t1, $unaligned # U :
|
||||
/* We are co-aligned; take care of a partial first word. */
|
||||
ldq_u t1, 0(a1) # L : load first src word
|
||||
addq a1, 8, a1 # E :
|
||||
|
||||
beq t0, stxncpy_aligned # U : avoid loading dest word if not needed
|
||||
ldq_u t0, 0(a0) # L :
|
||||
nop
|
||||
nop
|
||||
|
||||
br stxncpy_aligned # .. e1 :
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
|
||||
|
||||
|
||||
/* The source and destination are not co-aligned. Align the destination
|
||||
and cope. We have to be very careful about not reading too much and
|
||||
causing a SEGV. */
|
||||
|
||||
.align 4
|
||||
$u_head:
|
||||
/* We know just enough now to be able to assemble the first
|
||||
full source word. We can still find a zero at the end of it
|
||||
that prevents us from outputting the whole thing.
|
||||
|
||||
On entry to this basic block:
|
||||
t0 == the first dest word, unmasked
|
||||
t1 == the shifted low bits of the first source word
|
||||
t6 == bytemask that is -1 in dest word bytes */
|
||||
|
||||
ldq_u t2, 8(a1) # L : Latency=3 load second src word
|
||||
addq a1, 8, a1 # E :
|
||||
mskql t0, a0, t0 # U : mask trailing garbage in dst
|
||||
extqh t2, a1, t4 # U : (3 cycle stall on t2)
|
||||
|
||||
or t1, t4, t1 # E : first aligned src word complete (stall)
|
||||
mskqh t1, a0, t1 # U : mask leading garbage in src (stall)
|
||||
or t0, t1, t0 # E : first output word complete (stall)
|
||||
or t0, t6, t6 # E : mask original data for zero test (stall)
|
||||
|
||||
cmpbge zero, t6, t7 # E :
|
||||
beq a2, $u_eocfin # U : count ends inside this first word
|
||||
lda t6, -1 # E :
|
||||
nop
|
||||
|
||||
bne t7, $u_final # U :
|
||||
mskql t6, a1, t6 # U : mask out bits already seen
|
||||
stq_u t0, 0(a0) # L : store first output word
|
||||
or t6, t2, t2 # E :
|
||||
|
||||
cmpbge zero, t2, t7 # E : find nulls in second partial
|
||||
addq a0, 8, a0 # E :
|
||||
subq a2, 1, a2 # E :
|
||||
bne t7, $u_late_head_exit # U :
|
||||
|
||||
/* Finally, we've got all the stupid leading edge cases taken care
|
||||
of and we can set up to enter the main loop. */
|
||||
extql t2, a1, t1 # U : position hi-bits of lo word
|
||||
beq a2, $u_eoc # U :
|
||||
ldq_u t2, 8(a1) # L : read next high-order source word
|
||||
addq a1, 8, a1 # E :
|
||||
|
||||
extqh t2, a1, t0 # U : position lo-bits of hi word (stall)
|
||||
cmpbge zero, t2, t7 # E :
|
||||
nop
|
||||
bne t7, $u_eos # U :
|
||||
|
||||
/* Unaligned copy main loop. In order to avoid reading too much,
|
||||
the loop is structured to detect zeros in aligned source words.
|
||||
This has, unfortunately, effectively pulled half of a loop
|
||||
iteration out into the head and half into the tail, but it does
|
||||
prevent nastiness from accumulating in the very thing we want
|
||||
to run as fast as possible.
|
||||
|
||||
On entry to this basic block:
|
||||
t0 == the shifted low-order bits from the current source word
|
||||
t1 == the shifted high-order bits from the previous source word
|
||||
t2 == the unshifted current source word
|
||||
|
||||
We further know that t2 does not contain a null terminator. */
|
||||
|
||||
.align 4
|
||||
$u_loop:
|
||||
or t0, t1, t0 # E : current dst word now complete
|
||||
subq a2, 1, a2 # E : decrement word count
|
||||
extql t2, a1, t1 # U : extract high bits for next time
|
||||
addq a0, 8, a0 # E :
|
||||
|
||||
stq_u t0, -8(a0) # L : save the current word
|
||||
beq a2, $u_eoc # U :
|
||||
ldq_u t2, 8(a1) # L : Latency=3 load high word for next time
|
||||
addq a1, 8, a1 # E :
|
||||
|
||||
extqh t2, a1, t0 # U : extract low bits (2 cycle stall)
|
||||
cmpbge zero, t2, t7 # E : test new word for eos
|
||||
nop
|
||||
beq t7, $u_loop # U :
|
||||
|
||||
/* We've found a zero somewhere in the source word we just read.
|
||||
If it resides in the lower half, we have one (probably partial)
|
||||
word to write out, and if it resides in the upper half, we
|
||||
have one full and one partial word left to write out.
|
||||
|
||||
On entry to this basic block:
|
||||
t0 == the shifted low-order bits from the current source word
|
||||
t1 == the shifted high-order bits from the previous source word
|
||||
t2 == the unshifted current source word. */
|
||||
$u_eos:
|
||||
or t0, t1, t0 # E : first (partial) source word complete
|
||||
nop
|
||||
cmpbge zero, t0, t7 # E : is the null in this first bit? (stall)
|
||||
bne t7, $u_final # U : (stall)
|
||||
|
||||
stq_u t0, 0(a0) # L : the null was in the high-order bits
|
||||
addq a0, 8, a0 # E :
|
||||
subq a2, 1, a2 # E :
|
||||
nop
|
||||
|
||||
$u_late_head_exit:
|
||||
extql t2, a1, t0 # U :
|
||||
cmpbge zero, t0, t7 # E :
|
||||
or t7, t10, t6 # E : (stall)
|
||||
cmoveq a2, t6, t7 # E : Latency=2, extra map slot (stall); add the end-of-count bit when no words remain
|
||||
|
||||
/* Take care of a final (probably partial) result word.
|
||||
On entry to this basic block:
|
||||
t0 == assembled source word
|
||||
t7 == cmpbge mask that found the null. */
|
||||
$u_final:
|
||||
negq t7, t6 # E : isolate low bit set
|
||||
and t6, t7, t8 # E : (stall)
|
||||
and t8, 0x80, t6 # E : avoid dest word load if we can (stall)
|
||||
bne t6, 1f # U : (stall)
|
||||
|
||||
ldq_u t1, 0(a0) # L :
|
||||
subq t8, 1, t6 # E :
|
||||
or t6, t8, t7 # E : (stall)
|
||||
zapnot t0, t7, t0 # U : kill source bytes > null
|
||||
|
||||
zap t1, t7, t1 # U : kill dest bytes <= null
|
||||
or t0, t1, t0 # E : (stall)
|
||||
nop
|
||||
nop
|
||||
|
||||
1: stq_u t0, 0(a0) # L :
|
||||
ret (t9) # L0 : Latency=3
|
||||
|
||||
/* Got to end-of-count before end of string.
|
||||
On entry to this basic block:
|
||||
t1 == the shifted high-order bits from the previous source word */
|
||||
$u_eoc:
|
||||
/* Shift the end-of-count bit left by the source byte offset.  If it
   still lies within the low eight bits, the load of the next source
   word is skipped -- presumably because the last counted byte is then
   already in t1.  */
and a1, 7, t6 # E :
|
||||
sll t10, t6, t6 # U : (stall)
|
||||
and t6, 0xff, t6 # E : (stall)
|
||||
bne t6, 1f # U : (stall)
|
||||
|
||||
ldq_u t2, 8(a1) # L : load final src word
|
||||
nop
|
||||
extqh t2, a1, t0 # U : extract low bits for last word (stall)
|
||||
or t1, t0, t1 # E : (stall)
|
||||
|
||||
1: cmpbge zero, t1, t7 # E :
|
||||
mov t1, t0 # E : $u_final expects the assembled word in t0
|
||||
|
||||
$u_eocfin: # end-of-count, final word
|
||||
or t10, t7, t7 # E :
|
||||
br $u_final # L0 : Latency=3
|
||||
|
||||
/* Unaligned copy entry point. */
|
||||
.align 4
|
||||
$unaligned:
|
||||
|
||||
ldq_u t1, 0(a1) # L : load first source word
|
||||
and a0, 7, t4 # E : find dest misalignment
|
||||
and a1, 7, t5 # E : find src misalignment
|
||||
/* Conditionally load the first destination word and a bytemask
|
||||
with 0xff indicating that the destination byte is sacrosanct. */
|
||||
mov zero, t0 # E :
|
||||
|
||||
mov zero, t6 # E :
|
||||
beq t4, 1f # U :
|
||||
ldq_u t0, 0(a0) # L :
|
||||
lda t6, -1 # E :
|
||||
|
||||
mskql t6, a0, t6 # U :
|
||||
nop
|
||||
nop
|
||||
1: subq a1, t4, a1 # E : sub dest misalignment from src addr
|
||||
|
||||
/* If source misalignment is larger than dest misalignment, we need
|
||||
extra startup checks to avoid SEGV. */
|
||||
|
||||
cmplt t4, t5, t8 # E :
|
||||
extql t1, a1, t1 # U : shift src into place
|
||||
lda t2, -1 # E : for creating masks later
|
||||
beq t8, $u_head # U : (stall)
|
||||
|
||||
mskqh t2, t5, t2 # U : begin src byte validity mask
|
||||
cmpbge zero, t1, t7 # E : is there a zero?
|
||||
extql t2, a1, t2 # U :
|
||||
or t7, t10, t5 # E : test for end-of-count too
|
||||
|
||||
cmpbge zero, t2, t3 # E : t3 = bytes that hold no valid source data
|
||||
cmoveq a2, t5, t7 # E : Latency=2, extra map slot
|
||||
nop # E : keep with cmoveq
|
||||
andnot t7, t3, t7 # E : (stall) ignore hits in bytes with no valid data
|
||||
|
||||
beq t7, $u_head # U :
|
||||
/* At this point we've found a zero in the first partial word of
|
||||
the source. We need to isolate the valid source data and mask
|
||||
it into the original destination data. (Incidentally, we know
|
||||
that we'll need at least one byte of that original dest word.) */
|
||||
ldq_u t0, 0(a0) # L :
|
||||
negq t7, t6 # E : build bitmask of bytes <= zero
|
||||
mskqh t1, t4, t1 # U :
|
||||
|
||||
and t6, t7, t8 # E :
|
||||
subq t8, 1, t6 # E : (stall)
|
||||
or t6, t8, t7 # E : (stall)
|
||||
zapnot t2, t7, t2 # U : prepare source word; mirror changes (stall)
|
||||
|
||||
zapnot t1, t7, t1 # U : to source validity mask
|
||||
andnot t0, t2, t0 # E : zero place for source to reside
|
||||
or t0, t1, t0 # E : and put it there (stall both t0, t1)
|
||||
stq_u t0, 0(a0) # L : (stall)
|
||||
|
||||
ret (t9) # L0 : Latency=3
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
|
||||
.end __stxncpy
|
||||
|
@ -1 +0,0 @@
|
||||
alpha/alphaev6
|
@ -1,51 +0,0 @@
|
||||
/* Copyright (C) 2000, 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Finds the first bit set in an integer. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
|
||||
/* int __ffs (int x): 1-based index of the least significant set bit of
   the low 32 bits of the argument in $16, or 0 when none is set.
   Result is returned in $0.  */
ENTRY(__ffs)
#ifndef PROF
	.prologue 0
#else
	/* Profiling build: set up gp, then call _mcount through AT.  */
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.prologue 1
#endif

	zap	$16, 0xF0, $16		# drop bytes 4-7, keep the low 32 bits
	cttz	$16, $0			# count trailing zero bits
	addq	$0, 1, $0		# make the index 1-based
	cmoveq	$16, 0, $0		# nothing set: result is 0

	nop
	nop
	nop
	ret

END(__ffs)
|
||||
|
||||
weak_alias (__ffs, ffs)
|
||||
libc_hidden_builtin_def (ffs)
|
@ -1,45 +0,0 @@
|
||||
/* Copyright (C) 2000 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Finds the first bit set in a long. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
/* ffsl: 1-based index of the least significant set bit of the full
   64-bit argument in $16, or 0 when the argument is zero.  Result is
   returned in $0.  */
ENTRY(ffsl)
#ifndef PROF
	.prologue 0
#else
	/* Profiling build: set up gp, then call _mcount through AT.  */
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.prologue 1
#endif

	cttz	$16, $0			# count trailing zero bits
	addq	$0, 1, $0		# make the index 1-based
	cmoveq	$16, 0, $0		# zero input: result is 0
	ret

END(ffsl)
|
||||
|
||||
weak_extern (ffsl)
|
||||
weak_alias (ffsl, ffsll)
|
@ -1 +0,0 @@
|
||||
alpha/alphaev6/fpu
|
@ -1,93 +0,0 @@
|
||||
/* Copyright (C) 2000, 2002 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Return pointer to first occurrence of CH in STR. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
/* __rawmemchr: a0 = start address, a1 = byte to look for.
   Scans aligned quadwords without any length limit and returns in v0
   the address of the first byte equal to the low 8 bits of a1.
   The trailing letters in the comments (L, E, U, U0, L0) are the issue
   slot annotations carried over from the hand scheduling; instructions
   are grouped four at a time.  */
ENTRY(__rawmemchr)
#ifndef PROF
	.prologue 0
#else
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.prologue 1
#endif

	/* Fetch the first quadword while replicating the search byte
	   into every byte lane of a1.  */
	ldq_u	t0, 0(a0)		# L  : first quadword, latency 3
	and	a1, 0xff, t3		# E  : 00000000000000ch
	insbl	a1, 1, t5		# U  : 000000000000ch00
	insbl	a1, 7, a2		# U  : ch00000000000000

	insbl	t3, 6, a3		# U  : 00ch000000000000
	or	t5, t3, a1		# E  : 000000000000chch
	andnot	a0, 7, v0		# E  : v0 = aligned scan pointer
	lda	t4, -1			# E  : start of the garbage mask

	mskqh	t4, a0, t4		# U  : keep only bytes at or after a0
	or	a2, a3, a2		# E  : chch000000000000
	inswl	a1, 2, t5		# E  : 00000000chch0000
	inswl	a1, 4, a3		# E  : 0000chch00000000

	or	a1, a2, a1		# E  : chch00000000chch
	or	a3, t5, t5		# E  : 0000chchchch0000
	cmpbge	zero, t4, t4		# E  : bit set iff byte precedes a0
	nop				# E

	/* Each instruction in this group depends on the one before it.  */
	or	t5, a1, a1		# E  : chchchchchchchch
	xor	t0, a1, t1		# E  : matching bytes become zero
	cmpbge	zero, t1, t0		# E  : bit set iff byte == c
	andnot	t0, t4, t0		# E  : ignore bytes before the start

	cttz	t0, a2			# U0 : byte index, used only on a hit
	nop				# E
	nop				# E
	bne	t0, $hit		# U

	/* Main scan: one aligned quadword per iteration.  The loop waits
	   on every load; the original author notes that unrolling it is
	   generally unsafe.  */
$scan:
	ldq	t0, 8(v0)		# L  : latency 3
	addq	v0, 8, v0		# E
	xor	t0, a1, t1		# E
	cmpbge	zero, t1, t0		# E  : bit set iff byte == c

	cttz	t0, a2			# U0 : byte index, used only on a hit
	nop				# E
	nop				# E
	beq	t0, $scan		# U

$hit:
	/* The first two instructions isolate the lowest set bit of t0;
	   nothing below reads that value, the result comes from a2.  */
	negq	t0, t1			# E
	and	t0, t1, t0		# E
	addq	v0, a2, v0		# E  : quadword address + byte index
	ret				# L0

END(__rawmemchr)
|
||||
|
||||
libc_hidden_def (__rawmemchr)
|
||||
weak_alias (__rawmemchr, rawmemchr)
|
@ -1,54 +0,0 @@
|
||||
/* Copyright (C) 2000, 2002, 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Copy SRC to DEST returning the address of the terminating 0 in DEST. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
.text
|
||||
|
||||
/* __stpcpy: a0 = DEST, a1 = SRC.  Copies through __stxcpy and returns
   in v0 the address of the last byte written (the terminating 0).  */
ENTRY(__stpcpy)
	ldgp	gp, 0(pv)
#if defined PROF
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
#endif
	.prologue 1

	.align 4
	mov	a0, v0
	nop
	jsr	t9, __stxcpy		/* return address is left in t9 */

	/* On return from __stxcpy:
	     t8 = single-bit mask selecting the last byte written
	     a0 = unaligned address of the last *word* written  */

	cttz	t8, t8			/* mask bit -> byte index */
	andnot	a0, 7, a0		/* align the word address */
	addq	a0, t8, v0		/* address of the last byte */
	ret

END(__stpcpy)
|
||||
|
||||
weak_alias (__stpcpy, stpcpy)
|
||||
libc_hidden_def (__stpcpy)
|
||||
libc_hidden_builtin_def (stpcpy)
|
@ -1,116 +0,0 @@
|
||||
/* Copyright (C) 2000, 2002 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson (rth@redhat.com)
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Copy no more than N bytes from SRC to DEST, returning the address of
|
||||
the terminating '\0' in DEST. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noat
|
||||
.set noreorder
|
||||
.text
|
||||
|
||||
/* __stpncpy: a0 = DEST, a1 = SRC, a2 = N.
   Copies through __stxncpy, zero-fills the rest of the N bytes and
   returns in v0 the address of the terminating 0 in DEST, or the
   address one past the last byte written when no 0 was copied.
   Assumed from the way the values are used here (confirm against
   __stxncpy): on return t0 is the last word written, t8 the one-bit
   mask of the last byte written, t10 the one-bit mask of the
   end-of-count byte, a0 the address of the last word written and
   a2 the number of full words still to be cleared.  */
ENTRY(__stpncpy)
	ldgp	gp, 0(pv)
#if defined PROF
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
#endif
	.prologue 1

	mov	a0, v0
	beq	a2, $nothing_to_do	/* N == 0: return DEST untouched */

	.align 4
	nop
	nop
	jsr	t9, __stxncpy		/* the copy itself */

	cttz	t8, t4			/* byte index of the last byte written */
	zapnot	t0, t8, t5		/* keep only that byte of the word */
	andnot	a0, 7, a0		/* align the word address */
	bne	a2, $clear_words	/* full words left to clear? */

	/* Everything that remains lies inside the word held in t0.  */
	subq	t8, 1, t2
	subq	t10, 1, t3
	cmpult	zero, t5, t5		/* 1 iff the last byte written is not 0 */
	addq	a0, t4, v0		/* address of the last byte written */

	or	t2, t8, t2		/* bytes up to and including the last written */
	or	t3, t10, t3		/* bytes up to and including end-of-count */
	addq	v0, t5, v0		/* step past it when it is not the 0 */
	andnot	t3, t2, t3		/* bytes after the copy, within the count */

	zap	t0, t3, t0		/* clear them */
	nop
	stq	t0, 0(a0)
	ret

$clear_words:
	subq	t8, 1, t7		/* clear the tail of the word just written */
	cmpult	zero, t5, t5		/* 1 iff the last byte written is not 0 */
	or	t7, t8, t7
	zapnot	t0, t7, t0

	subq	a2, 1, a2
	stq	t0, 0(a0)
	addq	a0, 8, a1		/* a1 = next word to clear */
	beq	a2, $final_word		/* no whole words in between */

	nop
	nop
	nop
	blbc	a2, $zero_pair		/* even count: go straight to pairs */

	/* Odd count: clear a single word so the rest is a multiple of two.  */
	stq	zero, 0(a1)
	subq	a2, 1, a2
	addq	a1, 8, a1
	beq	a2, $final_word

	/* Clear two words per iteration.  */
$zero_pair:
	stq	zero, 0(a1)
	subq	a2, 2, a2
	nop
	nop

	stq	zero, 8(a1)
	addq	a1, 16, a1
	nop
	bne	a2, $zero_pair

$final_word:
	ldq	t0, 0(a1)		/* clear the leading bytes of the final word */
	subq	t10, 1, t7
	addq	a0, t4, v0		/* address of the last byte written */
	nop

	or	t7, t10, t7		/* bytes up to and including end-of-count */
	addq	v0, t5, v0		/* step past it when it is not the 0 */
	zap	t0, t7, t0
	stq	t0, 0(a1)

	/* The path above falls through into the common return.  */
$nothing_to_do:
	nop
	nop
	nop
	ret

END(__stpncpy)
|
||||
|
||||
libc_hidden_def (__stpncpy)
|
||||
weak_alias (__stpncpy, stpncpy)
|
@ -1,62 +0,0 @@
|
||||
/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@tamu.edu>, 1996.
|
||||
EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Append a null-terminated string from SRC to DST. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.text
|
||||
|
||||
/* strcat: $16 = DST, $17 = SRC (passed through untouched).
   Locates the terminating 0 of DST, then transfers to __stxcpy to do
   the copy.  $0 holds DST as the return value.  */
ENTRY(strcat)
	ldgp	gp, 0(pv)
#if defined PROF
	.set noat
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.set at
#endif
	.prologue 1

	mov	$16, $0			# E  : return value is DST

	/* Look for the end of DST, starting with its first (possibly
	   misaligned) quadword.  */
	ldq_u	$1, 0($16)		# L  : first quadword
	lda	$2, -1			# E
	insqh	$2, $16, $2		# U  : 0xff in the bytes before DST

	andnot	$16, 7, $16		# E  : align the scan pointer
	or	$2, $1, $1		# E  : bytes before DST can not match
	cmpbge	$31, $1, $2		# E  : bit set iff byte == 0
	bne	$2, $end_found		# U

$next_quad:
	ldq	$1, 8($16)		# L
	addq	$16, 8, $16		# E
	cmpbge	$31, $1, $2		# E  : bit set iff byte == 0
	beq	$2, $next_quad		# U

$end_found:
	cttz	$2, $3			# U0 : byte index of the 0
	addq	$16, $3, $16		# E  : $16 = address of the 0

	/* Append: copy our return address from $26 into $23 and jump to
	   __stxcpy without linking (presumably __stxcpy returns through
	   $23 -- confirm against its definition).  */
	mov	$26, $23		# E
	jmp	$31, __stxcpy		# L0

END(strcat)
|
||||
libc_hidden_builtin_def (strcat)
|
@ -1,101 +0,0 @@
|
||||
/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@tamu.edu>, 1996.
|
||||
EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Return the address of a given character within a null-terminated
|
||||
string, or null if it is not found. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
/* strchr: a0 = string, a1 = character.
   Returns in v0 the address of the first byte equal to the low 8 bits
   of a1, or 0 when the terminating 0 is reached first.
   The trailing letters in the comments (L, E, U, U0, L0) are the issue
   slot annotations carried over from the hand scheduling; instructions
   are grouped four at a time.  */
ENTRY(strchr)
#ifndef PROF
	.prologue 0
#else
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.prologue 1
#endif

	/* Fetch the first quadword while replicating the search byte
	   into every byte lane of a1.  */
	ldq_u	t0, 0(a0)		# L  : first quadword, latency 3
	and	a1, 0xff, t3		# E  : 00000000000000ch
	insbl	a1, 1, t5		# U  : 000000000000ch00
	insbl	a1, 7, a2		# U  : ch00000000000000

	insbl	t3, 6, a3		# U  : 00ch000000000000
	or	t5, t3, a1		# E  : 000000000000chch
	andnot	a0, 7, v0		# E  : v0 = aligned scan pointer
	lda	t4, -1			# E  : start of the garbage mask

	mskqh	t4, a0, t4		# U  : keep only bytes at or after a0
	or	a2, a3, a2		# E  : chch000000000000
	inswl	a1, 2, t5		# E  : 00000000chch0000
	inswl	a1, 4, a3		# E  : 0000chch00000000

	or	a1, a2, a1		# E  : chch00000000chch
	or	a3, t5, t5		# E  : 0000chchchch0000
	cmpbge	zero, t0, t2		# E  : bit set iff byte == 0
	cmpbge	zero, t4, t4		# E  : bit set iff byte precedes a0

	/* Each instruction in this group depends on the one before it.  */
	or	t5, a1, a1		# E  : chchchchchchchch
	xor	t0, a1, t1		# E  : matching bytes become zero
	cmpbge	zero, t1, t3		# E  : bit set iff byte == c
	or	t2, t3, t0		# E  : bit set iff byte is c or 0

	andnot	t0, t4, t0		# E  : ignore bytes before the start
	cttz	t0, a2			# U0 : byte index, used only on a hit
	nop				# E
	bne	t0, $hit		# U

	/* Main scan: one aligned quadword per iteration.  The loop waits
	   on every load; the original author notes that unrolling it is
	   generally unsafe.  */
$scan:
	ldq	t0, 8(v0)		# L  : latency 3
	addq	v0, 8, v0		# E
	xor	t0, a1, t1		# E
	cmpbge	zero, t0, t2		# E  : bit set iff byte == 0

	cmpbge	zero, t1, t3		# E  : bit set iff byte == c
	or	t2, t3, t0		# E
	cttz	t3, a2			# U0 : byte index, used only on a hit
	beq	t0, $scan		# U

$hit:
	negq	t0, t1			# E  : isolate the lowest set bit ...
	and	t0, t1, t0		# E  : ... of the combined mask
	and	t0, t3, t1		# E  : nonzero iff that byte was c
	addq	v0, a2, v0		# E  : quadword address + byte index

	cmoveq	t1, $31, v0		# E  : terminator came first: return 0
	nop
	nop
	ret				# L0

END(strchr)
|
||||
|
||||
weak_alias (strchr, index)
|
||||
libc_hidden_builtin_def (strchr)
|
@ -1,61 +0,0 @@
|
||||
/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
|
||||
Contributed by David Mosberger (davidm@cs.arizona.edu).
|
||||
EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Finds length of a 0-terminated string. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
/* strlen: $16 = string.  Returns in $0 the number of bytes that
   precede the terminating 0.  */
ENTRY(strlen)
#ifndef PROF
	.prologue 0
#else
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.prologue 1
#endif

	ldq_u	$1, 0($16)		# L  : first quadword, $16 may be misaligned
	lda	$2, -1($31)		# E
	insqh	$2, $16, $2		# U  : 0xff in the bytes before the string
	andnot	$16, 7, $0		# E  : $0 = aligned scan pointer

	or	$2, $1, $1		# E  : bytes before the string can not match
	cmpbge	$31, $1, $2		# E  : bit i set iff byte i == 0
	nop				# E
	bne	$2, $end_found		# U

$next_quad:
	ldq	$1, 8($0)		# L
	addq	$0, 8, $0		# E  : advance to that quadword
	cmpbge	$31, $1, $2		# E  : bit i set iff byte i == 0
	beq	$2, $next_quad		# U

$end_found:
	cttz	$2, $3			# U0 : byte index of the 0
	addq	$0, $3, $0		# E  : address of the 0
	subq	$0, $16, $0		# E  : minus the start = length
	ret	$31, ($26)		# L0

END(strlen)
|
||||
libc_hidden_builtin_def (strlen)
|
@ -1,88 +0,0 @@
|
||||
/* Copyright (C) 2000, 2001 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@tamu.edu>, 1996.
|
||||
EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Append no more than COUNT characters from the null-terminated string SRC
|
||||
to the null-terminated string DST. Always null-terminate the new DST. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.text
|
||||
|
||||
/* strncat: a0 = DST, a1 = SRC, a2 = COUNT.
   Finds the end of DST, appends through __stxncpy and makes sure the
   result is 0-terminated.  Returns DST in v0.
   Assumed from the way the values are used here (confirm against
   __stxncpy): on return t0 is the last word written, t8 the one-bit
   mask of the last byte written, t10 the one-bit mask of the
   end-of-count byte and a0 the address of the last word written.  */
ENTRY(strncat)
	ldgp	gp, 0(pv)
#if defined PROF
	.set noat
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.set at
#endif
	.prologue 1

	mov	a0, v0			#      return value is DST
	beq	a2, $nothing_to_do	# U  : COUNT == 0

	/* Look for the end of DST, starting with its first (possibly
	   misaligned) quadword.  */
	ldq_u	t0, 0(a0)		# L  : first quadword
	lda	t1, -1			# E

	insqh	t1, v0, t1		# U  : 0xff in the bytes before DST
	andnot	a0, 7, a0		# E  : align the scan pointer
	nop				# E
	or	t1, t0, t0		# E  : bytes before DST can not match

	nop				# E
	nop				# E
	cmpbge	zero, t0, t1		# E  : bit set iff byte == 0
	bne	t1, $end_found		# U

$next_quad:
	ldq	t0, 8(a0)		# L
	addq	a0, 8, a0		# E
	cmpbge	zero, t0, t1		# E  : bit set iff byte == 0
	beq	t1, $next_quad		# U

$end_found:
	cttz	t1, t2			# U0 : byte index of the 0
	addq	a0, t2, a0		# E  : a0 = address of the 0
	jsr	t9, __stxncpy		# L0 : the append itself

	/* Take care of the terminating 0.  */

	cttz	t10, t2			# U0 : byte offset of end-of-count
	bic	a0, 7, a0		# E  : align the last write address
	zapnot	t0, t8, t1		# U  : last byte written, 0 if it was a 0
	nop				# E

	bne	t1, $terminate		# U  : not a 0, one must be added
	nop				# E
	nop				# E
	ret				# L0

$terminate:
	addq	t2, a0, a0		# E  : address of the end-of-count byte
	stb	zero, 1(a0)		# L  : 0 goes into the byte after it
	nop				# E
	ret				# L0

$nothing_to_do:
	nop				# E
	nop				# E
	nop				# E
	ret				# L0

END(strncat)
|
@ -1,117 +0,0 @@
|
||||
/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
|
||||
EV67 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Return the address of the last occurrence of a given character
|
||||
within a null-terminated string, or null if it is not found. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.arch ev6
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
/* strrchr: a0 = string, a1 = character.
   Returns in v0 the address of the last byte equal to the low 8 bits
   of a1 that occurs no later than the terminating 0, or 0 when there
   is no such byte.
   Register roles during the scan:
     v0 = aligned address of the quadword being examined
     t6 = aligned address of the quadword holding the latest match
     t8 = byte-compare mask of the matches in that quadword
   The trailing letters in the comments (L, E, U, U0, L0) are the issue
   slot annotations carried over from the hand scheduling; instructions
   are grouped four at a time.  */
ENTRY(strrchr)
#ifndef PROF
	.prologue 0
#else
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.prologue 1
#endif

	/* Fetch the first quadword while replicating the search byte
	   into every byte lane of a1.  */
	and	a1, 0xff, t2		# E  : 00000000000000ch
	insbl	a1, 1, t4		# U  : 000000000000ch00
	insbl	a1, 2, t5		# U  : 0000000000ch0000
	ldq_u	t0, 0(a0)		# L  : first quadword, latency 3

	mov	zero, t6		# E  : no match address yet
	or	t2, t4, a1		# E  : 000000000000chch
	sll	t5, 8, t3		# U  : 00000000ch000000
	mov	zero, t8		# E  : no match mask yet

	andnot	a0, 7, v0		# E  : v0 = aligned scan pointer
	or	t5, t3, t3		# E  : 00000000chch0000
	sll	a1, 32, t2		# U  : 0000chch00000000
	sll	a1, 48, t4		# U  : chch000000000000

	or	t4, a1, a1		# E  : chch00000000chch
	or	t2, t3, t2		# E  : 0000chchchch0000
	or	a1, t2, a1		# E  : chchchchchchchch
	lda	t5, -1			# E  : start of the garbage mask

	cmpbge	zero, t0, t1		# E  : bit set iff byte == 0
	mskqh	t5, a0, t4		# E  : keep only bytes at or after a0
	xor	t0, a1, t2		# E  : matching bytes become zero
	cmpbge	zero, t4, t4		# E  : bit set iff byte precedes a0

	cmpbge	zero, t2, t3		# E  : bit set iff byte == c
	andnot	t1, t4, t1		# E  : drop garbage from the 0 test
	andnot	t3, t4, t3		# E  : drop garbage from the char test
	bne	t1, $terminator		# U  : string ends in the first quadword

	/* Main scan.  Each pass first records the matches found in the
	   previous quadword, then examines the next one.  */
$scan:
	ldq	t0, 8(v0)		# L  : next quadword
	cmovne	t3, v0, t6		# E  : remember address of previous match
	nop				#      cmov: latency 2, extra map slot
	nop

	cmovne	t3, t3, t8		# E  : remember its compare mask
	nop				#      keep with the cmovne
	addq	v0, 8, v0		# E
	xor	t0, a1, t2		# E

	cmpbge	zero, t0, t1		# E  : bit set iff byte == 0
	cmpbge	zero, t2, t3		# E  : bit set iff byte == c
	beq	t1, $scan		# U  : no terminator yet, keep going
	nop

	/* Terminator seen: discard matches that lie beyond it.  */
$terminator:
	negq	t1, t4			# E  : isolate the first 0 byte ...
	and	t1, t4, t4		# E  : ... as a single bit
	subq	t4, 1, t5		# E  : mask of the bytes up to ...
	or	t4, t5, t4		# E  : ... and including the 0

	and	t3, t4, t3		# E  : drop matches after the 0
	cmovne	t3, t3, t8		# E  : record a match in this quadword
	nop				#      keep with the cmovne
	nop

	cmovne	t3, v0, t6		# E  : and its quadword address
	nop				#      keep with the cmovne
	/* Turn the recorded mask into the byte index of the last match.  */
	ctlz	t8, t2			# U0 : latency 3, gives 0x40 when t8 is 0
	nop

	cmoveq	t8, 0x3f, t2		# E  : no match at all: force index 0
	nop				# E  : cmov latency hides behind ctlz
	lda	t5, 0x3f($31)		# E
	subq	t5, t2, t5		# E  : 63 - leading zeros = byte index

	addq	t6, t5, v0		# E  : quadword address + byte index
	ret				# L0 : latency 3
	nop
	nop

END(strrchr)
|
||||
|
||||
weak_alias (strrchr, rindex)
|
||||
libc_hidden_builtin_def (strrchr)
|
@ -1 +0,0 @@
|
||||
#include <sysdeps/ia64/backtrace.c>
|
@ -1,87 +0,0 @@
|
||||
/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
|
||||
Contributed by David Mosberger (davidm@cs.arizona.edu).
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* __bb_init_func is invoked at the beginning of each function, before
|
||||
any registers have been saved. It is therefore safe to use any
|
||||
caller-saved (call-used) registers (except for argument registers
|
||||
a1-a5). */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
/*
|
||||
* These offsets should match with "struct bb" declared in gcc/libgcc2.c.
|
||||
*/
|
||||
#define ZERO_WORD 0x00
|
||||
#define NEXT 0x20
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
||||
/* __bb_init_func: a0 = pointer to the basic-block descriptor.
   Fast path: when the word at offset ZERO_WORD is already nonzero
   there is nothing to do.  Otherwise branch to init, which does the
   registration and returns to our caller itself.  */
ENTRY(__bb_init_func)
	.prologue 0

	ldq	t0, ZERO_WORD(a0)	/* t0 = blocks->zero_word */
	beq	t0, init		/* still zero: not registered yet */
	ret

END(__bb_init_func)
|
||||
|
||||
/* init: slow path of __bb_init_func, entered by a branch (not a call)
   with a0 = pointer to the basic-block descriptor and ra still holding
   the return address of the original caller.

   Marks the descriptor as initialized, pushes it on the __bb_head
   list and, when the list was empty and _gmonparam.state is nonzero,
   registers __bb_exit_func with atexit.  pv, ra and a1-a5 are
   preserved; a0, t0-t3 and gp are overwritten.

   Frame layout (0x40 bytes, so sp keeps its 16-byte alignment across
   the call to atexit; the previous size of 0x38 left it only 8-byte
   aligned):
     0x00 ra   0x08 a1   0x10 a2   0x18 a3
     0x20 a4   0x28 a5   0x30 pv   0x38 padding  */
	.ent init
init:
	.frame sp, 0x40, ra, 0
	subq	sp, 0x40, sp
	.prologue 0

	stq	pv, 0x30(sp)
	br	pv, 1f			/* pv = address of the next insn */
1:	ldgp	gp, 0(pv)		/* derive gp from it */

	ldiq	t1, __bb_head
	lda	t3, _gmonparam
	ldq	t2, 0(t1)		/* t2 = __bb_head */
	ldl	t3, 0(t3)		/* t3 = _gmonparam.state */
	lda	t0, 1
	stq	t0, ZERO_WORD(a0)	/* blocks->zero_word = 1 */
	stq	t2, NEXT(a0)		/* blocks->next = __bb_head */
	stq	a0, 0(t1)		/* __bb_head = blocks */
	bne	t2, $leave		/* list was not empty: done */
	beq	t3, $leave		/* t3 == GMON_PROF_ON? yes -> */

	/* also need to initialize destructor: */
	stq	ra, 0x00(sp)
	lda	a0, __bb_exit_func
	stq	a1, 0x08(sp)
	lda	pv, atexit
	stq	a2, 0x10(sp)
	stq	a3, 0x18(sp)
	stq	a4, 0x20(sp)
	stq	a5, 0x28(sp)
	jsr	ra, (pv), atexit
	ldq	ra, 0x00(sp)
	ldq	a1, 0x08(sp)
	ldq	a2, 0x10(sp)
	ldq	a3, 0x18(sp)
	ldq	a4, 0x20(sp)
	ldq	a5, 0x28(sp)

$leave:	ldq	pv, 0x30(sp)
	addq	sp, 0x40, sp
	ret

	.end init
|
@ -1,369 +0,0 @@
|
||||
/* Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Exact-width atomic types, signed and unsigned.  */
typedef int8_t atomic8_t;
typedef uint8_t uatomic8_t;
typedef int16_t atomic16_t;
typedef uint16_t uatomic16_t;
typedef int32_t atomic32_t;
typedef uint32_t uatomic32_t;
typedef int64_t atomic64_t;
typedef uint64_t uatomic64_t;

/* Counterparts built on the <stdint.h> "fast" types of each width.  */
typedef int_fast8_t atomic_fast8_t;
typedef uint_fast8_t uatomic_fast8_t;
typedef int_fast16_t atomic_fast16_t;
typedef uint_fast16_t uatomic_fast16_t;
typedef int_fast32_t atomic_fast32_t;
typedef uint_fast32_t uatomic_fast32_t;
typedef int_fast64_t atomic_fast64_t;
typedef uint_fast64_t uatomic_fast64_t;

/* Pointer-sized and widest integer variants.  */
typedef intptr_t atomicptr_t;
typedef uintptr_t uatomicptr_t;
typedef intmax_t atomic_max_t;
typedef uintmax_t uatomic_max_t;
|
||||
|
||||
|
||||
/* Text spliced into the asm templates where a memory barrier is
   wanted: an "mb" instruction, or nothing at all when UP is defined.  */
#ifndef UP
# define __MB " mb\n"
#else
# define __MB /* nothing */
#endif
|
||||
|
||||
|
||||
/* Compare and exchange. For all of the "xxx" routines, we expect a
|
||||
"__prev" and a "__cmp" variable to be provided by the enclosing scope,
|
||||
in which values are returned. */
|
||||
|
||||
/* 8-bit compare-and-exchange on the byte at MEM.
   The enclosing scope must provide __prev and __cmp: __prev receives
   the byte read from memory and __cmp is nonzero iff it equalled OLD,
   in which case NEW was stored.  MB1 and MB2 are asm text (for
   example __MB) emitted before the sequence and after a successful
   store.  The byte is accessed through a load-locked/store-conditional
   pair on the aligned quadword that contains it.  */
#define __arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2)	\
({									\
  /* Scratch values: the quadword being rewritten, NEW moved into its	\
     byte lane, and the aligned address of the quadword.  */		\
  unsigned long __tmp;							\
  unsigned long __snew;							\
  unsigned long __addr64;						\
  __asm__ __volatile__ (						\
	mb1								\
	/* Align the address and position NEW in its lane.  */		\
	" andnot %[__addr8],7,%[__addr64]\n"				\
	" insbl %[__new],%[__addr8],%[__snew]\n"			\
	/* Load-locked, extract the current byte, compare with OLD.  */	\
	"1: ldq_l %[__tmp],0(%[__addr64])\n"				\
	" extbl %[__tmp],%[__addr8],%[__prev]\n"			\
	" cmpeq %[__prev],%[__old],%[__cmp]\n"				\
	" beq %[__cmp],2f\n"						\
	/* Replace the byte and try to store; retry on failure.  */	\
	" mskbl %[__tmp],%[__addr8],%[__tmp]\n"				\
	" or %[__snew],%[__tmp],%[__tmp]\n"				\
	" stq_c %[__tmp],0(%[__addr64])\n"				\
	" beq %[__tmp],1b\n"						\
	mb2								\
	"2:"								\
	: [__prev] "=&r" (__prev),					\
	  [__snew] "=&r" (__snew),					\
	  [__tmp] "=&r" (__tmp),					\
	  [__cmp] "=&r" (__cmp),					\
	  [__addr64] "=&r" (__addr64)					\
	: [__addr8] "r" (mem),						\
	  [__old] "Ir" ((uint64_t)(uint8_t)(uint64_t)(old)),		\
	  [__new] "r" (new)						\
	: "memory");							\
})
|
||||
|
||||
#define __arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2) \
|
||||
({ \
|
||||
unsigned long __tmp, __snew, __addr64; \
|
||||
__asm__ __volatile__ ( \
|
||||
mb1 \
|
||||
" andnot %[__addr16],7,%[__addr64]\n" \
|
||||
" inswl %[__new],%[__addr16],%[__snew]\n" \
|
||||
"1: ldq_l %[__tmp],0(%[__addr64])\n" \
|
||||
" extwl %[__tmp],%[__addr16],%[__prev]\n" \
|
||||
" cmpeq %[__prev],%[__old],%[__cmp]\n" \
|
||||
" beq %[__cmp],2f\n" \
|
||||
" mskwl %[__tmp],%[__addr16],%[__tmp]\n" \
|
||||
" or %[__snew],%[__tmp],%[__tmp]\n" \
|
||||
" stq_c %[__tmp],0(%[__addr64])\n" \
|
||||
" beq %[__tmp],1b\n" \
|
||||
mb2 \
|
||||
"2:" \
|
||||
: [__prev] "=&r" (__prev), \
|
||||
[__snew] "=&r" (__snew), \
|
||||
[__tmp] "=&r" (__tmp), \
|
||||
[__cmp] "=&r" (__cmp), \
|
||||
[__addr64] "=&r" (__addr64) \
|
||||
: [__addr16] "r" (mem), \
|
||||
[__old] "Ir" ((uint64_t)(uint16_t)(uint64_t)(old)), \
|
||||
[__new] "r" (new) \
|
||||
: "memory"); \
|
||||
})
|
||||
|
||||
/* 32-bit compare-and-exchange using ldl_l / stl_c directly on MEM.

   The enclosing scope must declare __prev and __cmp.  __prev receives the
   value loaded from MEM.  __cmp first receives the cmpeq result; if the
   values differ the code branches to label 2 with __cmp == 0.  Otherwise
   __cmp is reused as the store-conditional scratch register: NEW is moved
   into it, stl_c overwrites it with the success flag, and a zero flag
   restarts the loop.  So on exit __cmp is non-zero exactly when the
   exchange was performed.

   MB1 and MB2 are string fragments pasted before and after the loop;
   MB2 precedes label 2 and is therefore skipped when the compare fails.
   OLD is cast through atomic32_t before widening -- presumably so it is
   sign-extended the same way as the value ldl_l produces; confirm against
   the Alpha architecture manual.  */
#define __arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2) \
|
||||
({ \
|
||||
__asm__ __volatile__ ( \
|
||||
mb1 \
|
||||
"1: ldl_l %[__prev],%[__mem]\n" \
|
||||
" cmpeq %[__prev],%[__old],%[__cmp]\n" \
|
||||
" beq %[__cmp],2f\n" \
|
||||
" mov %[__new],%[__cmp]\n" \
|
||||
" stl_c %[__cmp],%[__mem]\n" \
|
||||
" beq %[__cmp],1b\n" \
|
||||
mb2 \
|
||||
"2:" \
|
||||
: [__prev] "=&r" (__prev), \
|
||||
[__cmp] "=&r" (__cmp) \
|
||||
: [__mem] "m" (*(mem)), \
|
||||
[__old] "Ir" ((uint64_t)(atomic32_t)(uint64_t)(old)), \
|
||||
[__new] "Ir" (new) \
|
||||
: "memory"); \
|
||||
})
|
||||
|
||||
#define __arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2) \
|
||||
({ \
|
||||
__asm__ __volatile__ ( \
|
||||
mb1 \
|
||||
"1: ldq_l %[__prev],%[__mem]\n" \
|
||||
" cmpeq %[__prev],%[__old],%[__cmp]\n" \
|
||||
" beq %[__cmp],2f\n" \
|
||||
" mov %[__new],%[__cmp]\n" \
|
||||
" stq_c %[__cmp],%[__mem]\n" \
|
||||
" beq %[__cmp],1b\n" \
|
||||
mb2 \
|
||||
"2:" \
|
||||
: [__prev] "=&r" (__prev), \
|
||||
[__cmp] "=&r" (__cmp) \
|
||||
: [__mem] "m" (*(mem)), \
|
||||
[__old] "Ir" ((uint64_t)(old)), \
|
||||
[__new] "Ir" (new) \
|
||||
: "memory"); \
|
||||
})
|
||||
|
||||
/* For all "bool" routines, we return FALSE if exchange successful. */
|
||||
|
||||
#define __arch_compare_and_exchange_bool_8_int(mem, new, old, mb1, mb2) \
|
||||
({ unsigned long __prev; int __cmp; \
|
||||
__arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2); \
|
||||
!__cmp; })
|
||||
|
||||
#define __arch_compare_and_exchange_bool_16_int(mem, new, old, mb1, mb2) \
|
||||
({ unsigned long __prev; int __cmp; \
|
||||
__arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2); \
|
||||
!__cmp; })
|
||||
|
||||
#define __arch_compare_and_exchange_bool_32_int(mem, new, old, mb1, mb2) \
|
||||
({ unsigned long __prev; int __cmp; \
|
||||
__arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2); \
|
||||
!__cmp; })
|
||||
|
||||
#define __arch_compare_and_exchange_bool_64_int(mem, new, old, mb1, mb2) \
|
||||
({ unsigned long __prev; int __cmp; \
|
||||
__arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2); \
|
||||
!__cmp; })
|
||||
|
||||
/* For all "val" routines, return the old value whether exchange
|
||||
successful or not. */
|
||||
|
||||
#define __arch_compare_and_exchange_val_8_int(mem, new, old, mb1, mb2) \
|
||||
({ unsigned long __prev; int __cmp; \
|
||||
__arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2); \
|
||||
(typeof (*mem))__prev; })
|
||||
|
||||
#define __arch_compare_and_exchange_val_16_int(mem, new, old, mb1, mb2) \
|
||||
({ unsigned long __prev; int __cmp; \
|
||||
__arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2); \
|
||||
(typeof (*mem))__prev; })
|
||||
|
||||
#define __arch_compare_and_exchange_val_32_int(mem, new, old, mb1, mb2) \
|
||||
({ unsigned long __prev; int __cmp; \
|
||||
__arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2); \
|
||||
(typeof (*mem))__prev; })
|
||||
|
||||
#define __arch_compare_and_exchange_val_64_int(mem, new, old, mb1, mb2) \
|
||||
({ unsigned long __prev; int __cmp; \
|
||||
__arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2); \
|
||||
(typeof (*mem))__prev; })
|
||||
|
||||
/* Compare and exchange with "acquire" semantics, i.e. barrier after. */
|
||||
|
||||
#define atomic_compare_and_exchange_bool_acq(mem, new, old) \
|
||||
__atomic_bool_bysize (__arch_compare_and_exchange_bool, int, \
|
||||
mem, new, old, "", __MB)
|
||||
|
||||
#define atomic_compare_and_exchange_val_acq(mem, new, old) \
|
||||
__atomic_val_bysize (__arch_compare_and_exchange_val, int, \
|
||||
mem, new, old, "", __MB)
|
||||
|
||||
/* Compare and exchange with "release" semantics, i.e. barrier before. */
|
||||
|
||||
#define atomic_compare_and_exchange_bool_rel(mem, new, old) \
|
||||
__atomic_bool_bysize (__arch_compare_and_exchange_bool, int, \
|
||||
mem, new, old, __MB, "")
|
||||
|
||||
#define atomic_compare_and_exchange_val_rel(mem, new, old) \
|
||||
__atomic_val_bysize (__arch_compare_and_exchange_val, int, \
|
||||
mem, new, old, __MB, "")
|
||||
|
||||
|
||||
/* Atomically store value and return the previous value. */
|
||||
|
||||
#define __arch_exchange_8_int(mem, value, mb1, mb2) \
|
||||
({ \
|
||||
unsigned long __ret, __tmp, __addr64, __sval; \
|
||||
__asm__ __volatile__ ( \
|
||||
mb1 \
|
||||
" andnot %[__addr8],7,%[__addr64]\n" \
|
||||
" insbl %[__value],%[__addr8],%[__sval]\n" \
|
||||
"1: ldq_l %[__tmp],0(%[__addr64])\n" \
|
||||
" extbl %[__tmp],%[__addr8],%[__ret]\n" \
|
||||
" mskbl %[__tmp],%[__addr8],%[__tmp]\n" \
|
||||
" or %[__sval],%[__tmp],%[__tmp]\n" \
|
||||
" stq_c %[__tmp],0(%[__addr64])\n" \
|
||||
" beq %[__tmp],1b\n" \
|
||||
mb2 \
|
||||
: [__ret] "=&r" (__ret), \
|
||||
[__sval] "=&r" (__sval), \
|
||||
[__tmp] "=&r" (__tmp), \
|
||||
[__addr64] "=&r" (__addr64) \
|
||||
: [__addr8] "r" (mem), \
|
||||
[__value] "r" (value) \
|
||||
: "memory"); \
|
||||
__ret; })
|
||||
|
||||
#define __arch_exchange_16_int(mem, value, mb1, mb2) \
|
||||
({ \
|
||||
unsigned long __ret, __tmp, __addr64, __sval; \
|
||||
__asm__ __volatile__ ( \
|
||||
mb1 \
|
||||
" andnot %[__addr16],7,%[__addr64]\n" \
|
||||
" inswl %[__value],%[__addr16],%[__sval]\n" \
|
||||
"1: ldq_l %[__tmp],0(%[__addr64])\n" \
|
||||
" extwl %[__tmp],%[__addr16],%[__ret]\n" \
|
||||
" mskwl %[__tmp],%[__addr16],%[__tmp]\n" \
|
||||
" or %[__sval],%[__tmp],%[__tmp]\n" \
|
||||
" stq_c %[__tmp],0(%[__addr64])\n" \
|
||||
" beq %[__tmp],1b\n" \
|
||||
mb2 \
|
||||
: [__ret] "=&r" (__ret), \
|
||||
[__sval] "=&r" (__sval), \
|
||||
[__tmp] "=&r" (__tmp), \
|
||||
[__addr64] "=&r" (__addr64) \
|
||||
: [__addr16] "r" (mem), \
|
||||
[__value] "r" (value) \
|
||||
: "memory"); \
|
||||
__ret; })
|
||||
|
||||
#define __arch_exchange_32_int(mem, value, mb1, mb2) \
|
||||
({ \
|
||||
signed int __ret, __tmp; \
|
||||
__asm__ __volatile__ ( \
|
||||
mb1 \
|
||||
"1: ldl_l %[__ret],%[__mem]\n" \
|
||||
" mov %[__val],%[__tmp]\n" \
|
||||
" stl_c %[__tmp],%[__mem]\n" \
|
||||
" beq %[__tmp],1b\n" \
|
||||
mb2 \
|
||||
: [__ret] "=&r" (__ret), \
|
||||
[__tmp] "=&r" (__tmp) \
|
||||
: [__mem] "m" (*(mem)), \
|
||||
[__val] "Ir" (value) \
|
||||
: "memory"); \
|
||||
__ret; })
|
||||
|
||||
#define __arch_exchange_64_int(mem, value, mb1, mb2) \
|
||||
({ \
|
||||
unsigned long __ret, __tmp; \
|
||||
__asm__ __volatile__ ( \
|
||||
mb1 \
|
||||
"1: ldq_l %[__ret],%[__mem]\n" \
|
||||
" mov %[__val],%[__tmp]\n" \
|
||||
" stq_c %[__tmp],%[__mem]\n" \
|
||||
" beq %[__tmp],1b\n" \
|
||||
mb2 \
|
||||
: [__ret] "=&r" (__ret), \
|
||||
[__tmp] "=&r" (__tmp) \
|
||||
: [__mem] "m" (*(mem)), \
|
||||
[__val] "Ir" (value) \
|
||||
: "memory"); \
|
||||
__ret; })
|
||||
|
||||
#define atomic_exchange_acq(mem, value) \
|
||||
__atomic_val_bysize (__arch_exchange, int, mem, value, "", __MB)
|
||||
|
||||
#define atomic_exchange_rel(mem, value) \
|
||||
__atomic_val_bysize (__arch_exchange, int, mem, value, __MB, "")
|
||||
|
||||
|
||||
/* Atomically add value and return the previous (unincremented) value. */
|
||||
|
||||
/* Byte-sized exchange-and-add has no implementation here: any use expands
   to __builtin_trap ().  The trailing 0 only gives the statement
   expression a value so the macro can appear where a result is expected.
   None of the macro arguments are evaluated.  */
#define __arch_exchange_and_add_8_int(mem, value, mb1, mb2) \
|
||||
({ __builtin_trap (); 0; })
|
||||
|
||||
/* 16-bit exchange-and-add has no implementation here: any use expands
   to __builtin_trap ().  The trailing 0 only gives the statement
   expression a value so the macro can appear where a result is expected.
   None of the macro arguments are evaluated.  */
#define __arch_exchange_and_add_16_int(mem, value, mb1, mb2) \
|
||||
({ __builtin_trap (); 0; })
|
||||
|
||||
#define __arch_exchange_and_add_32_int(mem, value, mb1, mb2) \
|
||||
({ \
|
||||
signed int __ret, __tmp; \
|
||||
__asm__ __volatile__ ( \
|
||||
mb1 \
|
||||
"1: ldl_l %[__ret],%[__mem]\n" \
|
||||
" addl %[__ret],%[__val],%[__tmp]\n" \
|
||||
" stl_c %[__tmp],%[__mem]\n" \
|
||||
" beq %[__tmp],1b\n" \
|
||||
mb2 \
|
||||
: [__ret] "=&r" (__ret), \
|
||||
[__tmp] "=&r" (__tmp) \
|
||||
: [__mem] "m" (*(mem)), \
|
||||
[__val] "Ir" ((signed int)(value)) \
|
||||
: "memory"); \
|
||||
__ret; })
|
||||
|
||||
#define __arch_exchange_and_add_64_int(mem, value, mb1, mb2) \
|
||||
({ \
|
||||
unsigned long __ret, __tmp; \
|
||||
__asm__ __volatile__ ( \
|
||||
mb1 \
|
||||
"1: ldq_l %[__ret],%[__mem]\n" \
|
||||
" addq %[__ret],%[__val],%[__tmp]\n" \
|
||||
" stq_c %[__tmp],%[__mem]\n" \
|
||||
" beq %[__tmp],1b\n" \
|
||||
mb2 \
|
||||
: [__ret] "=&r" (__ret), \
|
||||
[__tmp] "=&r" (__tmp) \
|
||||
: [__mem] "m" (*(mem)), \
|
||||
[__val] "Ir" ((unsigned long)(value)) \
|
||||
: "memory"); \
|
||||
__ret; })
|
||||
|
||||
/* ??? Barrier semantics for atomic_exchange_and_add appear to be
|
||||
undefined. Use full barrier for now, as that's safe. */
|
||||
#define atomic_exchange_and_add(mem, value) \
|
||||
__atomic_val_bysize (__arch_exchange_and_add, int, mem, value, __MB, __MB)
|
||||
|
||||
|
||||
/* ??? Blah, I'm lazy. Implement these later. Can do better than the
|
||||
compare-and-exchange loop provided by generic code.
|
||||
|
||||
#define atomic_decrement_if_positive(mem)
|
||||
#define atomic_bit_test_set(mem, bit)
|
||||
|
||||
*/
|
||||
|
||||
#ifndef UP
|
||||
/* Full memory barrier: emits the Alpha "mb" instruction and tells the
   compiler that memory is clobbered.  The definition deliberately has no
   trailing semicolon -- the caller writes it -- so the macro expands to a
   single statement and stays safe inside an unbraced if/else.  */
# define atomic_full_barrier() __asm ("mb" : : : "memory")
|
||||
/* Read barrier: issues the same "mb" instruction as the full barrier,
   with a compiler-level memory clobber.  No trailing semicolon in the
   definition -- the caller supplies it -- so the macro expands to a
   single statement and stays safe inside an unbraced if/else.  */
# define atomic_read_barrier() __asm ("mb" : : : "memory")
|
||||
/* Write barrier: emits the Alpha "wmb" instruction with a compiler-level
   memory clobber.  No trailing semicolon in the definition -- the caller
   supplies it -- so the macro expands to a single statement and stays
   safe inside an unbraced if/else.  */
# define atomic_write_barrier() __asm ("wmb" : : : "memory")
|
||||
#endif
|
@ -1,7 +0,0 @@
|
||||
/* Alpha is little-endian. */
|
||||
|
||||
#ifndef _ENDIAN_H
|
||||
# error "Never use <bits/endian.h> directly; include <endian.h> instead."
|
||||
#endif
|
||||
|
||||
#define __BYTE_ORDER __LITTLE_ENDIAN
|
@ -1,69 +0,0 @@
|
||||
/* Copyright (C) 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#ifndef _LINK_H
|
||||
# error "Never include <bits/link.h> directly; use <link.h> instead."
|
||||
#endif
|
||||
|
||||
|
||||
/* Registers for entry into PLT on Alpha. */
|
||||
typedef struct La_alpha_regs
|
||||
{
|
||||
uint64_t lr_r26;
|
||||
uint64_t lr_sp;
|
||||
uint64_t lr_r16;
|
||||
uint64_t lr_r17;
|
||||
uint64_t lr_r18;
|
||||
uint64_t lr_r19;
|
||||
uint64_t lr_r20;
|
||||
uint64_t lr_r21;
|
||||
double lr_f16;
|
||||
double lr_f17;
|
||||
double lr_f18;
|
||||
double lr_f19;
|
||||
double lr_f20;
|
||||
double lr_f21;
|
||||
} La_alpha_regs;
|
||||
|
||||
/* Return values for calls from PLT on Alpha. */
|
||||
typedef struct La_alpha_retval
|
||||
{
|
||||
uint64_t lrv_r0;
|
||||
uint64_t lrv_r1;
|
||||
double lrv_f0;
|
||||
double lrv_f1;
|
||||
} La_alpha_retval;
|
||||
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
extern Elf64_Addr la_alpha_gnu_pltenter (Elf64_Sym *__sym, unsigned int __ndx,
|
||||
uintptr_t *__refcook,
|
||||
uintptr_t *__defcook,
|
||||
La_alpha_regs *__regs,
|
||||
unsigned int *__flags,
|
||||
const char *__symname,
|
||||
long int *__framesizep);
|
||||
/* PLT-exit hook declaration for Alpha.  It is handed the symbol and its
   index, the two cookie pointers, the registers captured at PLT entry
   (__inregs, read-only) and the return-value registers (__outregs).
   Every parameter name carries the reserved "__" prefix so that a macro
   defined by the including program cannot rewrite this prototype.  */
extern unsigned int la_alpha_gnu_pltexit (Elf64_Sym *__sym, unsigned int __ndx,
|
||||
uintptr_t *__refcook,
|
||||
uintptr_t *__defcook,
|
||||
const La_alpha_regs *__inregs,
|
||||
La_alpha_retval *__outregs,
|
||||
const char *__symname);
|
||||
|
||||
__END_DECLS
|
@ -1,80 +0,0 @@
|
||||
/* Copyright (C) 1997,1998,1999,2000,2003,2004,2006
|
||||
Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#if !defined _MATH_H && !defined _COMPLEX_H
|
||||
# error "Never use <bits/mathdef.h> directly; include <math.h> instead"
|
||||
#endif
|
||||
|
||||
/* FIXME! This file describes properties of the compiler, not the machine;
|
||||
it should not be part of libc! */
|
||||
|
||||
#if defined __USE_ISOC99 && defined _MATH_H && !defined _MATH_H_MATHDEF
|
||||
# define _MATH_H_MATHDEF 1
|
||||
|
||||
# ifdef __GNUC__
|
||||
# if __STDC__ == 1
|
||||
|
||||
/* In GNU or ANSI mode, gcc leaves `float' expressions as-is. */
|
||||
typedef float float_t;
|
||||
typedef double double_t;
|
||||
|
||||
# else
|
||||
|
||||
/* For `gcc -traditional', `float' expressions are evaluated as `double'. */
|
||||
typedef double float_t;
|
||||
typedef double double_t;
|
||||
|
||||
# endif
|
||||
# else
|
||||
|
||||
/* Wild guess at types for float_t and double_t. */
|
||||
typedef double float_t;
|
||||
typedef double double_t;
|
||||
|
||||
# endif
|
||||
|
||||
/* The values returned by `ilogb' for 0 and NaN respectively. */
|
||||
# define FP_ILOGB0 (-2147483647)
|
||||
# define FP_ILOGBNAN (2147483647)
|
||||
|
||||
#endif /* ISO C99 && MATH_H */
|
||||
|
||||
#if defined _COMPLEX_H && !defined _COMPLEX_H_MATHDEF
|
||||
# define _COMPLEX_H_MATHDEF 1
|
||||
# if defined(__GNUC__) && !__GNUC_PREREQ(3,4)
|
||||
|
||||
/* Due to an ABI change, we need to remap the complex float symbols. */
|
||||
# define _Mdouble_ float
|
||||
# define __MATHCALL(function, args) \
|
||||
__MATHDECL (_Complex float, function, args)
|
||||
# define __MATHDECL(type, function, args) \
|
||||
__MATHDECL_1(type, function##f, args, __c1_##function##f); \
|
||||
__MATHDECL_1(type, __##function##f, args, __c1_##function##f)
|
||||
# define __MATHDECL_1(type, function, args, alias) \
|
||||
extern type function args __asm__(#alias) __THROW
|
||||
|
||||
# include <bits/cmathcalls.h>
|
||||
|
||||
# undef _Mdouble_
|
||||
# undef __MATHCALL
|
||||
# undef __MATHDECL
|
||||
# undef __MATHDECL_1
|
||||
|
||||
# endif /* GNUC before 3.4 */
|
||||
#endif /* COMPLEX_H */
|
@ -1,62 +0,0 @@
|
||||
/* Define the machine-dependent type `jmp_buf'. Alpha version.
|
||||
Copyright (C) 1992,1997,2003,2005,2006 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#ifndef _BITS_SETJMP_H
|
||||
#define _BITS_SETJMP_H 1
|
||||
|
||||
#if !defined _SETJMP_H && !defined _PTHREAD_H
|
||||
# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead."
|
||||
#endif
|
||||
|
||||
/* The previous bits/setjmp.h had __jmp_buf defined as a structure.
|
||||
We use an array of 'long int' instead, to make writing the
|
||||
assembler easier. Naturally, user code should not depend on
|
||||
either representation. */
|
||||
|
||||
/*
|
||||
* Integer registers:
|
||||
* $0 is the return value (v0);
|
||||
* $1-$8, $22-$25, $28 are call-used (t0-t7, t8-t11, at);
|
||||
* $9-$14 we save here (s0-s5);
|
||||
* $15 is the FP and we save it here (fp or s6);
|
||||
* $16-$21 are input arguments (call-used) (a0-a5);
|
||||
* $26 is the return PC and we save it here (ra);
|
||||
* $27 is the procedure value (i.e., the address of __setjmp) (pv or t12);
|
||||
* $29 is the global pointer, which the caller will reconstruct
|
||||
* from the return address restored in $26 (gp);
|
||||
* $30 is the stack pointer and we save it here (sp);
|
||||
* $31 is always zero (zero).
|
||||
*
|
||||
* Floating-point registers:
|
||||
* $f0 is the floating return value;
|
||||
* $f1, $f10-$f15, $f22-$f30 are call-used;
|
||||
* $f2-$f9 we save here;
|
||||
* $f16-$f21 are input args (call-used);
|
||||
* $f31 is always zero.
|
||||
*
|
||||
* Note that even on Alpha hardware that does not have an FPU (there
|
||||
* isn't such a thing currently) it is required to implement the FP
|
||||
* registers.
|
||||
*/
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
typedef long int __jmp_buf[17];
|
||||
#endif
|
||||
|
||||
#endif /* bits/setjmp.h */
|
@ -1 +0,0 @@
|
||||
/* _setjmp is in setjmp.S */
|
@ -1 +0,0 @@
|
||||
/* setjmp is in setjmp.S */
|
@ -1,120 +0,0 @@
|
||||
/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson (rth@tamu.edu)
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Fill a block of memory with zeros. Optimized for the Alpha architecture:
|
||||
|
||||
- memory accessed as aligned quadwords only
|
||||
- destination memory not read unless needed for good cache behaviour
|
||||
- basic blocks arranged to optimize branch prediction for full-quadword
|
||||
aligned memory blocks.
|
||||
- partial head and tail quadwords constructed with byte-mask instructions
|
||||
|
||||
This is generally scheduled for the EV5 (got to look out for my own
|
||||
interests :-), but with EV4 needs in mind. There *should* be no more
|
||||
stalls for the EV4 than there are for the EV5.
|
||||
*/
|
||||
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
||||
.text
|
||||
|
||||
/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
|
||||
doesn't like putting the entry point for a procedure somewhere in the
|
||||
middle of the procedure descriptor. Work around this by putting the main
|
||||
loop in its own procedure descriptor. */
|
||||
|
||||
/* On entry to this basic block:
|
||||
t3 == loop counter
|
||||
t4 == bytes in partial final word
|
||||
a0 == possibly misaligned destination pointer */
|
||||
|
||||
.ent bzero_loop
|
||||
.align 3
|
||||
bzero_loop:
|
||||
.frame sp, 0, ra, 0
|
||||
.prologue 0
|
||||
|
||||
beq t3, $tail #
|
||||
blbc t3, 0f # skip single store if count even
|
||||
|
||||
stq_u zero, 0(a0) # e0 : store one word
|
||||
subq t3, 1, t3 # .. e1 :
|
||||
addq a0, 8, a0 # e0 :
|
||||
beq t3, $tail # .. e1 :
|
||||
|
||||
0: stq_u zero, 0(a0) # e0 : store two words
|
||||
subq t3, 2, t3 # .. e1 :
|
||||
stq_u zero, 8(a0) # e0 :
|
||||
addq a0, 16, a0 # .. e1 :
|
||||
bne t3, 0b # e1 :
|
||||
|
||||
$tail: bne t4, 1f # is there a tail to do?
|
||||
ret # no
|
||||
|
||||
1: ldq_u t0, 0(a0) # yes, load original data
|
||||
mskqh t0, t4, t0 #
|
||||
stq_u t0, 0(a0) #
|
||||
ret #
|
||||
|
||||
.end bzero_loop
|
||||
|
||||
ENTRY(__bzero)
|
||||
#ifdef PROF
|
||||
ldgp gp, 0(pv)
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.prologue 1
|
||||
#else
|
||||
.prologue 0
|
||||
#endif
|
||||
|
||||
mov a0, v0 # e0 : move return value in place
|
||||
beq a1, $done # .. e1 : early exit for zero-length store
|
||||
and a0, 7, t1 # e0 :
|
||||
addq a1, t1, a1 # e1 : add dest misalignment to count
|
||||
srl a1, 3, t3 # e0 : loop = count >> 3
|
||||
and a1, 7, t4 # .. e1 : find number of bytes in tail
|
||||
unop # :
|
||||
beq t1, bzero_loop # e1 : aligned head, jump right in
|
||||
|
||||
ldq_u t0, 0(a0) # e0 : load original data to mask into
|
||||
cmpult a1, 8, t2 # .. e1 : is this a sub-word set?
|
||||
bne t2, $oneq # e1 :
|
||||
|
||||
mskql t0, a0, t0 # e0 : we span words. finish this partial
|
||||
subq t3, 1, t3 # e0 :
|
||||
addq a0, 8, a0 # .. e1 :
|
||||
stq_u t0, -8(a0) # e0 :
|
||||
br bzero_loop # .. e1 :
|
||||
|
||||
.align 3
|
||||
$oneq:
|
||||
mskql t0, a0, t2 # e0 :
|
||||
mskqh t0, a1, t3 # e0 :
|
||||
or t2, t3, t0 # e1 :
|
||||
stq_u t0, 0(a0) # e0 :
|
||||
|
||||
$done: ret
|
||||
|
||||
END(__bzero)
|
||||
weak_alias (__bzero, bzero)
|
@ -1,88 +0,0 @@
|
||||
/* Copyright (C) 1996, 1997, 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson <rth@tamu.edu>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "div_libc.h"
|
||||
|
||||
#undef FRAME
|
||||
#ifdef __alpha_fix__
|
||||
#define FRAME 0
|
||||
#else
|
||||
#define FRAME 16
|
||||
#endif
|
||||
|
||||
.set noat
|
||||
|
||||
.align 4
|
||||
.globl div
|
||||
.ent div
|
||||
div:
|
||||
.frame sp, FRAME, ra
|
||||
#if FRAME > 0
|
||||
lda sp, -FRAME(sp)
|
||||
#endif
|
||||
#ifdef PROF
|
||||
.set macro
|
||||
ldgp gp, 0(pv)
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.set nomacro
|
||||
.prologue 1
|
||||
#else
|
||||
.prologue 0
|
||||
#endif
|
||||
|
||||
beq $18, $divbyzero
|
||||
excb
|
||||
mf_fpcr $f10
|
||||
|
||||
_ITOFT2 $17, $f0, 0, $18, $f1, 8
|
||||
|
||||
cvtqt $f0, $f0
|
||||
cvtqt $f1, $f1
|
||||
divt/c $f0, $f1, $f0
|
||||
cvttq/c $f0, $f0
|
||||
excb
|
||||
mt_fpcr $f10
|
||||
_FTOIT $f0, $0, 0
|
||||
|
||||
mull $0, $18, $1
|
||||
subl $17, $1, $1
|
||||
|
||||
stl $0, 0(a0)
|
||||
stl $1, 4(a0)
|
||||
mov a0, v0
|
||||
|
||||
#if FRAME > 0
|
||||
lda sp, FRAME(sp)
|
||||
#endif
|
||||
ret
|
||||
|
||||
$divbyzero:
|
||||
mov a0, v0
|
||||
lda a0, GEN_INTDIV
|
||||
call_pal PAL_gentrap
|
||||
stl zero, 0(v0)
|
||||
stl zero, 4(v0)
|
||||
|
||||
#if FRAME > 0
|
||||
lda sp, FRAME(sp)
|
||||
#endif
|
||||
ret
|
||||
|
||||
.end div
|
@ -1,164 +0,0 @@
|
||||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Common bits for implementing software divide. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#ifdef __linux__
|
||||
# include <asm/gentrap.h>
|
||||
# include <asm/pal.h>
|
||||
#else
|
||||
# include <machine/pal.h>
|
||||
#endif
|
||||
|
||||
/* These are not normal C functions. Argument registers are t10 and t11;
|
||||
the result goes in t12; the return address is in t9. Only t12 and AT
|
||||
may be clobbered. */
|
||||
#define X t10
|
||||
#define Y t11
|
||||
#define RV t12
|
||||
#define RA t9
|
||||
|
||||
/* The secureplt format does not allow the division routines to be called
|
||||
via plt; there aren't enough registers free to be clobbered. Avoid
|
||||
setting the symbol type to STT_FUNC, so that the linker won't be tempted
|
||||
to create a plt entry. */
|
||||
#define funcnoplt notype
|
||||
|
||||
/* None of these functions should use implicit anything. */
|
||||
.set nomacro
|
||||
.set noat
|
||||
|
||||
/* Code fragment to invoke _mcount for profiling. This should be invoked
|
||||
directly after allocation of the stack frame. */
|
||||
.macro CALL_MCOUNT
|
||||
#ifdef PROF
|
||||
stq ra, 0(sp)
|
||||
stq pv, 8(sp)
|
||||
stq gp, 16(sp)
|
||||
cfi_rel_offset (ra, 0)
|
||||
cfi_rel_offset (pv, 8)
|
||||
cfi_rel_offset (gp, 16)
|
||||
br AT, 1f
|
||||
.set macro
|
||||
1: ldgp gp, 0(AT)
|
||||
mov RA, ra
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.set nomacro
|
||||
ldq ra, 0(sp)
|
||||
ldq pv, 8(sp)
|
||||
ldq gp, 16(sp)
|
||||
cfi_restore (ra)
|
||||
cfi_restore (pv)
|
||||
cfi_restore (gp)
|
||||
/* Realign subsequent code with what we'd have without this
|
||||
macro at all. This means aligned with one arithmetic insn
|
||||
used within the bundle. */
|
||||
.align 4
|
||||
nop
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/* In order to make the below work, all top-level divide routines must
|
||||
use the same frame size. */
|
||||
#define FRAME 64
|
||||
|
||||
/* Code fragment to generate an integer divide-by-zero fault. When
|
||||
building libc.so, we arrange for there to be one copy of this code
|
||||
placed late in the dso, such that all branches are forward. When
|
||||
building libc.a, we use multiple copies to avoid having an out of
|
||||
range branch. Users should jump to DIVBYZERO. */
|
||||
|
||||
.macro DO_DIVBYZERO
|
||||
#ifdef PIC
|
||||
#define DIVBYZERO __divbyzero
|
||||
.section .gnu.linkonce.t.divbyzero, "ax", @progbits
|
||||
.globl __divbyzero
|
||||
.type __divbyzero, @function
|
||||
.usepv __divbyzero, no
|
||||
.hidden __divbyzero
|
||||
#else
|
||||
#define DIVBYZERO $divbyzero
|
||||
#endif
|
||||
|
||||
.align 4
|
||||
DIVBYZERO:
|
||||
cfi_startproc
|
||||
cfi_return_column (RA)
|
||||
cfi_def_cfa_offset (FRAME)
|
||||
|
||||
mov a0, RV
|
||||
unop
|
||||
lda a0, GEN_INTDIV
|
||||
call_pal PAL_gentrap
|
||||
|
||||
mov RV, a0
|
||||
clr RV
|
||||
lda sp, FRAME(sp)
|
||||
cfi_def_cfa_offset (0)
|
||||
ret $31, (RA), 1
|
||||
|
||||
cfi_endproc
|
||||
.size DIVBYZERO, .-DIVBYZERO
|
||||
.endm
|
||||
|
||||
/* Like the ev6 instructions, but fall back to stack use on prior machines. */
|
||||
|
||||
.arch ev6
|
||||
|
||||
.macro _ITOFS gr, fr, slot
|
||||
#ifdef __alpha_fix__
|
||||
itofs \gr, \fr
|
||||
#else
|
||||
stl \gr, \slot(sp)
|
||||
lds \fr, \slot(sp)
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro _ITOFT gr, fr, slot
|
||||
#ifdef __alpha_fix__
|
||||
itoft \gr, \fr
|
||||
#else
|
||||
stq \gr, \slot(sp)
|
||||
ldt \fr, \slot(sp)
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro _FTOIT fr, gr, slot
|
||||
#ifdef __alpha_fix__
|
||||
ftoit \fr, \gr
|
||||
#else
|
||||
stt \fr, \slot(sp)
|
||||
ldq \gr, \slot(sp)
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/* Similarly, but move two registers. Schedules better for pre-ev6. */
|
||||
|
||||
.macro _ITOFT2 gr1, fr1, slot1, gr2, fr2, slot2
|
||||
#ifdef __alpha_fix__
|
||||
itoft \gr1, \fr1
|
||||
itoft \gr2, \fr2
|
||||
#else
|
||||
stq \gr1, \slot1(sp)
|
||||
stq \gr2, \slot2(sp)
|
||||
ldt \fr1, \slot1(sp)
|
||||
ldt \fr2, \slot2(sp)
|
||||
#endif
|
||||
.endm
|
@ -1,84 +0,0 @@
|
||||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "div_libc.h"
|
||||
|
||||
/* 32-bit signed int divide. This is not a normal C function. Argument
|
||||
registers are t10 and t11, the result goes in t12. Only t12 and AT may
|
||||
be clobbered.
|
||||
|
||||
The FPU can handle all input values except zero. Whee!
|
||||
|
||||
The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
|
||||
for cvttq/c even without /sui being set. It will not, however, properly
|
||||
raise the exception, so we don't have to worry about FPCR_INED being clear
|
||||
and so dying by SIGFPE. */
|
||||
|
||||
/* EXTEND(S,D) widens the 32-bit operand S into D before it is handed to
   the FPU.  The default is a sign extension; a file that includes this
   one may pre-define EXTEND to choose a different widening.  */
#if !defined EXTEND
# define EXTEND(S,D)	sextl S, D
#endif

	.text
	.align	4
	.globl	__divl
	.type	__divl, @funcnoplt
	.usepv	__divl, no

	cfi_startproc
	cfi_return_column (RA)
__divl:
	/* Open the frame and save $f0 before testing for a zero divisor.  */
	lda	sp, -FRAME(sp)
	cfi_def_cfa_offset (FRAME)
	CALL_MCOUNT
	stt	$f0, 0(sp)
	excb
	beq	Y, DIVBYZERO

	/* Save the remaining FP scratch registers; $f2 then holds the FPCR
	   value that is written back after the conversion below.  */
	stt	$f1, 8(sp)
	stt	$f2, 16(sp)
	cfi_rel_offset ($f0, 0)
	cfi_rel_offset ($f1, 8)
	cfi_rel_offset ($f2, 16)
	mf_fpcr	$f2

	/* Widen both operands, move them to the FPU (stack slots 24 and 32
	   serve as the bounce buffer), divide, and truncate to integer.  */
	EXTEND	(X, RV)
	EXTEND	(Y, AT)
	_ITOFT2	RV, $f0, 24, AT, $f1, 32
	cvtqt	$f0, $f0
	cvtqt	$f1, $f1
	divt/c	$f0, $f1, $f0
	cvttq/c	$f0, $f0
	excb
	mt_fpcr	$f2
	_FTOIT	$f0, RV, 24

	/* Reload the saved FP registers, release the frame and return the
	   quotient as a sign-extended 32-bit value in RV.  */
	ldt	$f0, 0(sp)
	ldt	$f1, 8(sp)
	ldt	$f2, 16(sp)
	lda	sp, FRAME(sp)
	cfi_restore ($f0)
	cfi_restore ($f1)
	cfi_restore ($f2)
	cfi_def_cfa_offset (0)
	sextl	RV, RV
	ret	$31, (RA), 1

	cfi_endproc
	.size	__divl, .-__divl

	DO_DIVBYZERO
|
@ -1,4 +0,0 @@
|
||||
/* 32-bit unsigned divide: reuse the body of divl.S under the name
   __divlu, with the operands zero-extended instead of sign-extended.  */

/* Emit the routine as __divlu instead of __divl.  */
#define __divl __divlu
/* Keep only the low four bytes of S (byte mask 15), i.e. zero-extend.  */
#define EXTEND(S,D)	zapnot S, 15, D
/* Marks the unsigned build for the included sources; presumably consumed
   by div_libc.h -- TODO confirm.  */
#define UNSIGNED
#include <divl.S>
|
@ -1,274 +0,0 @@
|
||||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "div_libc.h"
|
||||
|
||||
|
||||
/* 64-bit signed long divide. These are not normal C functions. Argument
|
||||
registers are t10 and t11, the result goes in t12. Only t12 and AT may
|
||||
be clobbered.
|
||||
|
||||
Theory of operation here is that we can use the FPU divider for virtually
|
||||
all operands that we see: all dividend values between -2**53 and 2**53-1
|
||||
can be computed directly. Note that divisor values need not be checked
|
||||
against that range because the rounded fp value will be close enough such
|
||||
that the quotient is < 1, which will properly be truncated to zero when we
|
||||
convert back to integer.
|
||||
|
||||
When the dividend is outside the range for which we can compute exact
|
||||
results, we use the fp quotient as an estimate from which we begin refining
|
||||
an exact integral value. This reduces the number of iterations in the
|
||||
shift-and-subtract loop significantly.
|
||||
|
||||
The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
|
||||
for cvttq/c even without /sui being set. It will not, however, properly
|
||||
raise the exception, so we don't have to worry about FPCR_INED being clear
|
||||
and so dying by SIGFPE. */
|
||||
|
||||
/* __divq: 64-bit signed divide.  The dividend is in X, the divisor in Y
   and the quotient is returned in RV; every other register touched here
   is saved in the FRAME-byte stack frame and restored before return.  */
	.text
	.align	4
	.globl	__divq
	.type	__divq, @funcnoplt
	.usepv	__divq, no

	cfi_startproc
	cfi_return_column (RA)
__divq:
	lda	sp, -FRAME(sp)
	cfi_def_cfa_offset (FRAME)
	CALL_MCOUNT

	/* Priority one is getting the FP divide issued: it takes at least
	   22 cycles to complete, which leaves ample time afterwards to
	   decide how its result is going to be used.  */
	stt	$f0, 0(sp)
	excb
	beq	Y, DIVBYZERO

	stt	$f1, 8(sp)
	stt	$f3, 48(sp)
	cfi_rel_offset ($f0, 0)
	cfi_rel_offset ($f1, 8)
	cfi_rel_offset ($f3, 48)
	mf_fpcr	$f3

	_ITOFT2	X, $f0, 16, Y, $f1, 24
	cvtqt	$f0, $f0
	cvtqt	$f1, $f1
	divt/c	$f0, $f1, $f0

	/* X is exactly representable as a double iff it survives a shift
	   left and an arithmetic shift right by 64-53 bits.  $f1 is no
	   longer needed and is reloaded in the shadow of the test.  */
	sll	X, (64-53), AT
	ldt	$f1, 8(sp)
	sra	AT, (64-53), AT
	cmpeq	X, AT, AT
	beq	AT, $inexact

	/* Exact case: the truncated FP quotient is the answer.  Convert it,
	   put the FPCR back and unwind.  */
	cvttq/c	$f0, $f0
	excb
	mt_fpcr	$f3
	_FTOIT	$f0, RV, 16

	ldt	$f0, 0(sp)
	ldt	$f3, 48(sp)
	cfi_restore ($f1)
	cfi_remember_state
	cfi_restore ($f0)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)
	lda	sp, FRAME(sp)
	ret	$31, (RA), 1

	.align	4
	cfi_restore_state
$inexact:
	/* X was too wide for an exact conversion, so the FP quotient is only
	   an estimate.  Work out how far it is from the true quotient and
	   correct it.  Free up integer scratch registers first.  */
	stq	t0, 16(sp)
	stq	t1, 24(sp)
	stq	t2, 32(sp)
	stq	t5, 40(sp)
	cfi_rel_offset (t0, 16)
	cfi_rel_offset (t1, 24)
	cfi_rel_offset (t2, 32)
	cfi_rel_offset (t5, 40)

#define Q	RV		/* quotient */
#define R	t0		/* remainder */
#define SY	t1		/* scaled Y */
#define S	t2		/* scalar */
#define QY	t3		/* Q*Y */

	/* The correction loops are unsigned-only.  t5 starts out as zero and
	   stays zero unless X|Y has its sign bit set.  */
	or	X, Y, AT
	mov	$31, t5
	blt	AT, $make_unsigned
$make_unsigned_ret1:
	cvttq/c	$f0, $f0

	_FTOIT	$f0, Q, 8
	.align	3
$make_unsigned_ret2:
	/* $f0 is finished with; its save slot is recycled for t3.  */
	ldt	$f0, 0(sp)
	stq	t3, 0(sp)
	cfi_restore ($f0)
	cfi_rel_offset (t3, 0)

	mulq	Q, Y, QY
	excb
	stq	t4, 8(sp)
	mt_fpcr	$f3
	cfi_rel_offset (t4, 8)

	/* R = Q*Y - X.  A positive value means the estimate overshot.  */
	subq	QY, X, R
	mov	Y, SY
	mov	1, S
	bgt	R, $est_high

$est_high_ret:
	/* R = X - Q*Y.  A positive value means the estimate is short.  */
	subq	X, QY, R
	mov	Y, SY
	mov	1, S
	bgt	R, $est_low

$est_low_ret:
	ldq	t0, 16(sp)
	ldq	t1, 24(sp)
	ldq	t2, 32(sp)
	bne	t5, $restore_sign

$restore_sign_ret:
	ldq	t3, 0(sp)
	ldq	t4, 8(sp)
	ldq	t5, 40(sp)
	ldt	$f3, 48(sp)
	lda	sp, FRAME(sp)
	cfi_remember_state
	cfi_restore (t0)
	cfi_restore (t1)
	cfi_restore (t2)
	cfi_restore (t3)
	cfi_restore (t4)
	cfi_restore (t5)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)
	ret	$31, (RA), 1

	.align	4
	cfi_restore_state
	/* Estimate too large.  Double SY and S until SY is no longer below
	   R (unsigned), then take S off the quotient and SY off Q*Y.  This
	   may overshoot downwards; the too-small path repairs that.  */
$est_high_scale:
	addq	SY, SY, SY
	addq	S, S, S
$est_high:
	cmpult	SY, R, AT
	bne	AT, $est_high_scale

	subq	Q, S, Q
	unop
	subq	QY, SY, QY
	br	$est_high_ret

	.align	4
	/* Estimate too small.  Divide the outstanding remainder R by Y and
	   add the result to Q; R is expected to be far smaller than X.  */
	/* Shift-up phase: double SY (which starts as Y) and S (which starts
	   as 1) until SY is no longer below R.  */
$est_low_scale:
	addq	SY, SY, SY
	addq	S, S, S
$est_low:
	cmpult	SY, R, AT
	bne	AT, $est_low_scale

	/* Shift-down-and-subtract phase.  On each pass, if SY <= R then S is
	   added to Q and SY is removed from R; S and SY are then halved.
	   The loop ends once S has been shifted down to zero.  */
$est_low_loop:
	addq	Q, S, t3
	srl	S, 1, S
	cmpule	SY, R, AT
	subq	R, SY, t4

	cmovne	AT, t3, Q
	cmovne	AT, t4, R
	srl	SY, 1, SY
	bne	S, $est_low_loop

	br	$est_low_ret

	.align	4
$make_unsigned:
	/* X|Y is negative.  Replace X and Y by their magnitudes so that the
	   correction code performs an unsigned division, and record what
	   was done in t5:
	     bit 0: set if the result should be negative.
	     bit 2: set if X was negated.
	     bit 3: set if Y was negated.  */
	xor	X, Y, AT
	cmplt	AT, 0, t5
	cmplt	X, 0, AT
	negq	X, t0

	s4addq	AT, t5, t5
	cmovne	AT, t0, X
	cmplt	Y, 0, AT
	negq	Y, t0

	s8addq	AT, t5, t5
	cmovne	AT, t0, Y
	unop
	blbc	t5, $make_unsigned_ret1

	/* The result is to be negative (bit 0 of t5): convert the estimate
	   here and negate it before rejoining the common path.  */
	cvttq/c	$f0, $f0
	_FTOIT	$f0, Q, 8
	.align	3
	negq	Q, Q
	br	$make_unsigned_ret2

	.align	4
$restore_sign:
	/* Reverse the adjustments recorded in t5.  */
	/* ??? Is this really faster than just increasing the size of
	   the stack frame and storing X and Y in memory?  */
	and	t5, 8, AT
	negq	Y, t4
	cmovne	AT, t4, Y

	and	t5, 4, AT
	negq	X, t4
	cmovne	AT, t4, X

	negq	RV, t4
	cmovlbs	t5, t4, RV

	br	$restore_sign_ret

	cfi_endproc
	.size	__divq, .-__divq

	DO_DIVBYZERO
|
@ -1,257 +0,0 @@
|
||||
/* Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include "div_libc.h"
|
||||
|
||||
|
||||
/* 64-bit unsigned long divide. These are not normal C functions. Argument
|
||||
registers are t10 and t11, the result goes in t12. Only t12 and AT may be
|
||||
clobbered.
|
||||
|
||||
Theory of operation here is that we can use the FPU divider for virtually
|
||||
all operands that we see: all dividend values between 0 and 2**53-1
|
||||
can be computed directly. Note that divisor values need not be checked
|
||||
against that range because the rounded fp value will be close enough such
|
||||
that the quotient is < 1, which will properly be truncated to zero when we
|
||||
convert back to integer.
|
||||
|
||||
When the dividend is outside the range for which we can compute exact
|
||||
results, we use the fp quotient as an estimate from which we begin refining
|
||||
an exact integral value. This reduces the number of iterations in the
|
||||
shift-and-subtract loop significantly.
|
||||
|
||||
The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
|
||||
for cvttq/c even without /sui being set. It will not, however, properly
|
||||
raise the exception, so we don't have to worry about FPCR_INED being clear
|
||||
and so dying by SIGFPE. */
|
||||
|
||||
/* __divqu: 64-bit unsigned divide.  The dividend is in X, the divisor in
   Y and the quotient is returned in RV; every other register touched
   here is saved in the FRAME-byte stack frame and restored on return.  */
	.text
	.align	4
	.globl	__divqu
	.type	__divqu, @funcnoplt
	.usepv	__divqu, no

	cfi_startproc
	cfi_return_column (RA)
__divqu:
	lda	sp, -FRAME(sp)
	cfi_def_cfa_offset (FRAME)
	CALL_MCOUNT

	/* Priority one is getting the FP divide issued: it takes at least
	   22 cycles to complete, which leaves ample time afterwards to
	   decide how its result is going to be used.  */
	stt	$f0, 0(sp)
	excb
	beq	Y, DIVBYZERO

	stt	$f1, 8(sp)
	stt	$f3, 48(sp)
	cfi_rel_offset ($f0, 0)
	cfi_rel_offset ($f1, 8)
	cfi_rel_offset ($f3, 48)
	mf_fpcr	$f3

	_ITOFT2	X, $f0, 16, Y, $f1, 24
	cvtqt	$f0, $f0
	cvtqt	$f1, $f1
	blt	X, $dividend_msb_set
	divt/c	$f0, $f1, $f0

	/* A divisor with bit 63 set was converted as a negative number.  */
	ldt	$f1, 8(sp)
	blt	Y, $divisor_msb_set

	/* X converts to a double exactly only if nothing is set above
	   bit 52.  */
	srl	X, 53, AT
	bne	AT, $inexact

	/* Exact case: the truncated FP quotient is the answer.  Convert it,
	   put the FPCR back and unwind.  */
	cvttq/c	$f0, $f0
	excb
	mt_fpcr	$f3
	_FTOIT	$f0, RV, 16

	ldt	$f0, 0(sp)
	ldt	$f3, 48(sp)
	cfi_remember_state
	cfi_restore ($f0)
	cfi_restore ($f1)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)
	lda	sp, FRAME(sp)
	ret	$31, (RA), 1

	.align	4
	cfi_restore_state
$dividend_msb_set:
	/* Bit 63 of X is set, so the signed conversion produced a negative
	   value.  Add 2**64 to the converted dividend before dividing so
	   that the estimate is still usable.  */
	ldah	AT, 0x5f80		/* 2**64 as float.  */
	stt	$f2, 24(sp)
	cfi_rel_offset ($f2, 24)
	_ITOFS	AT, $f2, 16

	.align	4
	addt	$f0, $f2, $f0
	unop
	divt/c	$f0, $f1, $f0
	unop

	/* The divide is now in flight; carry on with the remaining checks.  */
	ldt	$f1, 8(sp)
	unop
	ldt	$f2, 24(sp)
	blt	Y, $divisor_msb_set
	cfi_restore ($f1)
	cfi_restore ($f2)
	cfi_remember_state	/* for y_is_neg */

	.align	4
$inexact:
	/* X was too wide for an exact conversion, so the FP quotient is only
	   an estimate.  Work out how far it is from the true quotient and
	   correct it.  Free up integer scratch registers first.  */
	stq	t0, 16(sp)
	stq	t1, 24(sp)
	stq	t2, 32(sp)
	stq	t3, 40(sp)
	cfi_rel_offset (t0, 16)
	cfi_rel_offset (t1, 24)
	cfi_rel_offset (t2, 32)
	cfi_rel_offset (t3, 40)

#define Q	RV		/* quotient */
#define R	t0		/* remainder */
#define SY	t1		/* scaled Y */
#define S	t2		/* scalar */
#define QY	t3		/* Q*Y */

	cvttq/c	$f0, $f0
	_FTOIT	$f0, Q, 8
	mulq	Q, Y, QY

	.align	4
	stq	t4, 8(sp)
	excb
	ldt	$f0, 0(sp)
	mt_fpcr	$f3
	cfi_rel_offset (t4, 8)
	cfi_restore ($f0)

	/* R = Q*Y - X.  A positive value means the estimate overshot.  */
	subq	QY, X, R
	mov	Y, SY
	mov	1, S
	bgt	R, $est_high

$est_high_ret:
	/* R = X - Q*Y.  A positive value means the estimate is short.  */
	subq	X, QY, R
	mov	Y, SY
	mov	1, S
	bgt	R, $est_low

$est_low_ret:
	ldq	t4, 8(sp)
	ldq	t0, 16(sp)
	ldq	t1, 24(sp)
	ldq	t2, 32(sp)

	ldq	t3, 40(sp)
	ldt	$f3, 48(sp)
	lda	sp, FRAME(sp)
	cfi_remember_state
	cfi_restore (t0)
	cfi_restore (t1)
	cfi_restore (t2)
	cfi_restore (t3)
	cfi_restore (t4)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)
	ret	$31, (RA), 1

	.align	4
	cfi_restore_state
	/* Estimate too large.  Double SY and S until SY is no longer below
	   R (unsigned), then take S off the quotient and SY off Q*Y.  This
	   may overshoot downwards; the too-small path repairs that.  */
$est_high_scale:
	addq	SY, SY, SY
	addq	S, S, S
$est_high:
	cmpult	SY, R, AT
	bne	AT, $est_high_scale

	subq	Q, S, Q
	unop
	subq	QY, SY, QY
	br	$est_high_ret

	.align	4
	/* Estimate too small.  Divide the outstanding remainder R by Y and
	   add the result to Q; R is expected to be far smaller than X.  */
	/* Shift-up phase: double SY (which starts as Y) and S (which starts
	   as 1) until SY is no longer below R.  */
$est_low_scale:
	addq	SY, SY, SY
	addq	S, S, S
$est_low:
	cmpult	SY, R, AT
	bne	AT, $est_low_scale

	/* Shift-down-and-subtract phase.  On each pass, if SY <= R then S is
	   added to Q and SY is removed from R; S and SY are then halved.
	   The loop ends once S has been shifted down to zero.  */
$est_low_loop:
	addq	Q, S, t3
	srl	S, 1, S
	cmpule	SY, R, AT
	subq	R, SY, t4

	cmovne	AT, t3, Q
	cmovne	AT, t4, R
	srl	SY, 1, SY
	bne	S, $est_low_loop

	br	$est_low_ret

	.align	4
	cfi_restore_state
$divisor_msb_set:
	/* Bit 63 of Y is set, so the FP result is meaningless.  With a
	   divisor that large the quotient can only be 0 or 1: it is 1
	   exactly when Y <= X (unsigned).  */
	cmpule	Y, X, RV
	excb
	mt_fpcr	$f3
	ldt	$f0, 0(sp)
	ldt	$f3, 48(sp)
	lda	sp, FRAME(sp)
	cfi_restore ($f0)
	cfi_restore ($f3)
	cfi_def_cfa_offset (0)
	ret	$31, (RA), 1

	cfi_endproc
	.size	__divqu, .-__divqu

	DO_DIVBYZERO
|
@ -1,3 +0,0 @@
|
||||
/* Number of extra dynamic section entries for this architecture. By
|
||||
default there are none. */
|
||||
#define DT_THISPROCNUM DT_ALPHA_NUM
|
@ -1,522 +0,0 @@
|
||||
/* Machine-dependent ELF dynamic relocation inline functions. Alpha version.
|
||||
Copyright (C) 1996-2005, 2006 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson <rth@tamu.edu>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* This was written in the absence of an ABI -- don't expect
|
||||
it to remain unchanged. */
|
||||
|
||||
#ifndef dl_machine_h
|
||||
#define dl_machine_h 1
|
||||
|
||||
#define ELF_MACHINE_NAME "alpha"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
|
||||
/* Mask identifying addresses reserved for the user program,
|
||||
where the dynamic linker should not map anything. */
|
||||
#define ELF_MACHINE_USER_ADDRESS_MASK 0x120000000UL
|
||||
|
||||
/* Translate a processor specific dynamic tag to the index in l_info array. */
|
||||
#define DT_ALPHA(x) (DT_ALPHA_##x - DT_LOPROC + DT_NUM)
|
||||
|
||||
/* Tell whether the object described by EHDR was built for the Alpha.
   Returns 1 when its e_machine field is EM_ALPHA and 0 otherwise.  */
static inline int
elf_machine_matches_host (const Elf64_Ehdr *ehdr)
{
  const int is_alpha = (ehdr->e_machine == EM_ALPHA);

  return is_alpha;
}
|
||||
|
||||
/* Return the link-time address of _DYNAMIC.

   Linkers without multiple-GOT support (NO_AXP_MULTI_GOT_LD defined)
   keep that address in a GOT slot, which is read here at index -4096
   from the global pointer register $29.  The multiple-GOT-capable linker
   no longer reserves such a slot, and the address of the symbol is used
   directly.  */
static inline Elf64_Addr
elf_machine_dynamic (void)
{
#ifdef NO_AXP_MULTI_GOT_LD
  register Elf64_Addr *gp __asm__ ("$29");
  return gp[-4096];
#else
  return (Elf64_Addr) &_DYNAMIC;
#endif
}
|
||||
|
||||
/* Return the run-time load address of the shared object.

   DOT is a static pointer initialised with its own address.  Until the
   object has been relocated, the word stored in DOT is still the
   link-time address, whereas the expression &dot yields the run-time
   address; their difference is therefore the load offset.  Once the
   object has been relocated the two are equal and the result is 0.  */
static inline Elf64_Addr
elf_machine_load_address (void)
{
  /* This relies on the compiler using gp-relative addresses for static
     symbols, so that taking &dot needs no relocated data.  */
  static void *dot = &dot;

  /* Byte difference of two void pointers (GNU C extension).  */
  return (void *) &dot - dot;
}
|
||||
|
||||
/* Set up the loaded object described by L so its unrelocated PLT
|
||||
entries will jump to the on-demand fixup code in dl-runtime.c. */
|
||||
|
||||
static inline int
|
||||
elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
|
||||
{
|
||||
extern char _dl_runtime_resolve_new[] attribute_hidden;
|
||||
extern char _dl_runtime_profile_new[] attribute_hidden;
|
||||
extern char _dl_runtime_resolve_old[] attribute_hidden;
|
||||
extern char _dl_runtime_profile_old[] attribute_hidden;
|
||||
|
||||
struct pltgot {
|
||||
char *resolve;
|
||||
struct link_map *link;
|
||||
};
|
||||
|
||||
struct pltgot *pg;
|
||||
long secureplt;
|
||||
char *resolve;
|
||||
|
||||
if (map->l_info[DT_JMPREL] == 0 || !lazy)
|
||||
return lazy;
|
||||
|
||||
/* Check to see if we're using the read-only plt form. */
|
||||
secureplt = map->l_info[DT_ALPHA(PLTRO)] != 0;
|
||||
|
||||
/* If the binary uses the read-only secure plt format, PG points to
|
||||
the .got.plt section, which is the right place for ld.so to place
|
||||
its hooks. Otherwise, PG is currently pointing at the start of
|
||||
the plt; the hooks go at offset 16. */
|
||||
pg = (struct pltgot *) D_PTR (map, l_info[DT_PLTGOT]);
|
||||
pg += !secureplt;
|
||||
|
||||
/* This function will be called to perform the relocation. They're
|
||||
not declared as functions to convince the compiler to use gp
|
||||
relative relocations for them. */
|
||||
if (secureplt)
|
||||
resolve = _dl_runtime_resolve_new;
|
||||
else
|
||||
resolve = _dl_runtime_resolve_old;
|
||||
|
||||
if (__builtin_expect (profile, 0))
|
||||
{
|
||||
if (secureplt)
|
||||
resolve = _dl_runtime_profile_new;
|
||||
else
|
||||
resolve = _dl_runtime_profile_old;
|
||||
|
||||
if (GLRO(dl_profile) && _dl_name_match_p (GLRO(dl_profile), map))
|
||||
{
|
||||
/* This is the object we are looking for. Say that we really
|
||||
want profiling and the timers are started. */
|
||||
GL(dl_profile_map) = map;
|
||||
}
|
||||
}
|
||||
|
||||
pg->resolve = resolve;
|
||||
pg->link = map;
|
||||
|
||||
return lazy;
|
||||
}
|
||||
|
||||
/* Entry stub of the dynamic linker, emitted as a top-level asm.

   _start sets up $gp and calls the C function _dl_start with the
   address of the argument block (the initial stack pointer) in $16.
   The value _dl_start returns in $0 is the user program's entry point.
   Control then falls through into _dl_start_user, which
     - keeps the entry point in $9,
     - if _dl_skip_args is nonzero, shifts argc/argv/envp/auxv down the
       stack by that many words ($fixup_stack),
     - expands RTLD_START_SPECIAL_INIT,
     - calls _dl_init_internal (_rtld_local, argc, argv, envp),
     - and finally jumps to the entry point with the address of _dl_fini
       in $0.  */
#define RTLD_START asm ("\
.section .text \n\
.set at \n\
.globl _start \n\
.ent _start \n\
_start: \n\
.frame $31,0,$31,0 \n\
br $gp, 0f \n\
0: ldgp $gp, 0($gp) \n\
.prologue 0 \n\
/* Pass pointer to argument block to _dl_start. */ \n\
mov $sp, $16 \n\
bsr $26, _dl_start !samegp \n\
.end _start \n\
/* FALLTHRU */ \n\
.globl _dl_start_user \n\
.ent _dl_start_user \n\
_dl_start_user: \n\
.frame $31,0,$31,0 \n\
.prologue 0 \n\
/* Save the user entry point address in s0. */ \n\
mov $0, $9 \n\
/* See if we were run as a command with the executable \n\
file name as an extra leading argument. */ \n\
ldah $1, _dl_skip_args($gp) !gprelhigh \n\
ldl $1, _dl_skip_args($1) !gprellow \n\
bne $1, $fixup_stack \n\
$fixup_stack_ret: \n\
/* The special initializer gets called with the stack \n\
just as the application's entry point will see it; \n\
it can switch stacks if it moves these contents \n\
over. */ \n\
" RTLD_START_SPECIAL_INIT " \n\
/* Call _dl_init(_dl_loaded, argc, argv, envp) to run \n\
initializers. */ \n\
ldah $16, _rtld_local($gp) !gprelhigh \n\
ldq $16, _rtld_local($16) !gprellow \n\
ldq $17, 0($sp) \n\
lda $18, 8($sp) \n\
s8addq $17, 8, $19 \n\
addq $19, $18, $19 \n\
bsr $26, _dl_init_internal !samegp \n\
/* Pass our finalizer function to the user in $0. */ \n\
ldah $0, _dl_fini($gp) !gprelhigh \n\
lda $0, _dl_fini($0) !gprellow \n\
/* Jump to the user's entry point. */ \n\
mov $9, $27 \n\
jmp ($9) \n\
$fixup_stack: \n\
/* Adjust the stack pointer to skip _dl_skip_args words.\n\
This involves copying everything down, since the \n\
stack pointer must always be 16-byte aligned. */ \n\
ldah $7, _dl_argv_internal($gp) !gprelhigh \n\
ldq $2, 0($sp) \n\
ldq $5, _dl_argv_internal($7) !gprellow \n\
subq $31, $1, $6 \n\
subq $2, $1, $2 \n\
s8addq $6, $5, $5 \n\
mov $sp, $4 \n\
s8addq $1, $sp, $3 \n\
stq $2, 0($sp) \n\
stq $5, _dl_argv_internal($7) !gprellow \n\
/* Copy down argv. */ \n\
0: ldq $5, 8($3) \n\
addq $4, 8, $4 \n\
addq $3, 8, $3 \n\
stq $5, 0($4) \n\
bne $5, 0b \n\
/* Copy down envp. */ \n\
1: ldq $5, 8($3) \n\
addq $4, 8, $4 \n\
addq $3, 8, $3 \n\
stq $5, 0($4) \n\
bne $5, 1b \n\
/* Copy down auxiliary table. */ \n\
2: ldq $5, 8($3) \n\
ldq $6, 16($3) \n\
addq $4, 16, $4 \n\
addq $3, 16, $3 \n\
stq $5, -8($4) \n\
stq $6, 0($4) \n\
bne $5, 2b \n\
br $fixup_stack_ret \n\
.end _dl_start_user \n\
.set noat \n\
.previous");
|
||||
|
||||
#ifndef RTLD_START_SPECIAL_INIT
|
||||
#define RTLD_START_SPECIAL_INIT /* nothing */
|
||||
#endif
|
||||
|
||||
/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry
|
||||
or TLS variables, so undefined references should not be allowed
|
||||
to define the value.
|
||||
|
||||
ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve
|
||||
to one of the main executable's symbols, as for a COPY reloc.
|
||||
This is unused on Alpha. */
|
||||
|
||||
/* Classify relocation TYPE.  The result is ELF_RTYPE_CLASS_PLT for PLT
   slot relocations and, except while bootstrapping ld.so without
   __thread support, also for the three TLS relocation types.  Every
   other type yields 0.  */
#if defined RTLD_BOOTSTRAP && !USE___THREAD
# define elf_machine_type_class(type)	\
  (((type) == R_ALPHA_JMP_SLOT) * ELF_RTYPE_CLASS_PLT)
#else
# define elf_machine_type_class(type)	\
  (((type) == R_ALPHA_JMP_SLOT		\
    || (type) == R_ALPHA_DTPMOD64	\
    || (type) == R_ALPHA_DTPREL64	\
    || (type) == R_ALPHA_TPREL64) * ELF_RTYPE_CLASS_PLT)
#endif
|
||||
|
||||
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
|
||||
#define ELF_MACHINE_JMP_SLOT R_ALPHA_JMP_SLOT
|
||||
|
||||
/* The alpha never uses Elf64_Rel relocations. */
|
||||
#define ELF_MACHINE_NO_REL 1
|
||||
|
||||
/* Fix up the instructions of a PLT entry to invoke the function
|
||||
rather than the dynamic linker. */
|
||||
static inline Elf64_Addr
|
||||
elf_machine_fixup_plt (struct link_map *map, lookup_t t,
|
||||
const Elf64_Rela *reloc,
|
||||
Elf64_Addr *got_addr, Elf64_Addr value)
|
||||
{
|
||||
const Elf64_Rela *rela_plt;
|
||||
Elf64_Word *plte;
|
||||
long int edisp;
|
||||
|
||||
/* Store the value we are going to load. */
|
||||
*got_addr = value;
|
||||
|
||||
/* If this binary uses the read-only secure plt format, we're done. */
|
||||
if (map->l_info[DT_ALPHA(PLTRO)])
|
||||
return value;
|
||||
|
||||
/* Otherwise we have to modify the plt entry in place to do the branch. */
|
||||
|
||||
/* Recover the PLT entry address by calculating reloc's index into the
|
||||
.rela.plt, and finding that entry in the .plt. */
|
||||
rela_plt = (const Elf64_Rela *) D_PTR (map, l_info[DT_JMPREL]);
|
||||
plte = (Elf64_Word *) (D_PTR (map, l_info[DT_PLTGOT]) + 32);
|
||||
plte += 3 * (reloc - rela_plt);
|
||||
|
||||
/* Find the displacement from the plt entry to the function. */
|
||||
edisp = (long int) (value - (Elf64_Addr)&plte[3]) / 4;
|
||||
|
||||
if (edisp >= -0x100000 && edisp < 0x100000)
|
||||
{
|
||||
/* If we are in range, use br to perfect branch prediction and
|
||||
elide the dependency on the address load. This case happens,
|
||||
e.g., when a shared library call is resolved to the same library. */
|
||||
|
||||
int hi, lo;
|
||||
hi = value - (Elf64_Addr)&plte[0];
|
||||
lo = (short int) hi;
|
||||
hi = (hi - lo) >> 16;
|
||||
|
||||
/* Emit "lda $27,lo($27)" */
|
||||
plte[1] = 0x237b0000 | (lo & 0xffff);
|
||||
|
||||
/* Emit "br $31,function" */
|
||||
plte[2] = 0xc3e00000 | (edisp & 0x1fffff);
|
||||
|
||||
/* Think about thread-safety -- the previous instructions must be
|
||||
committed to memory before the first is overwritten. */
|
||||
__asm__ __volatile__("wmb" : : : "memory");
|
||||
|
||||
/* Emit "ldah $27,hi($27)" */
|
||||
plte[0] = 0x277b0000 | (hi & 0xffff);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Don't bother with the hint since we already know the hint is
|
||||
wrong. Eliding it prevents the wrong page from getting pulled
|
||||
into the cache. */
|
||||
|
||||
int hi, lo;
|
||||
hi = (Elf64_Addr)got_addr - (Elf64_Addr)&plte[0];
|
||||
lo = (short)hi;
|
||||
hi = (hi - lo) >> 16;
|
||||
|
||||
/* Emit "ldq $27,lo($27)" */
|
||||
plte[1] = 0xa77b0000 | (lo & 0xffff);
|
||||
|
||||
/* Emit "jmp $31,($27)" */
|
||||
plte[2] = 0x6bfb0000;
|
||||
|
||||
/* Think about thread-safety -- the previous instructions must be
|
||||
committed to memory before the first is overwritten. */
|
||||
__asm__ __volatile__("wmb" : : : "memory");
|
||||
|
||||
/* Emit "ldah $27,hi($27)" */
|
||||
plte[0] = 0x277b0000 | (hi & 0xffff);
|
||||
}
|
||||
|
||||
/* At this point, if we've been doing runtime resolution, Icache is dirty.
|
||||
This will be taken care of in _dl_runtime_resolve. If instead we are
|
||||
doing this as part of non-lazy startup relocation, that bit of code
|
||||
hasn't made it into Icache yet, so there's nothing to clean up. */
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
/* Compute the final value of a PLT relocation: the resolved address
   VALUE plus the addend stored in RELOC.  MAP is not consulted.  */
static inline Elf64_Addr
elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
		       Elf64_Addr value)
{
  const Elf64_Addr with_addend = value + reloc->r_addend;

  return with_addend;
}
|
||||
|
||||
/* Names of the architecture-specific auditing callback functions. */
|
||||
#define ARCH_LA_PLTENTER alpha_gnu_pltenter
|
||||
#define ARCH_LA_PLTEXIT alpha_gnu_pltexit
|
||||
|
||||
#endif /* !dl_machine_h */
|
||||
|
||||
#ifdef RESOLVE_MAP
|
||||
|
||||
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
|
||||
MAP is the object containing the reloc. */
|
||||
auto inline void
|
||||
__attribute__ ((always_inline))
|
||||
elf_machine_rela (struct link_map *map,
|
||||
const Elf64_Rela *reloc,
|
||||
const Elf64_Sym *sym,
|
||||
const struct r_found_version *version,
|
||||
void *const reloc_addr_arg)
|
||||
{
|
||||
Elf64_Addr *const reloc_addr = reloc_addr_arg;
|
||||
unsigned long int const r_type = ELF64_R_TYPE (reloc->r_info);
|
||||
|
||||
#if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC && !defined SHARED
|
||||
/* This is defined in rtld.c, but nowhere in the static libc.a; make the
|
||||
reference weak so static programs can still link. This declaration
|
||||
cannot be done when compiling rtld.c (i.e. #ifdef RTLD_BOOTSTRAP)
|
||||
because rtld.c contains the common defn for _dl_rtld_map, which is
|
||||
incompatible with a weak decl in the same file. */
|
||||
weak_extern (_dl_rtld_map);
|
||||
#endif
|
||||
|
||||
/* We cannot use a switch here because we cannot locate the switch
|
||||
jump table until we've self-relocated. */
|
||||
|
||||
#if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC
|
||||
if (__builtin_expect (r_type == R_ALPHA_RELATIVE, 0))
|
||||
{
|
||||
# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC
|
||||
/* Already done in dynamic linker. */
|
||||
if (map != &GL(dl_rtld_map))
|
||||
# endif
|
||||
{
|
||||
/* XXX Make some timings. Maybe it's preferable to test for
|
||||
unaligned access and only do it the complex way if necessary. */
|
||||
Elf64_Addr reloc_addr_val;
|
||||
|
||||
/* Load value without causing unaligned trap. */
|
||||
memcpy (&reloc_addr_val, reloc_addr_arg, 8);
|
||||
reloc_addr_val += map->l_addr;
|
||||
|
||||
/* Store value without causing unaligned trap. */
|
||||
memcpy (reloc_addr_arg, &reloc_addr_val, 8);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (__builtin_expect (r_type == R_ALPHA_NONE, 0))
|
||||
return;
|
||||
else
|
||||
{
|
||||
struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
|
||||
Elf64_Addr sym_value;
|
||||
Elf64_Addr sym_raw_value;
|
||||
|
||||
sym_raw_value = sym_value = reloc->r_addend;
|
||||
if (sym_map)
|
||||
{
|
||||
sym_raw_value += sym->st_value;
|
||||
sym_value = sym_raw_value + sym_map->l_addr;
|
||||
}
|
||||
|
||||
if (r_type == R_ALPHA_GLOB_DAT)
|
||||
*reloc_addr = sym_value;
|
||||
#ifdef RESOLVE_CONFLICT_FIND_MAP
|
||||
/* In .gnu.conflict section, R_ALPHA_JMP_SLOT relocations have
|
||||
R_ALPHA_JMP_SLOT in lower 8 bits and the remaining 24 bits
|
||||
are .rela.plt index. */
|
||||
else if ((r_type & 0xff) == R_ALPHA_JMP_SLOT)
|
||||
{
|
||||
/* elf_machine_fixup_plt needs the map reloc_addr points into,
|
||||
while in _dl_resolve_conflicts map is _dl_loaded. */
|
||||
RESOLVE_CONFLICT_FIND_MAP (map, reloc_addr);
|
||||
reloc = ((const Elf64_Rela *) D_PTR (map, l_info[DT_JMPREL]))
|
||||
+ (r_type >> 8);
|
||||
elf_machine_fixup_plt (map, 0, reloc, reloc_addr, sym_value);
|
||||
}
|
||||
#else
|
||||
else if (r_type == R_ALPHA_JMP_SLOT)
|
||||
elf_machine_fixup_plt (map, 0, reloc, reloc_addr, sym_value);
|
||||
#endif
|
||||
#ifndef RTLD_BOOTSTRAP
|
||||
else if (r_type == R_ALPHA_REFQUAD)
|
||||
{
|
||||
/* Store value without causing unaligned trap. */
|
||||
memcpy (reloc_addr_arg, &sym_value, 8);
|
||||
}
|
||||
#endif
|
||||
#if !defined RTLD_BOOTSTRAP || USE___THREAD
|
||||
else if (r_type == R_ALPHA_DTPMOD64)
|
||||
{
|
||||
# ifdef RTLD_BOOTSTRAP
|
||||
/* During startup the dynamic linker is always index 1. */
|
||||
*reloc_addr = 1;
|
||||
# else
|
||||
/* Get the information from the link map returned by the
|
||||
resolve function. */
|
||||
if (sym_map != NULL)
|
||||
*reloc_addr = sym_map->l_tls_modid;
|
||||
# endif
|
||||
}
|
||||
else if (r_type == R_ALPHA_DTPREL64)
|
||||
{
|
||||
# ifndef RTLD_BOOTSTRAP
|
||||
/* During relocation all TLS symbols are defined and used.
|
||||
Therefore the offset is already correct. */
|
||||
*reloc_addr = sym_raw_value;
|
||||
# endif
|
||||
}
|
||||
else if (r_type == R_ALPHA_TPREL64)
|
||||
{
|
||||
# ifdef RTLD_BOOTSTRAP
|
||||
*reloc_addr = sym_raw_value + map->l_tls_offset;
|
||||
# else
|
||||
if (sym_map)
|
||||
{
|
||||
CHECK_STATIC_TLS (map, sym_map);
|
||||
*reloc_addr = sym_raw_value + sym_map->l_tls_offset;
|
||||
}
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
else
|
||||
_dl_reloc_bad_type (map, r_type, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Let do-rel.h know that on Alpha if l_addr is 0, all RELATIVE relocs
|
||||
can be skipped. */
|
||||
#define ELF_MACHINE_REL_RELATIVE 1
|
||||
|
||||
/* Apply a RELATIVE relocation: add the load bias L_ADDR to the quadword
   stored at RELOC_ADDR_ARG.  RELOC itself is not consulted.  */
auto inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc,
			   void *const reloc_addr_arg)
{
  /* XXX Make some timings.  Maybe it's preferable to test for
     unaligned access and only do it the complex way if necessary.  */
  Elf64_Addr word;

  /* Go through memcpy in both directions so that a misaligned target
     does not cause an unaligned trap.  */
  memcpy (&word, reloc_addr_arg, sizeof word);
  word += l_addr;
  memcpy (reloc_addr_arg, &word, sizeof word);
}
|
||||
|
||||
/* Process one relocation from the lazily bound PLT relocation table of
   MAP.  L_ADDR is the load bias of the object and RELOC the entry to
   handle.  */
auto inline void
__attribute__ ((always_inline))
elf_machine_lazy_rel (struct link_map *map,
		      Elf64_Addr l_addr, const Elf64_Rela *reloc)
{
  unsigned long int const kind = ELF64_R_TYPE (reloc->r_info);
  Elf64_Addr *const slot = (void *) (l_addr + reloc->r_offset);

  /* A no-op relocation needs no work at all.  */
  if (kind == R_ALPHA_NONE)
    return;

  /* Anything other than a jump slot is not valid here.  */
  if (kind != R_ALPHA_JMP_SLOT)
    {
      _dl_reloc_bad_type (map, kind, 1);
      return;
    }

  /* Perform a RELATIVE reloc on the .got entry that transfers
     to the .plt.  */
  *slot += l_addr;
}
|
||||
|
||||
#endif /* RESOLVE_MAP */
|
@ -1,24 +0,0 @@
|
||||
/* System-specific settings for dynamic linker code. Alpha version.
|
||||
Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include_next <dl-sysdep.h>
|
||||
|
||||
/* _dl_argv cannot be attribute_relro, because _dl_start_user
|
||||
might write into it after _dl_start returns. */
|
||||
#define DL_ARGV_NOT_RELRO 1
|
@ -1,29 +0,0 @@
|
||||
/* Thread-local storage handling in the ELF dynamic linker. Alpha version.
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
|
||||
/* Type used for the representation of TLS information in the GOT. */
|
||||
typedef struct
|
||||
{
|
||||
unsigned long int ti_module;
|
||||
unsigned long int ti_offset;
|
||||
} tls_index;
|
||||
|
||||
|
||||
extern void *__tls_get_addr (tls_index *ti);
|
@ -1,541 +0,0 @@
|
||||
/* PLT trampolines. Alpha version.
|
||||
Copyright (C) 2005 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.set noat
|
||||
|
||||
/* savei REGNO, OFFSET: store integer register $REGNO at OFFSET($30)
   and emit the matching cfi_rel_offset record for that register.  */
.macro savei regno, offset
|
||||
stq $\regno, \offset($30)
|
||||
cfi_rel_offset(\regno, \offset)
|
||||
.endm
|
||||
|
||||
/* savef REGNO, OFFSET: store floating-point register $fREGNO at
   OFFSET($30) with stt and emit a cfi_rel_offset record for it; the
   CFI register number used is REGNO+32.  */
.macro savef regno, offset
|
||||
stt $f\regno, \offset($30)
|
||||
cfi_rel_offset(\regno+32, \offset)
|
||||
.endm
|
||||
|
||||
.align 4
|
||||
.globl _dl_runtime_resolve_new
|
||||
.ent _dl_runtime_resolve_new
|
||||
|
||||
#undef FRAMESIZE
|
||||
#define FRAMESIZE 14*8
|
||||
|
||||
_dl_runtime_resolve_new:
|
||||
.frame $30, FRAMESIZE, $26, 0
|
||||
.mask 0x4000000, 0
|
||||
|
||||
ldah $29, 0($27) !gpdisp!1
|
||||
lda $30, -FRAMESIZE($30)
|
||||
stq $26, 0*8($30)
|
||||
stq $16, 2*8($30)
|
||||
|
||||
stq $17, 3*8($30)
|
||||
lda $29, 0($29) !gpdisp!1
|
||||
stq $18, 4*8($30)
|
||||
mov $28, $16 /* link_map from .got.plt */
|
||||
|
||||
stq $19, 5*8($30)
|
||||
mov $25, $17 /* offset of reloc entry */
|
||||
stq $20, 6*8($30)
|
||||
mov $26, $18 /* return address */
|
||||
|
||||
stq $21, 7*8($30)
|
||||
stt $f16, 8*8($30)
|
||||
stt $f17, 9*8($30)
|
||||
stt $f18, 10*8($30)
|
||||
|
||||
stt $f19, 11*8($30)
|
||||
stt $f20, 12*8($30)
|
||||
stt $f21, 13*8($30)
|
||||
.prologue 2
|
||||
|
||||
bsr $26, _dl_fixup !samegp
|
||||
mov $0, $27
|
||||
|
||||
ldq $26, 0*8($30)
|
||||
ldq $16, 2*8($30)
|
||||
ldq $17, 3*8($30)
|
||||
ldq $18, 4*8($30)
|
||||
ldq $19, 5*8($30)
|
||||
ldq $20, 6*8($30)
|
||||
ldq $21, 7*8($30)
|
||||
ldt $f16, 8*8($30)
|
||||
ldt $f17, 9*8($30)
|
||||
ldt $f18, 10*8($30)
|
||||
ldt $f19, 11*8($30)
|
||||
ldt $f20, 12*8($30)
|
||||
ldt $f21, 13*8($30)
|
||||
lda $30, FRAMESIZE($30)
|
||||
jmp $31, ($27), 0
|
||||
.end _dl_runtime_resolve_new
|
||||
|
||||
/* _dl_runtime_profile_new: lazy-binding trampoline for the new PLT
   format, used when the call has to go through _dl_profile_fixup.

   On entry $28 holds the link_map from .got.plt, $25 the offset of the
   reloc entry and $26 the caller's return address; $27 is used to
   compute the GP.  $16-$21 and $f16-$f21 carry the callee's arguments.

   Frame layout, in quadword slots from the frame base:
      0      $26
      1      incoming sp
      2-7    $16-$21
      8-13   $f16-$f21	(slots 0-13 are passed as La_alpha_regs)
     14      framesize written by _dl_profile_fixup; afterwards reused
	     to hold the call destination while the arguments are copied
     15      saved $15
     16-19   link_map and reloc offset until the call has been made,
	     then $0, $1, $f0, $f1 for _dl_call_pltexit

   When the framesize is negative the call is not wrapped: the argument
   registers are restored and we jump straight to the destination.
   Otherwise the stack arguments are copied to a fresh area, the
   destination is called, and _dl_call_pltexit runs before returning
   to the original caller.  */
	.globl	_dl_runtime_profile_new
	.type	_dl_runtime_profile_new, @function

#undef FRAMESIZE
#define FRAMESIZE	20*8

	/* We save the registers in a different order than desired by
	   .mask/.fmask, so we have to use explicit cfi directives.  */
	cfi_startproc

_dl_runtime_profile_new:
	ldah	$29, 0($27)		!gpdisp!2
	lda	$30, -FRAMESIZE($30)
	/* Tell the unwinder about the frame before any cfi_rel_offset
	   record is emitted; without this the CFA is off by FRAMESIZE.  */
	cfi_adjust_cfa_offset (FRAMESIZE)
	savei	26, 0*8
	stq	$16, 2*8($30)

	stq	$17, 3*8($30)
	lda	$29, 0($29)		!gpdisp!2
	stq	$18, 4*8($30)
	lda	$1, FRAMESIZE($30)	/* incoming sp value */

	stq	$1, 1*8($30)
	stq	$19, 5*8($30)
	stq	$20, 6*8($30)
	mov	$28, $16		/* link_map from .got.plt */

	stq	$21, 7*8($30)
	mov	$25, $17		/* offset of reloc entry */
	stt	$f16, 8*8($30)
	mov	$26, $18		/* return address */

	stt	$f17, 9*8($30)
	mov	$30, $19		/* La_alpha_regs address */
	stt	$f18, 10*8($30)
	lda	$20, 14*8($30)		/* framesize address */

	stt	$f19, 11*8($30)
	stt	$f20, 12*8($30)
	stt	$f21, 13*8($30)
	/* Keep link_map and reloc offset for _dl_call_pltexit.  */
	stq	$28, 16*8($30)
	stq	$25, 17*8($30)

	bsr	$26, _dl_profile_fixup	!samegp
	mov	$0, $27

	/* Discover if we're wrapping this call.  */
	ldq	$18, 14*8($30)
	bge	$18, 1f

	/* Not wrapping: restore the argument registers and tail-jump.  */
	ldq	$26, 0*8($30)
	ldq	$16, 2*8($30)
	ldq	$17, 3*8($30)
	ldq	$18, 4*8($30)
	ldq	$19, 5*8($30)
	ldq	$20, 6*8($30)
	ldq	$21, 7*8($30)
	ldt	$f16, 8*8($30)
	ldt	$f17, 9*8($30)
	ldt	$f18, 10*8($30)
	ldt	$f19, 11*8($30)
	ldt	$f20, 12*8($30)
	ldt	$f21, 13*8($30)
	lda	$30, FRAMESIZE($30)
	jmp	$31, ($27), 0

1:
	/* Create a frame pointer and allocate a new argument frame.
	   $18 is the framesize, rounded up to a multiple of 16.  */
	savei	15, 15*8
	mov	$30, $15
	cfi_def_cfa_register (15)
	addq	$18, 15, $18
	bic	$18, 15, $18
	subq	$30, $18, $30

	/* Save the call destination around memcpy.  The slot must be
	   addressed off the frame pointer: relative to the lowered $30 it
	   would either fall inside the area memcpy is about to fill or
	   alias one of the saved-register slots.  The framesize slot is
	   free now that its value lives in $18.  */
	stq	$0, 14*8($15)

	/* Copy the stack arguments into place.  */
	lda	$16, 0($30)
	lda	$17, FRAMESIZE($15)
	jsr	$26, memcpy
	ldgp	$29, 0($26)

	/* Reload the call destination and the argument registers.  */
	ldq	$27, 14*8($15)
	ldq	$16, 2*8($15)
	ldq	$17, 3*8($15)
	ldq	$18, 4*8($15)
	ldq	$19, 5*8($15)
	ldq	$20, 6*8($15)
	ldq	$21, 7*8($15)
	ldt	$f16, 8*8($15)
	ldt	$f17, 9*8($15)
	ldt	$f18, 10*8($15)
	ldt	$f19, 11*8($15)
	ldt	$f20, 12*8($15)
	ldt	$f21, 13*8($15)

	jsr	$26, ($27), 0
	ldgp	$29, 0($26)

	/* Set up for call to _dl_call_pltexit.  The link_map and reloc
	   offset are fetched before their slots are overwritten with the
	   return values.  */
	ldq	$16, 16*8($15)
	ldq	$17, 17*8($15)
	stq	$0, 16*8($15)
	lda	$18, 0($15)
	stq	$1, 17*8($15)
	lda	$19, 16*8($15)
	stt	$f0, 18*8($15)
	stt	$f1, 19*8($15)
	bsr	$26, _dl_call_pltexit	!samegp

	mov	$15, $30
	cfi_def_cfa_register (30)
	/* _dl_call_pltexit is an ordinary call and does not preserve the
	   return-value registers; hand the (possibly updated) values
	   from the return-value area back to the caller.  */
	ldq	$0, 16*8($30)
	ldq	$1, 17*8($30)
	ldt	$f0, 18*8($30)
	ldt	$f1, 19*8($30)
	ldq	$26, 0($30)
	ldq	$15, 15*8($30)
	lda	$30, FRAMESIZE($30)
	ret

	cfi_endproc
	.size	_dl_runtime_profile_new, .-_dl_runtime_profile_new
|
||||
|
||||
.align 4
|
||||
.globl _dl_runtime_resolve_old
|
||||
.ent _dl_runtime_resolve_old
|
||||
|
||||
#undef FRAMESIZE
|
||||
#define FRAMESIZE 44*8
|
||||
|
||||
_dl_runtime_resolve_old:
|
||||
lda $30, -FRAMESIZE($30)
|
||||
.frame $30, FRAMESIZE, $26
|
||||
/* Preserve all registers that C normally doesn't. */
|
||||
stq $26, 0*8($30)
|
||||
stq $0, 1*8($30)
|
||||
stq $1, 2*8($30)
|
||||
stq $2, 3*8($30)
|
||||
stq $3, 4*8($30)
|
||||
stq $4, 5*8($30)
|
||||
stq $5, 6*8($30)
|
||||
stq $6, 7*8($30)
|
||||
stq $7, 8*8($30)
|
||||
stq $8, 9*8($30)
|
||||
stq $16, 10*8($30)
|
||||
stq $17, 11*8($30)
|
||||
stq $18, 12*8($30)
|
||||
stq $19, 13*8($30)
|
||||
stq $20, 14*8($30)
|
||||
stq $21, 15*8($30)
|
||||
stq $22, 16*8($30)
|
||||
stq $23, 17*8($30)
|
||||
stq $24, 18*8($30)
|
||||
stq $25, 19*8($30)
|
||||
stq $29, 20*8($30)
|
||||
stt $f0, 21*8($30)
|
||||
stt $f1, 22*8($30)
|
||||
stt $f10, 23*8($30)
|
||||
stt $f11, 24*8($30)
|
||||
stt $f12, 25*8($30)
|
||||
stt $f13, 26*8($30)
|
||||
stt $f14, 27*8($30)
|
||||
stt $f15, 28*8($30)
|
||||
stt $f16, 29*8($30)
|
||||
stt $f17, 30*8($30)
|
||||
stt $f18, 31*8($30)
|
||||
stt $f19, 32*8($30)
|
||||
stt $f20, 33*8($30)
|
||||
stt $f21, 34*8($30)
|
||||
stt $f22, 35*8($30)
|
||||
stt $f23, 36*8($30)
|
||||
stt $f24, 37*8($30)
|
||||
stt $f25, 38*8($30)
|
||||
stt $f26, 39*8($30)
|
||||
stt $f27, 40*8($30)
|
||||
stt $f28, 41*8($30)
|
||||
stt $f29, 42*8($30)
|
||||
stt $f30, 43*8($30)
|
||||
.mask 0x27ff01ff, -FRAMESIZE
|
||||
.fmask 0xfffffc03, -FRAMESIZE+21*8
|
||||
/* Set up our GP. */
|
||||
br $29, .+4
|
||||
ldgp $29, 0($29)
|
||||
.prologue 0
|
||||
/* Set up the arguments for _dl_fixup:
|
||||
$16 = link_map out of plt0
|
||||
$17 = offset of reloc entry = ($28 - $27 - 20) /12 * 24
|
||||
$18 = return address
|
||||
*/
|
||||
subq $28, $27, $17
|
||||
ldq $16, 8($27)
|
||||
subq $17, 20, $17
|
||||
mov $26, $18
|
||||
addq $17, $17, $17
|
||||
bsr $26, _dl_fixup !samegp
|
||||
|
||||
/* Move the destination address into position. */
|
||||
mov $0, $27
|
||||
/* Restore program registers. */
|
||||
ldq $26, 0*8($30)
|
||||
ldq $0, 1*8($30)
|
||||
ldq $1, 2*8($30)
|
||||
ldq $2, 3*8($30)
|
||||
ldq $3, 4*8($30)
|
||||
ldq $4, 5*8($30)
|
||||
ldq $5, 6*8($30)
|
||||
ldq $6, 7*8($30)
|
||||
ldq $7, 8*8($30)
|
||||
ldq $8, 9*8($30)
|
||||
ldq $16, 10*8($30)
|
||||
ldq $17, 11*8($30)
|
||||
ldq $18, 12*8($30)
|
||||
ldq $19, 13*8($30)
|
||||
ldq $20, 14*8($30)
|
||||
ldq $21, 15*8($30)
|
||||
ldq $22, 16*8($30)
|
||||
ldq $23, 17*8($30)
|
||||
ldq $24, 18*8($30)
|
||||
ldq $25, 19*8($30)
|
||||
ldq $29, 20*8($30)
|
||||
ldt $f0, 21*8($30)
|
||||
ldt $f1, 22*8($30)
|
||||
ldt $f10, 23*8($30)
|
||||
ldt $f11, 24*8($30)
|
||||
ldt $f12, 25*8($30)
|
||||
ldt $f13, 26*8($30)
|
||||
ldt $f14, 27*8($30)
|
||||
ldt $f15, 28*8($30)
|
||||
ldt $f16, 29*8($30)
|
||||
ldt $f17, 30*8($30)
|
||||
ldt $f18, 31*8($30)
|
||||
ldt $f19, 32*8($30)
|
||||
ldt $f20, 33*8($30)
|
||||
ldt $f21, 34*8($30)
|
||||
ldt $f22, 35*8($30)
|
||||
ldt $f23, 36*8($30)
|
||||
ldt $f24, 37*8($30)
|
||||
ldt $f25, 38*8($30)
|
||||
ldt $f26, 39*8($30)
|
||||
ldt $f27, 40*8($30)
|
||||
ldt $f28, 41*8($30)
|
||||
ldt $f29, 42*8($30)
|
||||
ldt $f30, 43*8($30)
|
||||
/* Flush the Icache after having modified the .plt code. */
|
||||
imb
|
||||
/* Clean up and transfer control to the destination. */
|
||||
lda $30, FRAMESIZE($30)
|
||||
jmp $31, ($27)
|
||||
|
||||
.end _dl_runtime_resolve_old
|
||||
|
||||
/* _dl_runtime_profile_old: lazy-binding trampoline for the old PLT
   format, used when the call has to go through _dl_profile_fixup.

   On entry $27 and $28 are as left by the old-style PLT code: the
   link_map is loaded from 8($27) and the reloc offset is derived from
   $28 - $27.  $26 is the caller's return address.

   Frame layout, in quadword slots from the frame base:
      0      $26
      1      incoming sp
      2-7    $16-$21
      8-13   $f16-$f21	(slots 0-13 are passed as La_alpha_regs)
     14-27   $0-$8, $22-$25, $29
     28-44   $f0, $f1, $f10-$f15, $f22-$f30
     45      framesize written by _dl_profile_fixup, later saved $15
     46-49   return-value area for _dl_call_pltexit ($0, $1, $f0, $f1);
	     before the call is made, slot 46 temporarily holds the call
	     destination and slots 48/49 the link_map and reloc offset

   When the framesize is negative the call is not wrapped: all saved
   registers are restored and we jump straight to the destination.
   Otherwise the stack arguments are copied to a fresh area, the
   destination is called, and _dl_call_pltexit runs before returning
   to the original caller.  */
	.globl	_dl_runtime_profile_old
	.usepv	_dl_runtime_profile_old, no
	.type	_dl_runtime_profile_old, @function

	/* We save the registers in a different order than desired by
	   .mask/.fmask, so we have to use explicit cfi directives.  */
	cfi_startproc

#undef FRAMESIZE
#define FRAMESIZE	50*8

	.align	4
_dl_runtime_profile_old:
	lda	$30, -FRAMESIZE($30)
	cfi_adjust_cfa_offset (FRAMESIZE)

	/* Preserve all argument registers.  This also constructs the
	   La_alpha_regs structure.  */
	savei	26, 0*8
	savei	16, 2*8
	savei	17, 3*8
	savei	18, 4*8
	savei	19, 5*8
	savei	20, 6*8
	savei	21, 7*8
	lda	$16, FRAMESIZE($30)	/* incoming sp value */
	savef	16, 8*8
	savef	17, 9*8
	savef	18, 10*8
	savef	19, 11*8
	savef	20, 12*8
	savef	21, 13*8
	stq	$16, 1*8($30)

	/* Preserve all registers that C normally doesn't.  */
	savei	0, 14*8
	savei	1, 15*8
	savei	2, 16*8
	savei	3, 17*8
	savei	4, 18*8
	savei	5, 19*8
	savei	6, 20*8
	savei	7, 21*8
	savei	8, 22*8
	savei	22, 23*8
	savei	23, 24*8
	savei	24, 25*8
	savei	25, 26*8
	savei	29, 27*8
	savef	0, 28*8
	savef	1, 29*8
	savef	10, 30*8
	savef	11, 31*8
	savef	12, 32*8
	savef	13, 33*8
	savef	14, 34*8
	savef	15, 35*8
	savef	22, 36*8
	savef	23, 37*8
	savef	24, 38*8
	savef	25, 39*8
	savef	26, 40*8
	savef	27, 41*8
	savef	28, 42*8
	savef	29, 43*8
	savef	30, 44*8

	/* Set up our GP.  */
	br	$29, .+4
	ldgp	$29, 0($29)

	/* Set up the arguments for _dl_profile_fixup:
	   $16 = link_map out of plt0
	   $17 = offset of reloc entry = ($28 - $27 - 20) /12 * 24
	   $18 = return address
	   $19 = La_alpha_regs address
	   $20 = framesize address
	*/
	subq	$28, $27, $17
	ldq	$16, 8($27)
	subq	$17, 20, $17
	mov	$26, $18
	addq	$17, $17, $17
	lda	$19, 0($30)
	lda	$20, 45*8($30)
	/* Keep link_map and reloc offset for _dl_call_pltexit.  */
	stq	$16, 48*8($30)
	stq	$17, 49*8($30)

	bsr	$26, _dl_profile_fixup	!samegp

	/* Discover if we're wrapping this call.  */
	ldq	$18, 45*8($30)
	bge	$18, 1f

	/* Move the destination address into position.  */
	mov	$0, $27
	/* Restore program registers.  */
	ldq	$26, 0*8($30)
	ldq	$16, 2*8($30)
	ldq	$17, 3*8($30)
	ldq	$18, 4*8($30)
	ldq	$19, 5*8($30)
	ldq	$20, 6*8($30)
	ldq	$21, 7*8($30)
	ldt	$f16, 8*8($30)
	ldt	$f17, 9*8($30)
	ldt	$f18, 10*8($30)
	ldt	$f19, 11*8($30)
	ldt	$f20, 12*8($30)
	ldt	$f21, 13*8($30)
	ldq	$0, 14*8($30)
	ldq	$1, 15*8($30)
	ldq	$2, 16*8($30)
	ldq	$3, 17*8($30)
	ldq	$4, 18*8($30)
	ldq	$5, 19*8($30)
	ldq	$6, 20*8($30)
	ldq	$7, 21*8($30)
	ldq	$8, 22*8($30)
	ldq	$22, 23*8($30)
	ldq	$23, 24*8($30)
	ldq	$24, 25*8($30)
	ldq	$25, 26*8($30)
	ldq	$29, 27*8($30)
	ldt	$f0, 28*8($30)
	ldt	$f1, 29*8($30)
	ldt	$f10, 30*8($30)
	ldt	$f11, 31*8($30)
	ldt	$f12, 32*8($30)
	ldt	$f13, 33*8($30)
	ldt	$f14, 34*8($30)
	ldt	$f15, 35*8($30)
	ldt	$f22, 36*8($30)
	ldt	$f23, 37*8($30)
	ldt	$f24, 38*8($30)
	ldt	$f25, 39*8($30)
	ldt	$f26, 40*8($30)
	ldt	$f27, 41*8($30)
	ldt	$f28, 42*8($30)
	ldt	$f29, 43*8($30)
	ldt	$f30, 44*8($30)

	/* Clean up and transfer control to the destination.  */
	lda	$30, FRAMESIZE($30)
	jmp	$31, ($27)

1:
	/* Create a frame pointer and allocate a new argument frame.
	   $18 is the framesize, rounded up to a multiple of 16.  The
	   framesize slot is reused to save $15.  */
	savei	15, 45*8
	mov	$30, $15
	cfi_def_cfa_register (15)
	addq	$18, 15, $18
	bic	$18, 15, $18
	subq	$30, $18, $30

	/* Save the call destination around memcpy.  The slot must be
	   addressed off the frame pointer: relative to the lowered $30 it
	   would either fall inside the area memcpy is about to fill or
	   alias one of the saved-register slots.  Slot 46 is not needed
	   for a return value until the call has been made.  */
	stq	$0, 46*8($15)

	/* Copy the stack arguments into place.  */
	lda	$16, 0($30)
	lda	$17, FRAMESIZE($15)
	jsr	$26, memcpy
	ldgp	$29, 0($26)

	/* Reload the call destination and the argument registers.  */
	ldq	$27, 46*8($15)
	ldq	$16, 2*8($15)
	ldq	$17, 3*8($15)
	ldq	$18, 4*8($15)
	ldq	$19, 5*8($15)
	ldq	$20, 6*8($15)
	ldq	$21, 7*8($15)
	ldt	$f16, 8*8($15)
	ldt	$f17, 9*8($15)
	ldt	$f18, 10*8($15)
	ldt	$f19, 11*8($15)
	ldt	$f20, 12*8($15)
	ldt	$f21, 13*8($15)

	jsr	$26, ($27), 0
	ldgp	$29, 0($26)

	/* Set up for call to _dl_call_pltexit.  The link_map and reloc
	   offset are fetched before their slots are overwritten with the
	   return values.  */
	ldq	$16, 48*8($15)
	ldq	$17, 49*8($15)
	stq	$0, 46*8($15)
	lda	$18, 0($15)
	stq	$1, 47*8($15)
	lda	$19, 46*8($15)
	stt	$f0, 48*8($15)
	stt	$f1, 49*8($15)
	bsr	$26, _dl_call_pltexit	!samegp

	mov	$15, $30
	cfi_def_cfa_register (30)
	/* _dl_call_pltexit is an ordinary call and does not preserve the
	   return-value registers; hand the (possibly updated) values
	   from the return-value area back to the caller.  */
	ldq	$0, 46*8($30)
	ldq	$1, 47*8($30)
	ldt	$f0, 48*8($30)
	ldt	$f1, 49*8($30)
	ldq	$26, 0($30)
	ldq	$15, 45*8($30)
	lda	$30, FRAMESIZE($30)
	ret

	cfi_endproc
	.size	_dl_runtime_profile_old, .-_dl_runtime_profile_old
|
106
sysdeps/alpha/elf/configure
vendored
106
sysdeps/alpha/elf/configure
vendored
@ -1,106 +0,0 @@
|
||||
# This file is generated from configure.in by Autoconf. DO NOT EDIT!
|
||||
# Local configure fragment for sysdeps/alpha/elf.
|
||||
|
||||
if test "$usetls" != no; then
|
||||
# Check for support of thread-local storage handling in assembler and
|
||||
# linker.
|
||||
echo "$as_me:$LINENO: checking for Alpha TLS support" >&5
|
||||
echo $ECHO_N "checking for Alpha TLS support... $ECHO_C" >&6
|
||||
if test "${libc_cv_alpha_tls+set}" = set; then
|
||||
echo $ECHO_N "(cached) $ECHO_C" >&6
|
||||
else
|
||||
cat > conftest.s <<\EOF
|
||||
.section ".tdata", "awT", @progbits
|
||||
.globl foo
|
||||
foo: .quad 1
|
||||
.section ".tbss", "awT", @nobits
|
||||
.globl bar
|
||||
bar: .skip 8
|
||||
.text
|
||||
baz:
|
||||
.set nomacro
|
||||
ldq $27, __tls_get_addr($29) !literal!1
|
||||
ldq $16, a($29) !tlsgd!1
|
||||
jsr $26, ($27), __tls_get_addr !lituse_tlsgd!1
|
||||
|
||||
jsr $26, ($27), __tls_get_addr !lituse_tlsldm!2
|
||||
ldq $27, __tls_get_addr($29) !literal!2
|
||||
ldq $16, b($29) !tlsldm!2
|
||||
|
||||
ldq $16, c($29) !tlsgd
|
||||
ldq $16, d($29) !tlsldm
|
||||
|
||||
ldq $16, e($29) !tlsgd!3
|
||||
ldq $16, f($29) !tlsldm!4
|
||||
|
||||
ldq $16, g($29) !gotdtprel
|
||||
ldah $16, h($31) !dtprelhi
|
||||
lda $16, i($16) !dtprello
|
||||
lda $16, j($31) !dtprel
|
||||
|
||||
ldq $16, k($29) !gottprel
|
||||
ldah $16, l($31) !tprelhi
|
||||
lda $16, m($16) !tprello
|
||||
lda $16, n($31) !tprel
|
||||
EOF
|
||||
if { ac_try='${CC-cc} -c $CFLAGS conftest.s 1>&5'
|
||||
{ (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
|
||||
(eval $ac_try) 2>&5
|
||||
ac_status=$?
|
||||
echo "$as_me:$LINENO: \$? = $ac_status" >&5
|
||||
(exit $ac_status); }; }; then
|
||||
libc_cv_alpha_tls=yes
|
||||
else
|
||||
libc_cv_alpha_tls=no
|
||||
fi
|
||||
rm -f conftest*
|
||||
fi
|
||||
echo "$as_me:$LINENO: result: $libc_cv_alpha_tls" >&5
|
||||
echo "${ECHO_T}$libc_cv_alpha_tls" >&6
|
||||
if test $libc_cv_alpha_tls = yes; then
|
||||
cat >>confdefs.h <<\_ACEOF
|
||||
#define HAVE_TLS_SUPPORT 1
|
||||
_ACEOF
|
||||
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "$as_me:$LINENO: checking for GP relative module local relocs" >&5
|
||||
echo $ECHO_N "checking for GP relative module local relocs... $ECHO_C" >&6
|
||||
if test "${libc_cv_alpha_hidden_gprel+set}" = set; then
|
||||
echo $ECHO_N "(cached) $ECHO_C" >&6
|
||||
else
|
||||
cat > conftest.c <<\EOF
|
||||
static int bar;
|
||||
int baz __attribute__((visibility("hidden")));
|
||||
|
||||
int foo (void)
|
||||
{
|
||||
return bar + baz;
|
||||
}
|
||||
EOF
|
||||
|
||||
libc_cv_alpha_hidden_gprel=no
|
||||
if { ac_try='${CC-cc} -S $CFLAGS -O2 -fpic conftest.c 1>&5'
|
||||
{ (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
|
||||
(eval $ac_try) 2>&5
|
||||
ac_status=$?
|
||||
echo "$as_me:$LINENO: \$? = $ac_status" >&5
|
||||
(exit $ac_status); }; }; then
|
||||
if grep -q 'bar.*!gprel' conftest.s \
|
||||
&& grep -q 'baz.*!gprel' conftest.s \
|
||||
&& ! grep -q 'bar.*!literal' conftest.s \
|
||||
&& ! grep -q 'baz.*!literal' conftest.s; then
|
||||
libc_cv_alpha_hidden_gprel=yes
|
||||
fi
|
||||
fi
|
||||
rm -f conftest*
|
||||
fi
|
||||
echo "$as_me:$LINENO: result: $libc_cv_alpha_hidden_gprel" >&5
|
||||
echo "${ECHO_T}$libc_cv_alpha_hidden_gprel" >&6
|
||||
if test $libc_cv_alpha_hidden_gprel = yes; then
|
||||
cat >>confdefs.h <<\_ACEOF
|
||||
#define PI_STATIC_AND_HIDDEN 1
|
||||
_ACEOF
|
||||
|
||||
fi
|
@ -1,78 +0,0 @@
|
||||
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
|
||||
# Local configure fragment for sysdeps/alpha/elf.
|
||||
|
||||
if test "$usetls" != no; then
|
||||
# Check for support of thread-local storage handling in assembler and
|
||||
# linker.
|
||||
AC_CACHE_CHECK(for Alpha TLS support, libc_cv_alpha_tls, [dnl
|
||||
cat > conftest.s <<\EOF
|
||||
.section ".tdata", "awT", @progbits
|
||||
.globl foo
|
||||
foo: .quad 1
|
||||
.section ".tbss", "awT", @nobits
|
||||
.globl bar
|
||||
bar: .skip 8
|
||||
.text
|
||||
baz:
|
||||
.set nomacro
|
||||
ldq $27, __tls_get_addr($29) !literal!1
|
||||
ldq $16, a($29) !tlsgd!1
|
||||
jsr $26, ($27), __tls_get_addr !lituse_tlsgd!1
|
||||
|
||||
jsr $26, ($27), __tls_get_addr !lituse_tlsldm!2
|
||||
ldq $27, __tls_get_addr($29) !literal!2
|
||||
ldq $16, b($29) !tlsldm!2
|
||||
|
||||
ldq $16, c($29) !tlsgd
|
||||
ldq $16, d($29) !tlsldm
|
||||
|
||||
ldq $16, e($29) !tlsgd!3
|
||||
ldq $16, f($29) !tlsldm!4
|
||||
|
||||
ldq $16, g($29) !gotdtprel
|
||||
ldah $16, h($31) !dtprelhi
|
||||
lda $16, i($16) !dtprello
|
||||
lda $16, j($31) !dtprel
|
||||
|
||||
ldq $16, k($29) !gottprel
|
||||
ldah $16, l($31) !tprelhi
|
||||
lda $16, m($16) !tprello
|
||||
lda $16, n($31) !tprel
|
||||
EOF
|
||||
dnl
|
||||
if AC_TRY_COMMAND(${CC-cc} -c $CFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
|
||||
libc_cv_alpha_tls=yes
|
||||
else
|
||||
libc_cv_alpha_tls=no
|
||||
fi
|
||||
rm -f conftest*])
|
||||
if test $libc_cv_alpha_tls = yes; then
|
||||
AC_DEFINE(HAVE_TLS_SUPPORT)
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_CACHE_CHECK(for GP relative module local relocs, libc_cv_alpha_hidden_gprel, [dnl
|
||||
cat > conftest.c <<\EOF
|
||||
static int bar;
|
||||
int baz __attribute__((visibility("hidden")));
|
||||
|
||||
int foo (void)
|
||||
{
|
||||
return bar + baz;
|
||||
}
|
||||
EOF
|
||||
dnl
|
||||
|
||||
libc_cv_alpha_hidden_gprel=no
|
||||
if AC_TRY_COMMAND(${CC-cc} -S $CFLAGS -O2 -fpic conftest.c 1>&AS_MESSAGE_LOG_FD); then
|
||||
if grep -q 'bar.*!gprel' conftest.s \
|
||||
&& grep -q 'baz.*!gprel' conftest.s \
|
||||
&& ! grep -q 'bar.*!literal' conftest.s \
|
||||
&& ! grep -q 'baz.*!literal' conftest.s; then
|
||||
libc_cv_alpha_hidden_gprel=yes
|
||||
fi
|
||||
fi
|
||||
rm -f conftest*])
|
||||
if test $libc_cv_alpha_hidden_gprel = yes; then
|
||||
AC_DEFINE(PI_STATIC_AND_HIDDEN)
|
||||
fi
|
@ -1,110 +0,0 @@
|
||||
/* Special .init and .fini section support for Alpha.
|
||||
Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file with other
|
||||
programs, and to distribute those programs without any restriction
|
||||
coming from the use of this file. (The GNU Lesser General Public
|
||||
License restrictions do apply in other respects; for example, they
|
||||
cover modification of the file, and distribution when not linked
|
||||
into another program.)
|
||||
|
||||
Note that people who make modified versions of this file are not
|
||||
obligated to grant this special exception for their modified
|
||||
versions; it is their choice whether to do so. The GNU Lesser
|
||||
General Public License gives permission to release a modified
|
||||
version without this exception; this exception also makes it
|
||||
possible to release a modified version which carries forward this
|
||||
exception.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* This file is compiled into assembly code which is then munged by a sed
|
||||
script into two files: crti.s and crtn.s.
|
||||
|
||||
* crti.s puts a function prologue at the beginning of the .init and .fini
|
||||
sections and defines global symbols for those addresses, so they can be
|
||||
called as functions.
|
||||
|
||||
* crtn.s puts the corresponding function epilogues in the .init and .fini
|
||||
sections.
|
||||
|
||||
This differs from what would be generated by the generic code in that
|
||||
we save and restore the GP within the function. In order for linker
|
||||
relaxation to work, the value in the GP register on exit from a function
|
||||
must be valid for the function entry point. Normally, a function is
|
||||
contained within one object file and this is not an issue, provided
|
||||
that the function reloads the gp after making any function calls.
|
||||
However, _init and _fini are constructed from pieces of many object
|
||||
files, all of which may have different GP values. So we must reload
|
||||
the GP value from crti.o in crtn.o. */
|
||||
|
||||
__asm__ (" \n\
|
||||
#include \"defs.h\" \n\
|
||||
\n\
|
||||
/*@HEADER_ENDS*/ \n\
|
||||
\n\
|
||||
/*@_init_PROLOG_BEGINS*/ \n\
|
||||
.section .init, \"ax\", @progbits \n\
|
||||
.globl _init \n\
|
||||
.type _init, @function \n\
|
||||
.usepv _init, std \n\
|
||||
_init: \n\
|
||||
ldgp $29, 0($27) \n\
|
||||
subq $30, 16, $30 \n\
|
||||
lda $27, __gmon_start__ \n\
|
||||
stq $26, 0($30) \n\
|
||||
stq $29, 8($30) \n\
|
||||
beq $27, 1f \n\
|
||||
jsr $26, ($27), __gmon_start__ \n\
|
||||
ldq $29, 8($30) \n\
|
||||
.align 3 \n\
|
||||
1: \n\
|
||||
/*@_init_PROLOG_ENDS*/ \n\
|
||||
\n\
|
||||
/*@_init_EPILOG_BEGINS*/ \n\
|
||||
.section .init, \"ax\", @progbits \n\
|
||||
ldq $26, 0($30) \n\
|
||||
ldq $29, 8($30) \n\
|
||||
addq $30, 16, $30 \n\
|
||||
ret \n\
|
||||
/*@_init_EPILOG_ENDS*/ \n\
|
||||
\n\
|
||||
/*@_fini_PROLOG_BEGINS*/ \n\
|
||||
.section .fini, \"ax\", @progbits \n\
|
||||
.globl _fini \n\
|
||||
.type _fini,@function \n\
|
||||
.usepv _fini,std \n\
|
||||
_fini: \n\
|
||||
ldgp $29, 0($27) \n\
|
||||
subq $30, 16, $30 \n\
|
||||
stq $26, 0($30) \n\
|
||||
stq $29, 8($30) \n\
|
||||
.align 3 \n\
|
||||
/*@_fini_PROLOG_ENDS*/ \n\
|
||||
\n\
|
||||
/*@_fini_EPILOG_BEGINS*/ \n\
|
||||
.section .fini, \"ax\", @progbits \n\
|
||||
ldq $26, 0($30) \n\
|
||||
ldq $29, 8($30) \n\
|
||||
addq $30, 16, $30 \n\
|
||||
ret \n\
|
||||
/*@_fini_EPILOG_ENDS*/ \n\
|
||||
\n\
|
||||
/*@TRAILER_BEGINS*/ \n\
|
||||
");
|
@ -1,87 +0,0 @@
|
||||
/* Startup code for Alpha/ELF.
|
||||
Copyright (C) 1993, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003
|
||||
Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Richard Henderson <rth@tamu.edu>
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
In addition to the permissions in the GNU Lesser General Public
|
||||
License, the Free Software Foundation gives you unlimited
|
||||
permission to link the compiled version of this file with other
|
||||
programs, and to distribute those programs without any restriction
|
||||
coming from the use of this file. (The GNU Lesser General Public
|
||||
License restrictions do apply in other respects; for example, they
|
||||
cover modification of the file, and distribution when not linked
|
||||
into another program.)
|
||||
|
||||
Note that people who make modified versions of this file are not
|
||||
obligated to grant this special exception for their modified
|
||||
versions; it is their choice whether to do so. The GNU Lesser
|
||||
General Public License gives permission to release a modified
|
||||
version without this exception; this exception also makes it
|
||||
possible to release a modified version which carries forward this
|
||||
exception.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.align 3
|
||||
.globl _start
|
||||
/* _start: process entry point.  Establishes the global pointer, gathers
   the arguments for __libc_start_main in a0-a5 plus one stack slot, and
   transfers control to it.  Control is not expected to come back; if it
   does, the final call_pal traps.  */
.ent _start, 0
|
||||
.type _start,@function
|
||||
_start:
|
||||
.frame $15, 0, $15
|
||||
/* br deposits the address of label 1 in gp; ldgp then derives the real
   global pointer from that address.  */
br gp, 1f
|
||||
1: ldgp gp, 0(gp)
|
||||
/* Reserve 16 bytes of stack.  This is why argc and argv are found at
   16(sp) and 24(sp) below rather than at 0(sp) and 8(sp).  */
subq sp, 16, sp
|
||||
/* Zero $15, the register named as frame register in the .frame
   directive above -- presumably so that stack walkers see the end of
   the frame chain here (TODO confirm).  */
mov 0, $15
|
||||
.prologue 0
|
||||
|
||||
/* Load address of the user's main function. */
|
||||
lda a0, main
|
||||
|
||||
ldl a1, 16(sp) /* get argc */
|
||||
lda a2, 24(sp) /* get argv */
|
||||
|
||||
/* Load address of our own entry points to .fini and .init. */
|
||||
lda a3, __libc_csu_init
|
||||
lda a4, __libc_csu_fini
|
||||
|
||||
/* Store address of the shared library termination function. */
|
||||
/* The value is taken from v0 exactly as it was on entry; nothing above
   writes v0.  */
mov v0, a5
|
||||
|
||||
/* Provide the highest stack address to the user code. */
|
||||
/* Stored in the first of the two slots reserved above; presumably read
   by __libc_start_main as a stack-passed argument (TODO confirm).  */
stq sp, 0(sp)
|
||||
|
||||
/* Call the user's main function, and exit with its value.
|
||||
But let the libc call main. */
|
||||
jsr ra, __libc_start_main
|
||||
|
||||
/* Die very horribly if exit returns. Call_pal hlt is callable from
|
||||
kernel mode only; this will result in an illegal instruction trap. */
|
||||
call_pal 0
|
||||
.end _start
|
||||
|
||||
/* For ECOFF backwards compatibility. */
|
||||
weak_alias (_start, __start)
|
||||
|
||||
/* Define a symbol for the first piece of initialized data. */
|
||||
.data
|
||||
.globl __data_start
|
||||
__data_start:
|
||||
.weak data_start
|
||||
data_start = __data_start
|
@ -1,91 +0,0 @@
|
||||
/* Copyright (C) 1996, 1997, 1998, 2004 Free Software Foundation, Inc.
|
||||
Contributed by David Mosberger (davidm@cs.arizona.edu).
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* Finds the first bit set in an integer. Optimized for the Alpha
|
||||
architecture. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.set noreorder
|
||||
.set noat
|
||||
|
||||
|
||||
/* __ffs: find-first-set for a 32-bit argument.  The only work done here
   is to clear the upper four bytes of $16 (zap with byte mask 0xF0) so
   that just the low 32 bits are examined; the search itself is the code
   of ffsl that follows.  In a profiling build control branches to the
   $ffsl..ng label, which sits after ffsl's own _mcount call; otherwise
   execution simply runs off the end of this stub into ffsl, so ffsl
   must stay immediately behind it in the file.  */
ENTRY(__ffs)
|
||||
#ifdef PROF
|
||||
ldgp gp, 0(pv)
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.prologue 1
|
||||
zap $16, 0xF0, $16
|
||||
br $ffsl..ng
|
||||
#else
|
||||
.prologue 0
|
||||
zap $16, 0xF0, $16
|
||||
# FALLTHRU
|
||||
#endif
|
||||
END(__ffs)
|
||||
|
||||
.align 4
|
||||
/* ffsl: return in $0 the 1-based position of the least significant set
   bit of the 64-bit value in $16, or 0 when $16 is zero.  Registers
   $1-$6 are used as scratch.

   The search is branch-free and runs in two stages:
     1. find the index N of the lowest non-zero byte;
     2. find the lowest set bit inside that byte.
   Each stage reduces its input to a one-hot 8-bit mask and then decodes
   the index of the hot bit with the masks 0xF0 / 0xCC / 0xAA, which
   select the "4", "2" and "1" components of the index respectively.

   The "e0"/"e1" column in the trailing comments appears to be the
   original author's issue-slot annotation.  */
ENTRY(ffsl)
|
||||
#ifdef PROF
|
||||
ldgp gp, 0(pv)
|
||||
lda AT, _mcount
|
||||
jsr AT, (AT), _mcount
|
||||
.prologue 1
|
||||
/* Entry used by __ffs in profiling builds; skips the _mcount call above.  */
$ffsl..ng:
|
||||
#else
|
||||
.prologue 0
|
||||
#endif
|
||||
/* Stage 1: $3 gets one flag bit per zero byte of the input, computed by
   comparing each byte of the complement against 0xFF.  */
not $16, $1 # e0 :
|
||||
ldi $2, -1 # .. e1 :
|
||||
cmpbge $1, $2, $3 # e0 : bit N == 1 for byte N == 0
|
||||
clr $0 # .. e1 :
|
||||
/* ($3 + 1) & ~$3 isolates the lowest clear bit of $3, i.e. a one-hot
   marker for the first byte that is not zero.  */
addq $3, 1, $4 # e0 :
|
||||
bic $4, $3, $3 # e1 : bit N == 1 for first byte N != 0
|
||||
/* Decode the one-hot marker into the byte index N = 4a + 2b + c.  */
and $3, 0xF0, $4 # e0 :
|
||||
and $3, 0xCC, $5 # .. e1 :
|
||||
and $3, 0xAA, $6 # e0 :
|
||||
cmovne $4, 4, $0 # .. e1 :
|
||||
cmovne $5, 2, $5 # e0 :
|
||||
cmovne $6, 1, $6 # .. e1 :
|
||||
addl $0, $5, $0 # e0 :
|
||||
addl $0, $6, $0 # e1 : $0 == N
|
||||
/* Stage 2: pull out byte N and isolate its lowest set bit with
   x & -x.  $2 starts at 1 so that the final result is 1-based.  */
extbl $16, $0, $1 # e0 : $1 == byte N
|
||||
ldi $2, 1 # .. e1 :
|
||||
negq $1, $3 # e0 :
|
||||
and $3, $1, $3 # e1 : bit N == least bit set of byte N
|
||||
and $3, 0xF0, $4 # e0 :
|
||||
and $3, 0xCC, $5 # .. e1 :
|
||||
and $3, 0xAA, $6 # e0 :
|
||||
/* 5 rather than 4: this replaces the initial 1 in $2, folding the
   "+1" bias and the "4" component into a single value.  */
cmovne $4, 5, $2 # .. e1 :
|
||||
cmovne $5, 2, $5 # e0 :
|
||||
cmovne $6, 1, $6 # .. e1 :
|
||||
/* Result = 8 * N + $2 + $5 + $6.  */
s8addl $0, $2, $0 # e0 : mult byte ofs by 8 and sum
|
||||
addl $5, $6, $5 # .. e1 :
|
||||
addl $0, $5, $0 # e0 :
|
||||
nop # .. e1 :
|
||||
/* With a zero input both stages decode index 0, so the sum above comes
   out as 1; force the documented result of 0 instead.  */
cmoveq $16, 0, $0 # e0 : trap input == 0 case.
|
||||
ret # .. e1 : 18
|
||||
|
||||
END(ffsl)
|
||||
|
||||
weak_alias (__ffs, ffs)
|
||||
libc_hidden_builtin_def (ffs)
|
||||
weak_extern (ffsl)
|
||||
weak_alias (ffsl, ffsll)
|
@ -1 +0,0 @@
|
||||
/* This function is defined in ffs.S. */
|
@ -1,23 +0,0 @@
|
||||
libc {
|
||||
GLIBC_2.0 {
|
||||
# functions used in other libraries
|
||||
__ieee_get_fp_control; __ieee_set_fp_control;
|
||||
}
|
||||
}
|
||||
libm {
|
||||
GLIBC_2.3.4 {
|
||||
# functions implementing old complex float abi
|
||||
__c1_cabsf; __c1_cacosf; __c1_cacoshf; __c1_cargf; __c1_casinf;
|
||||
__c1_casinhf; __c1_catanf; __c1_catanhf; __c1_ccosf; __c1_ccoshf;
|
||||
__c1_cexpf; __c1_cimagf; __c1_clog10f; __c1_clogf; __c1_conjf;
|
||||
__c1_cpowf; __c1_cprojf; __c1_crealf; __c1_csinf; __c1_csinhf;
|
||||
__c1_csqrtf; __c1_ctanf; __c1_ctanhf;
|
||||
|
||||
# functions implementing new complex float abi
|
||||
cabsf; cacosf; cacoshf; cargf; casinf;
|
||||
casinhf; catanf; catanhf; ccosf; ccoshf;
|
||||
cexpf; cimagf; clog10f; clogf; conjf;
|
||||
cpowf; cprojf; crealf; csinf; csinhf;
|
||||
csqrtf; ctanf; ctanhf;
|
||||
}
|
||||
}
|
@ -1,123 +0,0 @@
|
||||
/* Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#ifndef _FENV_H
|
||||
# error "Never use <bits/fenv.h> directly; include <fenv.h> instead."
|
||||
#endif
|
||||
|
||||
|
||||
/* Define the bits representing the exception.
|
||||
|
||||
Note that these are the bit positions as defined by the OSF/1
|
||||
ieee_{get,set}_control_word interface and not by the hardware fpcr.
|
||||
|
||||
See the Alpha Architecture Handbook section 4.7.7.3 for details,
|
||||
but in summary, trap shadows mean the hardware register can acquire
|
||||
extra exception bits so for proper IEEE support the tracking has to
|
||||
be done in software -- in this case with kernel support.
|
||||
|
||||
As to why the system call interface isn't in the same format as
|
||||
the hardware register, only those crazy folks at DEC can tell you. */
|
||||
|
||||
/* Exception flag bits, one bit each from bit 17 (FE_INVALID) up to
   bit 22 (FE_DENORMAL).  Every enumerator is also defined as a macro
   expanding to itself so that user code can test for its presence with
   #ifdef.  */
enum
|
||||
{
|
||||
#ifdef __USE_GNU
|
||||
FE_DENORMAL = 1UL << 22,
|
||||
#define FE_DENORMAL FE_DENORMAL
|
||||
#endif
|
||||
|
||||
FE_INEXACT = 1UL << 21,
|
||||
#define FE_INEXACT FE_INEXACT
|
||||
|
||||
FE_UNDERFLOW = 1UL << 20,
|
||||
#define FE_UNDERFLOW FE_UNDERFLOW
|
||||
|
||||
FE_OVERFLOW = 1UL << 19,
|
||||
#define FE_OVERFLOW FE_OVERFLOW
|
||||
|
||||
FE_DIVBYZERO = 1UL << 18,
|
||||
#define FE_DIVBYZERO FE_DIVBYZERO
|
||||
|
||||
FE_INVALID = 1UL << 17,
|
||||
#define FE_INVALID FE_INVALID
|
||||
|
||||
/* 0x3f is six bits wide, so this mask spans bits 17 through 22: the
   five flags above plus the FE_DENORMAL bit.  The denormal bit is part
   of the mask even when __USE_GNU is not defined and FE_DENORMAL
   itself is therefore not declared.  */
FE_ALL_EXCEPT = 0x3f << 17
|
||||
#define FE_ALL_EXCEPT FE_ALL_EXCEPT
|
||||
};
|
||||
|
||||
/* Alpha chips support all four defined rounding modes.
|
||||
|
||||
Note that code must be compiled to use dynamic rounding (/d) instructions
|
||||
to see these changes. For gcc this is -mfp-rounding-mode=d; for DEC cc
|
||||
this is -fprm d. The default for both is static rounding to nearest.
|
||||
|
||||
These are shifted down 58 bits from the hardware fpcr because the
|
||||
functions are declared to take integers. */
|
||||
|
||||
/* Rounding-mode selectors.  The four modes occupy the consecutive
   values 0 through 3, i.e. a two-bit field.  As with the exception
   flags, each enumerator is mirrored by a same-named macro so it can
   be detected with #ifdef.  */
enum
|
||||
{
|
||||
FE_TOWARDZERO = 0,
|
||||
#define FE_TOWARDZERO FE_TOWARDZERO
|
||||
|
||||
FE_DOWNWARD = 1,
|
||||
#define FE_DOWNWARD FE_DOWNWARD
|
||||
|
||||
FE_TONEAREST = 2,
|
||||
#define FE_TONEAREST FE_TONEAREST
|
||||
|
||||
FE_UPWARD = 3,
|
||||
#define FE_UPWARD FE_UPWARD
|
||||
};
|
||||
|
||||
#ifdef __USE_GNU
|
||||
/* On later hardware, and later kernels for earlier hardware, we can forcibly
|
||||
underflow denormal inputs and outputs. This can speed up certain programs
|
||||
significantly, usually without affecting accuracy. */
|
||||
/* Control bits for flushing denormals.  The two bits together give
   0x3000, which is the low part of the FE_NONIEEE_ENV constant defined
   later in this header.  */
enum
|
||||
{
|
||||
FE_MAP_DMZ = 1UL << 12, /* Map denorm inputs to zero */
|
||||
#define FE_MAP_DMZ FE_MAP_DMZ
|
||||
|
||||
FE_MAP_UMZ = 1UL << 13, /* Map underflowed outputs to zero */
|
||||
#define FE_MAP_UMZ FE_MAP_UMZ
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Type representing exception flags. */
|
||||
typedef unsigned long int fexcept_t;
|
||||
|
||||
/* Type representing floating-point environment. */
|
||||
typedef unsigned long int fenv_t;
|
||||
|
||||
/* If the default argument is used we use this value. Note that due to
|
||||
architecture-specified page mappings, no user-space pointer will ever
|
||||
have its two high bits set. Co-opt one. */
|
||||
#define FE_DFL_ENV ((__const fenv_t *) 0x8800000000000000UL)
|
||||
|
||||
#ifdef __USE_GNU
|
||||
/* Floating-point environment where none of the exceptions are masked. */
|
||||
# define FE_NOMASK_ENV ((__const fenv_t *) 0x880000000000003eUL)
|
||||
|
||||
/* Floating-point environment with (processor-dependent) non-IEEE floating
|
||||
point. In this case, mapping denormals to zero. */
|
||||
# define FE_NONIEEE_ENV ((__const fenv_t *) 0x8800000000003000UL)
|
||||
#endif
|
||||
|
||||
/* The system calls to talk to the kernel's FP code. */
|
||||
extern unsigned long int __ieee_get_fp_control (void) __THROW;
|
||||
extern void __ieee_set_fp_control (unsigned long int __value) __THROW;
|
@ -1,150 +0,0 @@
|
||||
/* Inline math functions for Alpha.
|
||||
Copyright (C) 1996, 1997, 1999-2001, 2004, 2007
|
||||
Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by David Mosberger-Tang.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#ifndef _MATH_H
|
||||
# error "Never use <bits/mathinline.h> directly; include <math.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __extern_inline
|
||||
# define __MATH_INLINE __inline
|
||||
#else
|
||||
# define __MATH_INLINE __extern_inline
|
||||
#endif
|
||||
|
||||
#if defined __USE_ISOC99 && defined __GNUC__ && !__GNUC_PREREQ(3,0)
|
||||
# undef isgreater
|
||||
# undef isgreaterequal
|
||||
# undef isless
|
||||
# undef islessequal
|
||||
# undef islessgreater
|
||||
# undef isunordered
|
||||
# define isunordered(u, v) \
|
||||
(__extension__ \
|
||||
({ double __r, __u = (u), __v = (v); \
|
||||
__asm ("cmptun/su %1,%2,%0\n\ttrapb" \
|
||||
: "=&f" (__r) : "f" (__u), "f"(__v)); \
|
||||
__r != 0; }))
|
||||
#endif /* ISO C99 */
|
||||
|
||||
#if (!defined __NO_MATH_INLINES || defined __LIBC_INTERNAL_MATH_INLINES) \
|
||||
&& defined __OPTIMIZE__
|
||||
|
||||
#if !__GNUC_PREREQ (4, 0)
|
||||
# define __inline_copysign(NAME, TYPE) \
|
||||
__MATH_INLINE TYPE \
|
||||
__NTH (NAME (TYPE __x, TYPE __y)) \
|
||||
{ \
|
||||
TYPE __z; \
|
||||
__asm ("cpys %1, %2, %0" : "=f" (__z) : "f" (__y), "f" (__x)); \
|
||||
return __z; \
|
||||
}
|
||||
|
||||
__inline_copysign (__copysignf, float)
|
||||
__inline_copysign (copysignf, float)
|
||||
__inline_copysign (__copysign, double)
|
||||
__inline_copysign (copysign, double)
|
||||
|
||||
# undef __inline_copysign
|
||||
#endif
|
||||
|
||||
|
||||
#if !__GNUC_PREREQ (2, 8)
|
||||
# define __inline_fabs(NAME, TYPE) \
|
||||
__MATH_INLINE TYPE \
|
||||
__NTH (NAME (TYPE __x)) \
|
||||
{ \
|
||||
TYPE __z; \
|
||||
__asm ("cpys $f31, %1, %0" : "=f" (__z) : "f" (__x)); \
|
||||
return __z; \
|
||||
}
|
||||
|
||||
__inline_fabs (__fabsf, float)
|
||||
__inline_fabs (fabsf, float)
|
||||
__inline_fabs (__fabs, double)
|
||||
__inline_fabs (fabs, double)
|
||||
|
||||
# undef __inline_fabs
|
||||
#endif
|
||||
|
||||
#ifdef __USE_ISOC99
|
||||
|
||||
/* Test for negative number. Used in the signbit() macro. */
|
||||
/* Return nonzero when the sign bit of the float __x is set.
   Compilers older than GCC 4.0 reinterpret the float as an int through
   a union and test whether that int is negative; newer compilers use
   the builtin.  The union path assumes int and float have the same
   size.  */
__MATH_INLINE int
|
||||
__NTH (__signbitf (float __x))
|
||||
{
|
||||
#if !__GNUC_PREREQ (4, 0)
|
||||
__extension__ union { float __f; int __i; } __u = { __f: __x };
|
||||
return __u.__i < 0;
|
||||
#else
|
||||
return __builtin_signbitf (__x);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Return nonzero when the sign bit of the double __x is set.
   Before GCC 4.0 the double is reinterpreted as a long through a union
   and tested for being negative; from 4.0 on the builtin is used.  The
   union path assumes long and double have the same size.  */
__MATH_INLINE int
|
||||
__NTH (__signbit (double __x))
|
||||
{
|
||||
#if !__GNUC_PREREQ (4, 0)
|
||||
__extension__ union { double __d; long __i; } __u = { __d: __x };
|
||||
return __u.__i < 0;
|
||||
#else
|
||||
return __builtin_signbit (__x);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Return nonzero when the sign bit of the long double __x is set.
   Before GCC 4.0 the value is overlaid with an array of longs sized to
   cover the whole long double, and the last array element is tested
   for being negative.  This presumes the sign bit lives in the
   highest-addressed word (little-endian layout) -- TODO confirm for
   every long double format this header is built with.  From GCC 4.0 on
   the builtin is used instead.  */
__MATH_INLINE int
|
||||
__NTH (__signbitl (long double __x))
|
||||
{
|
||||
#if !__GNUC_PREREQ (4, 0)
|
||||
__extension__ union {
|
||||
long double __d;
|
||||
long __i[sizeof(long double)/sizeof(long)];
|
||||
} __u = { __d: __x };
|
||||
return __u.__i[sizeof(long double)/sizeof(long) - 1] < 0;
|
||||
#else
|
||||
return __builtin_signbitl (__x);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Test for NaN. Used in the isnan() macro. */
|
||||
|
||||
/* Return nonzero when the float __x is a NaN.  Implemented by asking
   whether __x is unordered with respect to itself, which holds only
   for a NaN.  */
__MATH_INLINE int
|
||||
__NTH (__isnanf (float __x))
|
||||
{
|
||||
return isunordered (__x, __x);
|
||||
}
|
||||
|
||||
/* Return nonzero when the double __x is a NaN, using the same
   "unordered with itself" test as the float variant.  */
__MATH_INLINE int
|
||||
__NTH (__isnan (double __x))
|
||||
{
|
||||
return isunordered (__x, __x);
|
||||
}
|
||||
|
||||
#ifndef __NO_LONG_DOUBLE_MATH
|
||||
/* Return nonzero when the long double __x is a NaN.
   NOTE(review): when the isunordered macro defined earlier in this
   header is the one in effect (GCC older than 3.0), it copies its
   arguments into double temporaries, so the comparison is carried out
   in double precision -- confirm that is acceptable for long double
   inputs.  */
__MATH_INLINE int
|
||||
__NTH (__isnanl (long double __x))
|
||||
{
|
||||
return isunordered (__x, __x);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* C99 */
|
||||
|
||||
#endif /* __NO_MATH_INLINES */
|
@ -1,42 +0,0 @@
|
||||
/* Return the complex absolute value of float complex value.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#define __cabsf __cabsf_not_defined
|
||||
#define cabsf cabsf_not_defined
|
||||
|
||||
#include <complex.h>
|
||||
#include <math.h>
|
||||
#include "cfloat-compat.h"
|
||||
|
||||
#undef __cabsf
|
||||
#undef cabsf
|
||||
|
||||
/* cabsf for the old ("c1") complex float calling convention: the
   parameter list and the real/imaginary accessors come from the c1_*
   macros in cfloat-compat.h.  The magnitude is computed as the
   hypotenuse of the two components.  */
float
|
||||
__c1_cabsf (c1_cfloat_decl (z))
|
||||
{
|
||||
return __hypotf (c1_cfloat_real (z), c1_cfloat_imag (z));
|
||||
}
|
||||
|
||||
/* cabsf for the new ("c2") complex float calling convention; identical
   computation to the c1 variant, only the parameter declaration and
   accessors differ (c2_* macros from cfloat-compat.h).  */
float
|
||||
__c2_cabsf (c2_cfloat_decl (z))
|
||||
{
|
||||
return __hypotf (c2_cfloat_real (z), c2_cfloat_imag (z));
|
||||
}
|
||||
|
||||
cfloat_versions (cabsf);
|
@ -1,42 +0,0 @@
|
||||
/* Compute argument of complex float value.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#define __cargf __cargf_not_defined
|
||||
#define cargf cargf_not_defined
|
||||
|
||||
#include <complex.h>
|
||||
#include <math.h>
|
||||
#include "cfloat-compat.h"
|
||||
|
||||
#undef __cargf
|
||||
#undef cargf
|
||||
|
||||
/* cargf for the old ("c1") complex float calling convention.  The
   argument (phase angle) is atan2 of the imaginary part over the real
   part; note the imaginary component is passed first.  */
float
|
||||
__c1_cargf (c1_cfloat_decl (x))
|
||||
{
|
||||
return __atan2f (c1_cfloat_imag (x), c1_cfloat_real (x));
|
||||
}
|
||||
|
||||
/* cargf for the new ("c2") complex float calling convention; same
   atan2 (imaginary, real) computation as the c1 variant.  */
float
|
||||
__c2_cargf (c2_cfloat_decl (x))
|
||||
{
|
||||
return __atan2f (c2_cfloat_imag (x), c2_cfloat_real (x));
|
||||
}
|
||||
|
||||
cfloat_versions (cargf);
|
@ -1,74 +0,0 @@
|
||||
/* Compatibility macros for old and new Alpha complex float ABI.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
/* The behaviour of complex float changed between GCC 3.3 and 3.4.
|
||||
|
||||
In 3.3 and before (below, complex version 1, or "c1"), complex float
|
||||
values were packed into one floating point register.
|
||||
|
||||
In 3.4 and later (below, complex version 2, or "c2"), GCC changed to
|
||||
follow the official Tru64 ABI, which passes the components of a complex
|
||||
as separate parameters. */
|
||||
|
||||
#if __GNUC_PREREQ(3,4)
|
||||
typedef union { double d; _Complex float cf; } c1_compat;
|
||||
# define c1_cfloat_decl(x) double x
|
||||
# define c1_cfloat_real(x) __real__ c1_cfloat_value (x)
|
||||
# define c1_cfloat_imag(x) __imag__ c1_cfloat_value (x)
|
||||
# define c1_cfloat_value(x) (((c1_compat *)(void *)&x)->cf)
|
||||
# define c1_cfloat_rettype double
|
||||
# define c1_cfloat_return(x) ({ c1_compat _; _.cf = (x); _.d; })
|
||||
# define c2_cfloat_decl(x) _Complex float x
|
||||
# define c2_cfloat_real(x) __real__ x
|
||||
# define c2_cfloat_imag(x) __imag__ x
|
||||
# define c2_cfloat_value(x) x
|
||||
# define c2_cfloat_rettype _Complex float
|
||||
# define c2_cfloat_return(x) x
|
||||
#else
|
||||
# define c1_cfloat_decl(x) _Complex float x
|
||||
# define c1_cfloat_real(x) __real__ x
|
||||
# define c1_cfloat_imag(x) __imag__ x
|
||||
# define c1_cfloat_value(x) x
|
||||
# define c1_cfloat_rettype _Complex float
|
||||
# define c1_cfloat_return(x) x
|
||||
# define c2_cfloat_decl(x) float x ## r, float x ## i
|
||||
# define c2_cfloat_real(x) x ## r
|
||||
# define c2_cfloat_imag(x) x ## i
|
||||
# define c2_cfloat_value(x) \
|
||||
({ _Complex float _; __real__ _ = x##r; __imag__ _ = x##i; _; })
|
||||
# define c2_cfloat_rettype double _Complex
|
||||
# define c2_cfloat_return(x) x
|
||||
#endif
|
||||
|
||||
/* Get the proper symbol versions defined for each function. */
|
||||
|
||||
#include <shlib-compat.h>
|
||||
|
||||
#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_3_4)
|
||||
#define cfloat_versions_compat(func) \
|
||||
compat_symbol (libm, __c1_##func, func, GLIBC_2_1)
|
||||
#else
|
||||
#define cfloat_versions_compat(func)
|
||||
#endif
|
||||
|
||||
#define cfloat_versions(func) \
|
||||
cfloat_versions_compat(func); \
|
||||
versioned_symbol (libm, __c2_##func, func, GLIBC_2_3_4); \
|
||||
extern typeof(__c2_##func) __##func attribute_hidden; \
|
||||
strong_alias (__c2_##func, __##func)
|
@ -1,41 +0,0 @@
|
||||
/* Return imaginary part of complex float value.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#define __cimagf __cimagf_not_defined
|
||||
#define cimagf cimagf_not_defined
|
||||
|
||||
#include <complex.h>
|
||||
#include "cfloat-compat.h"
|
||||
|
||||
#undef __cimagf
|
||||
#undef cimagf
|
||||
|
||||
/* cimagf for the old ("c1") complex float calling convention: return
   the imaginary component as extracted by the c1 accessor macro.  */
float
|
||||
__c1_cimagf (c1_cfloat_decl (z))
|
||||
{
|
||||
return c1_cfloat_imag (z);
|
||||
}
|
||||
|
||||
/* cimagf for the new ("c2") complex float calling convention: return
   the imaginary component as extracted by the c2 accessor macro.  */
float
|
||||
__c2_cimagf (c2_cfloat_decl (z))
|
||||
{
|
||||
return c2_cfloat_imag (z);
|
||||
}
|
||||
|
||||
cfloat_versions (cimagf);
|
@ -1,43 +0,0 @@
|
||||
/* Return complex conjugate of complex float value.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#define __conjf __conjf_not_defined
|
||||
#define conjf conjf_not_defined
|
||||
|
||||
#include <complex.h>
|
||||
#include "cfloat-compat.h"
|
||||
|
||||
#undef __conjf
|
||||
#undef conjf
|
||||
|
||||
/* conjf for the old ("c1") complex float calling convention.  The
   argument is rebuilt into a _Complex float with c1_cfloat_value,
   conjugated with GCC's unary ~ operator, and converted back to the
   c1 return representation by c1_cfloat_return.  */
c1_cfloat_rettype
|
||||
__c1_conjf (c1_cfloat_decl (z))
|
||||
{
|
||||
_Complex float r = ~ c1_cfloat_value (z);
|
||||
return c1_cfloat_return (r);
|
||||
}
|
||||
|
||||
/* conjf for the new ("c2") complex float calling convention; same
   steps as the c1 variant using the c2_* macros: assemble the complex
   value, conjugate it with unary ~, hand it back via c2_cfloat_return.  */
c2_cfloat_rettype
|
||||
__c2_conjf (c2_cfloat_decl (z))
|
||||
{
|
||||
_Complex float r = ~ c2_cfloat_value (z);
|
||||
return c2_cfloat_return (r);
|
||||
}
|
||||
|
||||
cfloat_versions (conjf);
|
@ -1,41 +0,0 @@
|
||||
/* Return real part of complex float value.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#define __crealf __crealf_not_defined
|
||||
#define crealf crealf_not_defined
|
||||
|
||||
#include <complex.h>
|
||||
#include "cfloat-compat.h"
|
||||
|
||||
#undef __crealf
|
||||
#undef crealf
|
||||
|
||||
/* crealf for the old ("c1") complex float calling convention: return
   the real component as extracted by the c1 accessor macro.  */
float
|
||||
__c1_crealf (c1_cfloat_decl (z))
|
||||
{
|
||||
return c1_cfloat_real (z);
|
||||
}
|
||||
|
||||
/* crealf for the new ("c2") complex float calling convention: return
   the real component as extracted by the c2 accessor macro.  */
float
|
||||
__c2_crealf (c2_cfloat_decl (z))
|
||||
{
|
||||
return c2_cfloat_real (z);
|
||||
}
|
||||
|
||||
cfloat_versions (crealf);
|
@ -1,165 +0,0 @@
|
||||
/* Copyright (C) 1996,1997,1998,2002,2003 Free Software Foundation, Inc.
|
||||
Contributed by David Mosberger (davidm@cs.arizona.edu).
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <features.h>
|
||||
|
||||
#if !defined(_IEEE_FP_INEXACT)
|
||||
|
||||
/*
|
||||
* This version is much faster than generic sqrt implementation, but
|
||||
* it doesn't handle the inexact flag. It doesn't handle exceptional
|
||||
* values either, but will defer to the full ieee754_sqrt routine which
|
||||
* can.
|
||||
*/
|
||||
|
||||
/* Careful with rearranging this without consulting the assembly below. */
|
||||
/* Constant pool and lookup table for the assembly __ieee754_sqrt that
   follows.  The assembly addresses the members by hard-coded byte
   offsets (index * 8 for the eight leading words, 8*8 for T2), so the
   member order and sizes here must not change without updating those
   offsets.  The object is only referenced from inside the asm text,
   which is why it carries the "used" attribute.  */
const static struct sqrt_data_struct {
|
||||
/* Byte offsets 0, 8, 16 and 24.  */
unsigned long dn, up, half, almost_three_half;
|
||||
/* Byte offsets 32, 40, 48 and 56.  */
unsigned long one_and_a_half, two_to_minus_30, one, nan;
|
||||
/* Byte offset 64.  The assembly indexes this table with a byte offset
   masked by 0xfc and a 32-bit load, i.e. 64 four-byte entries.  */
const int T2[64];
|
||||
} sqrt_data __attribute__((used)) = {
|
||||
/* The eight words below are IEEE double bit patterns.  */
0x3fefffffffffffff, /* __dn = nextafter(1,-Inf) */
|
||||
0x3ff0000000000001, /* __up = nextafter(1,+Inf) */
|
||||
0x3fe0000000000000, /* half */
|
||||
0x3ff7ffffffc00000, /* almost_three_half = 1.5-2^-30 */
|
||||
0x3ff8000000000000, /* one_and_a_half */
|
||||
0x3e10000000000000, /* two_to_minus_30 */
|
||||
0x3ff0000000000000, /* one */
|
||||
0xffffffffffffffff, /* nan */
|
||||
|
||||
/* T2: correction values subtracted from the initial integer estimate
   of the reciprocal square root in the assembly.  */
{ 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
|
||||
0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
|
||||
0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
|
||||
0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
|
||||
0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
|
||||
0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
|
||||
0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
|
||||
0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd }
|
||||
};
|
||||
|
||||
asm ("\
|
||||
/* Define offsets into the structure defined in C above. */ \n\
|
||||
$DN = 0*8 \n\
|
||||
$UP = 1*8 \n\
|
||||
$HALF = 2*8 \n\
|
||||
$ALMOST_THREE_HALF = 3*8 \n\
|
||||
$NAN = 7*8 \n\
|
||||
$T2 = 8*8 \n\
|
||||
\n\
|
||||
/* Stack variables. */ \n\
|
||||
$K = 0 \n\
|
||||
$Y = 8 \n\
|
||||
\n\
|
||||
.text \n\
|
||||
.align 5 \n\
|
||||
.globl __ieee754_sqrt \n\
|
||||
.ent __ieee754_sqrt \n\
|
||||
__ieee754_sqrt: \n\
|
||||
ldgp $29, 0($27) \n\
|
||||
subq $sp, 16, $sp \n\
|
||||
.frame $sp, 16, $26, 0\n"
|
||||
#ifdef PROF
|
||||
" lda $28, _mcount \n\
|
||||
jsr $28, ($28), _mcount\n"
|
||||
#endif
|
||||
" .prologue 1 \n\
|
||||
\n\
|
||||
.align 4 \n\
|
||||
stt $f16, $K($sp) # e0 : \n\
|
||||
mult $f31, $f31, $f31 # .. fm : \n\
|
||||
lda $4, sqrt_data # e0 : \n\
|
||||
fblt $f16, $fixup # .. fa : \n\
|
||||
\n\
|
||||
ldah $2, 0x5fe8 # e0 : \n\
|
||||
ldq $3, $K($sp) # .. e1 : \n\
|
||||
ldt $f12, $HALF($4) # e0 : \n\
|
||||
ldt $f18, $ALMOST_THREE_HALF($4) # .. e1 : \n\
|
||||
\n\
|
||||
sll $3, 52, $5 # e0 : \n\
|
||||
lda $6, 0x7fd # .. e1 : \n\
|
||||
fnop # .. fa : \n\
|
||||
fnop # .. fm : \n\
|
||||
\n\
|
||||
subq $5, 1, $5 # e1 : \n\
|
||||
srl $3, 33, $1 # .. e0 : \n\
|
||||
cmpule $5, $6, $5 # e0 : \n\
|
||||
beq $5, $fixup # .. e1 : \n\
|
||||
\n\
|
||||
mult $f16, $f12, $f11 # fm : $f11 = x * 0.5 \n\
|
||||
subl $2, $1, $2 # .. e0 : \n\
|
||||
addt $f12, $f12, $f17 # .. fa : $f17 = 1.0 \n\
|
||||
srl $2, 12, $1 # e0 : \n\
|
||||
\n\
|
||||
and $1, 0xfc, $1 # e0 : \n\
|
||||
addq $1, $4, $1 # e1 : \n\
|
||||
ldl $1, $T2($1) # e0 : \n\
|
||||
addt $f12, $f17, $f15 # .. fa : $f15 = 1.5 \n\
|
||||
\n\
|
||||
subl $2, $1, $2 # e0 : \n\
|
||||
ldt $f14, $DN($4) # .. e1 : \n\
|
||||
sll $2, 32, $2 # e0 : \n\
|
||||
stq $2, $Y($sp) # e0 : \n\
|
||||
\n\
|
||||
ldt $f13, $Y($sp) # e0 : \n\
|
||||
mult/su $f11, $f13, $f10 # fm 2: $f10 = (x * 0.5) * y \n\
|
||||
mult $f10, $f13, $f10 # fm 4: $f10 = ((x*0.5)*y)*y \n\
|
||||
subt $f15, $f10, $f1 # fa 4: $f1 = (1.5-0.5*x*y*y) \n\
|
||||
\n\
|
||||
mult $f13, $f1, $f13 # fm 4: yp = y*(1.5-0.5*x*y^2)\n\
|
||||
mult/su $f11, $f13, $f1 # fm 4: $f11 = x * 0.5 * yp \n\
|
||||
mult $f1, $f13, $f11 # fm 4: $f11 = (x*0.5*yp)*yp \n\
|
||||
subt $f18, $f11, $f1 # fa 4: $f1=(1.5-2^-30)-x/2*yp^2\n\
|
||||
\n\
|
||||
mult $f13, $f1, $f13 # fm 4: ypp = $f13 = yp*$f1 \n\
|
||||
subt $f15, $f12, $f1 # .. fa : $f1 = (1.5 - 0.5) \n\
|
||||
ldt $f15, $UP($4) # .. e0 : \n\
|
||||
mult/su $f16, $f13, $f10 # fm 4: z = $f10 = x * ypp \n\
|
||||
\n\
|
||||
mult $f10, $f13, $f11 # fm 4: $f11 = z*ypp \n\
|
||||
mult $f10, $f12, $f12 # fm : $f12 = z*0.5 \n\
|
||||
subt $f1, $f11, $f1 # fa 4: $f1 = 1 - z*ypp \n\
|
||||
mult $f12, $f1, $f12 # fm 4: $f12 = z/2*(1 - z*ypp)\n\
|
||||
\n\
|
||||
addt $f10, $f12, $f0 # fa 4: zp=res= z+z/2*(1-z*ypp)\n\
|
||||
mult/c $f0, $f14, $f12 # fm 4: zmi = zp * DN \n\
|
||||
mult/c $f0, $f15, $f11 # fm : zpl = zp * UP \n\
|
||||
mult/c $f0, $f12, $f1 # fm : $f1 = zp * zmi \n\
|
||||
\n\
|
||||
mult/c $f0, $f11, $f15 # fm : $f15 = zp * zpl \n\
|
||||
subt/su $f1, $f16, $f13 # .. fa : y1 = zp*zmi - x \n\
|
||||
subt/su $f15, $f16, $f14 # fa 4: y2 = zp*zpl - x \n\
|
||||
fcmovge $f13, $f12, $f0 # fa 3: res = (y1>=0)?zmi:res \n\
|
||||
\n\
|
||||
fcmovlt $f14, $f11, $f0 # fa 4: res = (y2<0)?zpl:res \n\
|
||||
addq $sp, 16, $sp # .. e0 : \n\
|
||||
ret # .. e1 : \n\
|
||||
\n\
|
||||
.align 4 \n\
|
||||
$fixup: \n\
|
||||
addq $sp, 16, $sp \n\
|
||||
br __full_ieee754_sqrt !samegp \n\
|
||||
\n\
|
||||
.end __ieee754_sqrt");
|
||||
|
||||
/* Target of the "br __full_ieee754_sqrt" in the $fixup path of the
   assembly above.  It is referenced only from inside the asm string, so
   the declaration carries __attribute_used__.  */
static double __full_ieee754_sqrt(double) __attribute_used__;
|
||||
/* From here on the name __ieee754_sqrt expands to __full_ieee754_sqrt;
   presumably this makes the generic e_sqrt.c included at the end of this
   file provide the fallback under that name -- confirm against that file.  */
#define __ieee754_sqrt __full_ieee754_sqrt
|
||||
|
||||
#endif /* _IEEE_FP_INEXACT */
|
||||
|
||||
#include <sysdeps/ieee754/dbl-64/e_sqrt.c>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user