mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-27 11:43:34 +08:00
Fix x86 SSE cosf, sinf issues
* sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix unwind info if defined PIC. Fix special cases description. * sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise. * sysdeps/x86_64/fpu/s_sinf.S: Fix special cases description, fix DP_HI_MASK entry. * sysdeps/x86_64/fpu/s_cosf.S: Likewise.
This commit is contained in:
parent
3d9b46b350
commit
80ccd52c95
10
ChangeLog
10
ChangeLog
@ -1,3 +1,13 @@
|
||||
2012-09-10 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
|
||||
|
||||
* sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix
|
||||
unwind info if defined PIC. Fix special cases description.
|
||||
* sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise.
|
||||
|
||||
* sysdeps/x86_64/fpu/s_sinf.S: Fix special cases description, fix
|
||||
DP_HI_MASK entry.
|
||||
* sysdeps/x86_64/fpu/s_cosf.S: Likewise.
|
||||
|
||||
2012-09-07 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* scripts/check-local-headers.sh: Add "shopt -s nullglob".
|
||||
|
@ -50,25 +50,29 @@
|
||||
* 9) if x is NaN, return x-x.
|
||||
*
|
||||
* Special cases:
|
||||
* cos(+-0)==+-0 not raising inexact/underflow,
|
||||
* cos(subnormal) raises inexact/underflow
|
||||
* cos(min_normalized) raises inexact/underflow
|
||||
* cos(normalized) raises inexact
|
||||
* cos(Inf) = NaN, raises invalid, sets errno to EDOM
|
||||
* cos(NaN) = NaN
|
||||
* cos(+-0) = 1 not raising inexact,
|
||||
* cos(subnormal) raises inexact,
|
||||
* cos(min_normalized) raises inexact,
|
||||
* cos(normalized) raises inexact,
|
||||
* cos(Inf) = NaN, raises invalid, sets errno to EDOM,
|
||||
* cos(NaN) = NaN.
|
||||
*/
|
||||
|
||||
#ifdef PIC
|
||||
# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
|
||||
# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
|
||||
# define SAVE_BX pushl %ebx
|
||||
# define RESTORE_BX popl %ebx
|
||||
# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
|
||||
# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
|
||||
# define PUSH(REG) pushl REG; CFI_PUSH(REG)
|
||||
# define POP(REG) popl REG; CFI_POP(REG)
|
||||
# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
|
||||
# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
|
||||
# define ARG_X 8(%esp)
|
||||
#else
|
||||
# define MO1(symbol) L(symbol)
|
||||
# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
|
||||
# define SAVE_BX
|
||||
# define RESTORE_BX
|
||||
# define ENTRANCE
|
||||
# define RETURN ret
|
||||
# define ARG_X 4(%esp)
|
||||
#endif
|
||||
|
||||
@ -76,11 +80,7 @@
|
||||
ENTRY(__cosf_sse2)
|
||||
/* Input: single precision x on stack at address ARG_X */
|
||||
|
||||
#ifdef PIC
|
||||
SAVE_BX
|
||||
LOAD_PIC_REG(bx)
|
||||
#endif
|
||||
|
||||
ENTRANCE
|
||||
movl ARG_X, %eax /* Bits of x */
|
||||
cvtss2sd ARG_X, %xmm0 /* DP x */
|
||||
andl $0x7fffffff, %eax /* |x| */
|
||||
@ -143,8 +143,7 @@ L(reconstruction):
|
||||
fldl 0(%esp) /* ...to FPU. */
|
||||
/* Return back 8 bytes of stack frame */
|
||||
lea 8(%esp), %esp
|
||||
RESTORE_BX
|
||||
ret
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(sin_poly):
|
||||
@ -183,9 +182,7 @@ L(sin_poly):
|
||||
fldl 0(%esp) /* ...to FPU. */
|
||||
/* Return back 8 bytes of stack frame */
|
||||
lea 8(%esp), %esp
|
||||
RESTORE_BX
|
||||
ret
|
||||
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(large_args):
|
||||
@ -275,7 +272,6 @@ L(very_large_skip2):
|
||||
|
||||
jmp L(reconstruction) /* end of very_large_args path */
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(arg_less_pio4):
|
||||
/* Here if |x|<Pi/4 */
|
||||
@ -307,8 +303,7 @@ L(epilogue):
|
||||
flds 0(%esp) /* ...to FPU. */
|
||||
/* Return back 4 bytes of stack frame */
|
||||
lea 4(%esp), %esp
|
||||
RESTORE_BX
|
||||
ret
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(arg_less_2pn5):
|
||||
@ -353,7 +348,6 @@ L(skip_errno_setting):
|
||||
jmp L(epilogue)
|
||||
END(__cosf_sse2)
|
||||
|
||||
|
||||
.section .rodata, "a"
|
||||
.p2align 3
|
||||
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
|
||||
@ -540,8 +534,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
|
||||
.p2align 3
|
||||
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
|
||||
.long 0x00000000,0xffffffff
|
||||
.type L(DP_ABS_MASK), @object
|
||||
ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
|
||||
.type L(DP_HI_MASK), @object
|
||||
ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
|
||||
|
||||
.p2align 4
|
||||
L(SP_ABS_MASK): /* Mask for getting SP absolute value */
|
||||
|
@ -50,25 +50,29 @@
|
||||
* 9) if x is NaN, return x-x.
|
||||
*
|
||||
* Special cases:
|
||||
* sin(+-0)==+-0 not raising inexact/underflow,
|
||||
* sin(subnormal) raises inexact/underflow
|
||||
* sin(min_normalized) raises inexact/underflow
|
||||
* sin(normalized) raises inexact
|
||||
* sin(Inf) = NaN, raises invalid, sets errno to EDOM
|
||||
* sin(NaN) = NaN
|
||||
* sin(+-0) = +-0 not raising inexact/underflow,
|
||||
* sin(subnormal) raises inexact/underflow,
|
||||
* sin(min_normalized) raises inexact/underflow,
|
||||
* sin(normalized) raises inexact,
|
||||
* sin(Inf) = NaN, raises invalid, sets errno to EDOM,
|
||||
* sin(NaN) = NaN.
|
||||
*/
|
||||
|
||||
#ifdef PIC
|
||||
# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
|
||||
# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
|
||||
# define SAVE_BX pushl %ebx
|
||||
# define RESTORE_BX popl %ebx
|
||||
# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
|
||||
# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
|
||||
# define PUSH(REG) pushl REG; CFI_PUSH(REG)
|
||||
# define POP(REG) popl REG; CFI_POP(REG)
|
||||
# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
|
||||
# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
|
||||
# define ARG_X 8(%esp)
|
||||
#else
|
||||
# define MO1(symbol) L(symbol)
|
||||
# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
|
||||
# define SAVE_BX
|
||||
# define RESTORE_BX
|
||||
# define ENTRANCE
|
||||
# define RETURN ret
|
||||
# define ARG_X 4(%esp)
|
||||
#endif
|
||||
|
||||
@ -76,11 +80,7 @@
|
||||
ENTRY(__sinf_sse2)
|
||||
/* Input: single precision x on stack at address ARG_X */
|
||||
|
||||
#ifdef PIC
|
||||
SAVE_BX
|
||||
LOAD_PIC_REG(bx)
|
||||
#endif
|
||||
|
||||
ENTRANCE
|
||||
movl ARG_X, %eax /* Bits of x */
|
||||
cvtss2sd ARG_X, %xmm0 /* DP x */
|
||||
andl $0x7fffffff, %eax /* |x| */
|
||||
@ -145,8 +145,7 @@ L(reconstruction):
|
||||
fldl 0(%esp) /* ...to FPU. */
|
||||
/* Return back 8 bytes of stack frame */
|
||||
lea 8(%esp), %esp
|
||||
RESTORE_BX
|
||||
ret
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(sin_poly):
|
||||
@ -186,9 +185,7 @@ L(sin_poly):
|
||||
fldl 0(%esp) /* ...to FPU. */
|
||||
/* Return back 8 bytes of stack frame */
|
||||
lea 8(%esp), %esp
|
||||
RESTORE_BX
|
||||
ret
|
||||
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(large_args):
|
||||
@ -281,10 +278,6 @@ L(very_large_skip2):
|
||||
|
||||
jmp L(reconstruction) /* end of very_large_args path */
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(arg_less_pio4):
|
||||
/* Here if |x|<Pi/4 */
|
||||
@ -320,8 +313,7 @@ L(epilogue):
|
||||
flds 0(%esp) /* ...to FPU. */
|
||||
/* Return back 4 bytes of stack frame */
|
||||
lea 4(%esp), %esp
|
||||
RESTORE_BX
|
||||
ret
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(arg_less_2pn5):
|
||||
@ -376,7 +368,6 @@ L(skip_errno_setting):
|
||||
jmp L(epilogue)
|
||||
END(__sinf_sse2)
|
||||
|
||||
|
||||
.section .rodata, "a"
|
||||
.p2align 3
|
||||
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
|
||||
@ -569,7 +560,7 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
|
||||
.p2align 3
|
||||
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
|
||||
.long 0x00000000,0xffffffff
|
||||
.type L(DP_ABS_MASK), @object
|
||||
ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
|
||||
.type L(DP_HI_MASK), @object
|
||||
ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
|
||||
|
||||
weak_alias (__sinf, sinf)
|
||||
|
@ -50,12 +50,12 @@
|
||||
* 9) if x is NaN, return x-x.
|
||||
*
|
||||
* Special cases:
|
||||
* cos(+-0)==+-0 not raising inexact/underflow,
|
||||
* cos(subnormal) raises inexact/underflow
|
||||
* cos(min_normalized) raises inexact/underflow
|
||||
* cos(normalized) raises inexact
|
||||
* cos(Inf) = NaN, raises invalid, sets errno to EDOM
|
||||
* cos(NaN) = NaN
|
||||
* cos(+-0) = 1 not raising inexact,
|
||||
* cos(subnormal) raises inexact,
|
||||
* cos(min_normalized) raises inexact,
|
||||
* cos(normalized) raises inexact,
|
||||
* cos(Inf) = NaN, raises invalid, sets errno to EDOM,
|
||||
* cos(NaN) = NaN.
|
||||
*/
|
||||
|
||||
.text
|
||||
@ -163,10 +163,6 @@ L(sin_poly):
|
||||
cvtsd2ss %xmm3, %xmm0 /* SP result */
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(large_args):
|
||||
/* Here if |x|>=9*Pi/4 */
|
||||
@ -257,7 +253,6 @@ L(very_large_skip2):
|
||||
|
||||
jmp L(reconstruction) /* end of very_large_args path */
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(arg_less_pio4):
|
||||
/* Here if |x|<Pi/4 */
|
||||
@ -317,7 +312,6 @@ L(arg_inf_or_nan):
|
||||
|
||||
/* Here if x is Inf. Set errno to EDOM. */
|
||||
call JUMPTARGET(__errno_location)
|
||||
lea (%rax), %rax
|
||||
movl $EDOM, (%rax)
|
||||
|
||||
.p2align 4
|
||||
@ -328,8 +322,6 @@ L(skip_errno_setting):
|
||||
ret
|
||||
END(__cosf)
|
||||
|
||||
|
||||
|
||||
.section .rodata, "a"
|
||||
.p2align 3
|
||||
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
|
||||
@ -516,8 +508,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
|
||||
.p2align 3
|
||||
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
|
||||
.long 0x00000000,0xffffffff
|
||||
.type L(DP_ABS_MASK), @object
|
||||
ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
|
||||
.type L(DP_HI_MASK), @object
|
||||
ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
|
||||
|
||||
.p2align 4
|
||||
L(SP_ABS_MASK): /* Mask for getting SP absolute value */
|
||||
|
@ -50,12 +50,12 @@
|
||||
* 9) if x is NaN, return x-x.
|
||||
*
|
||||
* Special cases:
|
||||
* sin(+-0)==+-0 not raising inexact/underflow,
|
||||
* sin(subnormal) raises inexact/underflow
|
||||
* sin(min_normalized) raises inexact/underflow
|
||||
* sin(normalized) raises inexact
|
||||
* sin(Inf) = NaN, raises invalid, sets errno to EDOM
|
||||
* sin(NaN) = NaN
|
||||
* sin(+-0) = +-0 not raising inexact/underflow,
|
||||
* sin(subnormal) raises inexact/underflow,
|
||||
* sin(min_normalized) raises inexact/underflow,
|
||||
* sin(normalized) raises inexact,
|
||||
* sin(Inf) = NaN, raises invalid, sets errno to EDOM,
|
||||
* sin(NaN) = NaN.
|
||||
*/
|
||||
|
||||
.text
|
||||
@ -168,7 +168,6 @@ L(sin_poly):
|
||||
cvtsd2ss %xmm3, %xmm0 /* SP result */
|
||||
ret
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(large_args):
|
||||
/* Here if |x|>=9*Pi/4 */
|
||||
@ -262,7 +261,6 @@ L(very_large_skip2):
|
||||
|
||||
jmp L(reconstruction) /* end of very_large_args path */
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(arg_less_pio4):
|
||||
/* Here if |x|<Pi/4 */
|
||||
@ -340,7 +338,6 @@ L(arg_inf_or_nan):
|
||||
|
||||
/* Here if x is Inf. Set errno to EDOM. */
|
||||
call JUMPTARGET(__errno_location)
|
||||
lea (%rax), %rax
|
||||
movl $EDOM, (%rax)
|
||||
|
||||
.p2align 4
|
||||
@ -351,8 +348,6 @@ L(skip_errno_setting):
|
||||
ret
|
||||
END(__sinf)
|
||||
|
||||
|
||||
|
||||
.section .rodata, "a"
|
||||
.p2align 3
|
||||
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
|
||||
@ -545,8 +540,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
|
||||
.p2align 3
|
||||
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
|
||||
.long 0x00000000,0xffffffff
|
||||
.type L(DP_ABS_MASK),@object
|
||||
ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
|
||||
.type L(DP_HI_MASK),@object
|
||||
ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
|
||||
|
||||
.p2align 4
|
||||
L(SP_ABS_MASK): /* Mask for getting SP absolute value */
|
||||
|
Loading…
Reference in New Issue
Block a user