Fix x86 SSE cosf, sinf issues

* sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix
	unwind info if defined PIC. Fix special cases description.
	* sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise.

	* sysdeps/x86_64/fpu/s_sinf.S: Fix special cases description, fix
	DP_HI_MASK entry.
	* sysdeps/x86_64/fpu/s_cosf.S: Likewise.
This commit is contained in:
Liubov Dmitrieva 2012-09-10 11:44:49 +02:00 committed by Andreas Jaeger
parent 3d9b46b350
commit 80ccd52c95
5 changed files with 66 additions and 84 deletions

View File

@ -1,3 +1,13 @@
2012-09-10 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
* sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix
unwind info if defined PIC. Fix special cases description.
* sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise.
* sysdeps/x86_64/fpu/s_sinf.S: Fix special cases description, fix
DP_HI_MASK entry.
* sysdeps/x86_64/fpu/s_cosf.S: Likewise.
2012-09-07 H.J. Lu <hongjiu.lu@intel.com>
* scripts/check-local-headers.sh: Add "shopt -s nullglob".

View File

@ -50,25 +50,29 @@
* 9) if x is NaN, return x-x.
*
* Special cases:
* cos(+-0)==+-0 not raising inexact/underflow,
* cos(subnormal) raises inexact/underflow
* cos(min_normalized) raises inexact/underflow
* cos(normalized) raises inexact
* cos(Inf) = NaN, raises invalid, sets errno to EDOM
* cos(NaN) = NaN
* cos(+-0) = 1 not raising inexact,
* cos(subnormal) raises inexact,
* cos(min_normalized) raises inexact,
* cos(normalized) raises inexact,
* cos(Inf) = NaN, raises invalid, sets errno to EDOM,
* cos(NaN) = NaN.
*/
#ifdef PIC
# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
# define SAVE_BX pushl %ebx
# define RESTORE_BX popl %ebx
# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
# define PUSH(REG) pushl REG; CFI_PUSH(REG)
# define POP(REG) popl REG; CFI_POP(REG)
# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
# define ARG_X 8(%esp)
#else
# define MO1(symbol) L(symbol)
# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
# define SAVE_BX
# define RESTORE_BX
# define ENTRANCE
# define RETURN ret
# define ARG_X 4(%esp)
#endif
@ -76,11 +80,7 @@
ENTRY(__cosf_sse2)
/* Input: single precision x on stack at address ARG_X */
#ifdef PIC
SAVE_BX
LOAD_PIC_REG(bx)
#endif
ENTRANCE
movl ARG_X, %eax /* Bits of x */
cvtss2sd ARG_X, %xmm0 /* DP x */
andl $0x7fffffff, %eax /* |x| */
@ -143,8 +143,7 @@ L(reconstruction):
fldl 0(%esp) /* ...to FPU. */
/* Return back 8 bytes of stack frame */
lea 8(%esp), %esp
RESTORE_BX
ret
RETURN
.p2align 4
L(sin_poly):
@ -183,9 +182,7 @@ L(sin_poly):
fldl 0(%esp) /* ...to FPU. */
/* Return back 8 bytes of stack frame */
lea 8(%esp), %esp
RESTORE_BX
ret
RETURN
.p2align 4
L(large_args):
@ -275,7 +272,6 @@ L(very_large_skip2):
jmp L(reconstruction) /* end of very_large_args path */
.p2align 4
L(arg_less_pio4):
/* Here if |x|<Pi/4 */
@ -307,8 +303,7 @@ L(epilogue):
flds 0(%esp) /* ...to FPU. */
/* Return back 4 bytes of stack frame */
lea 4(%esp), %esp
RESTORE_BX
ret
RETURN
.p2align 4
L(arg_less_2pn5):
@ -353,7 +348,6 @@ L(skip_errno_setting):
jmp L(epilogue)
END(__cosf_sse2)
.section .rodata, "a"
.p2align 3
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@ -540,8 +534,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
.p2align 3
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
.long 0x00000000,0xffffffff
.type L(DP_ABS_MASK), @object
ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
.type L(DP_HI_MASK), @object
ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
.p2align 4
L(SP_ABS_MASK): /* Mask for getting SP absolute value */

View File

@ -50,25 +50,29 @@
* 9) if x is NaN, return x-x.
*
* Special cases:
* sin(+-0)==+-0 not raising inexact/underflow,
* sin(subnormal) raises inexact/underflow
* sin(min_normalized) raises inexact/underflow
* sin(normalized) raises inexact
* sin(Inf) = NaN, raises invalid, sets errno to EDOM
* sin(NaN) = NaN
* sin(+-0) = +-0 not raising inexact/underflow,
* sin(subnormal) raises inexact/underflow,
* sin(min_normalized) raises inexact/underflow,
* sin(normalized) raises inexact,
* sin(Inf) = NaN, raises invalid, sets errno to EDOM,
* sin(NaN) = NaN.
*/
#ifdef PIC
# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
# define SAVE_BX pushl %ebx
# define RESTORE_BX popl %ebx
# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
# define PUSH(REG) pushl REG; CFI_PUSH(REG)
# define POP(REG) popl REG; CFI_POP(REG)
# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
# define ARG_X 8(%esp)
#else
# define MO1(symbol) L(symbol)
# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
# define SAVE_BX
# define RESTORE_BX
# define ENTRANCE
# define RETURN ret
# define ARG_X 4(%esp)
#endif
@ -76,11 +80,7 @@
ENTRY(__sinf_sse2)
/* Input: single precision x on stack at address ARG_X */
#ifdef PIC
SAVE_BX
LOAD_PIC_REG(bx)
#endif
ENTRANCE
movl ARG_X, %eax /* Bits of x */
cvtss2sd ARG_X, %xmm0 /* DP x */
andl $0x7fffffff, %eax /* |x| */
@ -145,8 +145,7 @@ L(reconstruction):
fldl 0(%esp) /* ...to FPU. */
/* Return back 8 bytes of stack frame */
lea 8(%esp), %esp
RESTORE_BX
ret
RETURN
.p2align 4
L(sin_poly):
@ -186,9 +185,7 @@ L(sin_poly):
fldl 0(%esp) /* ...to FPU. */
/* Return back 8 bytes of stack frame */
lea 8(%esp), %esp
RESTORE_BX
ret
RETURN
.p2align 4
L(large_args):
@ -281,10 +278,6 @@ L(very_large_skip2):
jmp L(reconstruction) /* end of very_large_args path */
.p2align 4
L(arg_less_pio4):
/* Here if |x|<Pi/4 */
@ -320,8 +313,7 @@ L(epilogue):
flds 0(%esp) /* ...to FPU. */
/* Return back 4 bytes of stack frame */
lea 4(%esp), %esp
RESTORE_BX
ret
RETURN
.p2align 4
L(arg_less_2pn5):
@ -376,7 +368,6 @@ L(skip_errno_setting):
jmp L(epilogue)
END(__sinf_sse2)
.section .rodata, "a"
.p2align 3
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@ -569,7 +560,7 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
.p2align 3
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
.long 0x00000000,0xffffffff
.type L(DP_ABS_MASK), @object
ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
.type L(DP_HI_MASK), @object
ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
weak_alias (__sinf, sinf)

View File

@ -50,12 +50,12 @@
* 9) if x is NaN, return x-x.
*
* Special cases:
* cos(+-0)==+-0 not raising inexact/underflow,
* cos(subnormal) raises inexact/underflow
* cos(min_normalized) raises inexact/underflow
* cos(normalized) raises inexact
* cos(Inf) = NaN, raises invalid, sets errno to EDOM
* cos(NaN) = NaN
* cos(+-0) = 1 not raising inexact,
* cos(subnormal) raises inexact,
* cos(min_normalized) raises inexact,
* cos(normalized) raises inexact,
* cos(Inf) = NaN, raises invalid, sets errno to EDOM,
* cos(NaN) = NaN.
*/
.text
@ -163,10 +163,6 @@ L(sin_poly):
cvtsd2ss %xmm3, %xmm0 /* SP result */
ret
.p2align 4
L(large_args):
/* Here if |x|>=9*Pi/4 */
@ -257,7 +253,6 @@ L(very_large_skip2):
jmp L(reconstruction) /* end of very_large_args path */
.p2align 4
L(arg_less_pio4):
/* Here if |x|<Pi/4 */
@ -317,7 +312,6 @@ L(arg_inf_or_nan):
/* Here if x is Inf. Set errno to EDOM. */
call JUMPTARGET(__errno_location)
lea (%rax), %rax
movl $EDOM, (%rax)
.p2align 4
@ -328,8 +322,6 @@ L(skip_errno_setting):
ret
END(__cosf)
.section .rodata, "a"
.p2align 3
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@ -516,8 +508,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
.p2align 3
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
.long 0x00000000,0xffffffff
.type L(DP_ABS_MASK), @object
ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
.type L(DP_HI_MASK), @object
ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
.p2align 4
L(SP_ABS_MASK): /* Mask for getting SP absolute value */

View File

@ -50,12 +50,12 @@
* 9) if x is NaN, return x-x.
*
* Special cases:
* sin(+-0)==+-0 not raising inexact/underflow,
* sin(subnormal) raises inexact/underflow
* sin(min_normalized) raises inexact/underflow
* sin(normalized) raises inexact
* sin(Inf) = NaN, raises invalid, sets errno to EDOM
* sin(NaN) = NaN
* sin(+-0) = +-0 not raising inexact/underflow,
* sin(subnormal) raises inexact/underflow,
* sin(min_normalized) raises inexact/underflow,
* sin(normalized) raises inexact,
* sin(Inf) = NaN, raises invalid, sets errno to EDOM,
* sin(NaN) = NaN.
*/
.text
@ -168,7 +168,6 @@ L(sin_poly):
cvtsd2ss %xmm3, %xmm0 /* SP result */
ret
.p2align 4
L(large_args):
/* Here if |x|>=9*Pi/4 */
@ -262,7 +261,6 @@ L(very_large_skip2):
jmp L(reconstruction) /* end of very_large_args path */
.p2align 4
L(arg_less_pio4):
/* Here if |x|<Pi/4 */
@ -340,7 +338,6 @@ L(arg_inf_or_nan):
/* Here if x is Inf. Set errno to EDOM. */
call JUMPTARGET(__errno_location)
lea (%rax), %rax
movl $EDOM, (%rax)
.p2align 4
@ -351,8 +348,6 @@ L(skip_errno_setting):
ret
END(__sinf)
.section .rodata, "a"
.p2align 3
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@ -545,8 +540,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
.p2align 3
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
.long 0x00000000,0xffffffff
.type L(DP_ABS_MASK),@object
ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
.type L(DP_HI_MASK),@object
ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
.p2align 4
L(SP_ABS_MASK): /* Mask for getting SP absolute value */