Fix x86 SSE cosf, sinf issues

* sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix unwind info if defined PIC. Fix special cases description. * sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise. * sysdeps/x86_64/fpu/s_sinf.S: Fix special cases description, fix DP_HI_MASK entry. * sysdeps/x86_64/fpu/s_cosf.S: Likewise.
2024-11-23 17:53:37 +08:00 · 2012-09-10 11:44:49 +02:00 · 2012-09-10 11:44:49 +02:00 · 80ccd52c95
commit 80ccd52c95
parent 3d9b46b350
5 changed files with 66 additions and 84 deletions
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2012-09-10  Liubov Dmitrieva  <liubov.dmitrieva@gmail.com>
+
+	* sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix
+	unwind info if defined PIC. Fix special cases description.
+	* sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise.
+
+	* sysdeps/x86_64/fpu/s_sinf.S: Fix special cases description, fix
+	DP_HI_MASK entry.
+	* sysdeps/x86_64/fpu/s_cosf.S: Likewise.
+
 2012-09-07  H.J. Lu  <hongjiu.lu@intel.com>

 	* scripts/check-local-headers.sh: Add "shopt -s nullglob".
--- a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
+++ b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
@@ -50,25 +50,29 @@
 *  9) if x is NaN, return x-x.
 *
 * Special cases:
- *  cos(+-0)==+-0 not raising inexact/underflow,
- *  cos(subnormal) raises inexact/underflow
- *  cos(min_normalized) raises inexact/underflow
- *  cos(normalized) raises inexact
- *  cos(Inf) = NaN, raises invalid, sets errno to EDOM
- *  cos(NaN) = NaN
+ *  cos(+-0) = 1 not raising inexact,
+ *  cos(subnormal) raises inexact,
+ *  cos(min_normalized) raises inexact,
+ *  cos(normalized) raises inexact,
+ *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  cos(NaN) = NaN.
 */

 #ifdef	PIC
 # define MO1(symbol)			L(symbol)##@GOTOFF(%ebx)
 # define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define SAVE_BX			pushl	%ebx
-# define RESTORE_BX			popl	%ebx
+# define CFI_PUSH(REG)	cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG)	cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG)			pushl REG; CFI_PUSH(REG)
+# define POP(REG)			popl REG; CFI_POP(REG)
+# define ENTRANCE			PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN				POP(%ebx); ret; CFI_PUSH(%ebx)
 # define ARG_X				8(%esp)
 #else
 # define MO1(symbol)			L(symbol)
 # define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
-# define SAVE_BX
-# define RESTORE_BX
+# define ENTRANCE
+# define RETURN				ret
 # define ARG_X				4(%esp)
 #endif

@@ -76,11 +80,7 @@
 ENTRY(__cosf_sse2)
 	/* Input: single precision x on stack at address ARG_X */

-#ifdef	PIC
-	SAVE_BX
-	LOAD_PIC_REG(bx)
-#endif
-
+	ENTRANCE
 	movl	ARG_X, %eax		/* Bits of x */
 	cvtss2sd ARG_X, %xmm0		/* DP x */
 	andl	$0x7fffffff, %eax	/* |x| */
@@ -143,8 +143,7 @@ L(reconstruction):
 	fldl	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	8(%esp), %esp
-	RESTORE_BX
-	ret
+	RETURN

 	.p2align	4
 L(sin_poly):
@@ -183,9 +182,7 @@ L(sin_poly):
 	fldl	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	8(%esp), %esp
-	RESTORE_BX
-	ret
-
+	RETURN

 	.p2align	4
 L(large_args):
@@ -275,7 +272,6 @@ L(very_large_skip2):

 	jmp	L(reconstruction)	/* end of very_large_args peth */

-
 	.p2align	4
 L(arg_less_pio4):
 	/* Here if |x|<Pi/4 */
@@ -307,8 +303,7 @@ L(epilogue):
 	flds	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	4(%esp), %esp
-	RESTORE_BX
-	ret
+	RETURN

 	.p2align	4
 L(arg_less_2pn5):
@@ -353,7 +348,6 @@ L(skip_errno_setting):
 	jmp	L(epilogue)
 END(__cosf_sse2)

-
 	.section .rodata, "a"
 	.p2align 3
 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -540,8 +534,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 	.p2align 3
 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 	.long	0x00000000,0xffffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+	.type L(DP_HI_MASK), @object
+	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))

 	.p2align 4
 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
+++ b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
@@ -50,25 +50,29 @@
 *  9) if x is NaN, return x-x.
 *
 * Special cases:
- *  sin(+-0)==+-0 not raising inexact/underflow,
- *  sin(subnormal) raises inexact/underflow
- *  sin(min_normalized) raises inexact/underflow
- *  sin(normalized) raises inexact
- *  sin(Inf) = NaN, raises invalid, sets errno to EDOM
- *  sin(NaN) = NaN
+ *  sin(+-0) = +-0 not raising inexact/underflow,
+ *  sin(subnormal) raises inexact/underflow,
+ *  sin(min_normalized) raises inexact/underflow,
+ *  sin(normalized) raises inexact,
+ *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  sin(NaN) = NaN.
 */

 #ifdef	PIC
 # define MO1(symbol)			L(symbol)##@GOTOFF(%ebx)
 # define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define SAVE_BX			pushl	%ebx
-# define RESTORE_BX			popl	%ebx
+# define CFI_PUSH(REG)	cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG)	cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG)			pushl REG; CFI_PUSH(REG)
+# define POP(REG)			popl REG; CFI_POP(REG)
+# define ENTRANCE			PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN				POP(%ebx); ret; CFI_PUSH(%ebx)
 # define ARG_X				8(%esp)
 #else
 # define MO1(symbol)			L(symbol)
 # define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
-# define SAVE_BX
-# define RESTORE_BX
+# define ENTRANCE
+# define RETURN				ret
 # define ARG_X				4(%esp)
 #endif

@@ -76,11 +80,7 @@
 ENTRY(__sinf_sse2)
 	/* Input: single precision x on stack at address ARG_X */

-#ifdef	PIC
-	SAVE_BX
-	LOAD_PIC_REG(bx)
-#endif
-
+	ENTRANCE
 	movl	ARG_X, %eax		/* Bits of x */
 	cvtss2sd ARG_X, %xmm0		/* DP x */
 	andl	$0x7fffffff, %eax	/* |x| */
@@ -145,8 +145,7 @@ L(reconstruction):
 	fldl	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	8(%esp), %esp
-	RESTORE_BX
-	ret
+	RETURN

 	.p2align	4
 L(sin_poly):
@@ -186,9 +185,7 @@ L(sin_poly):
 	fldl	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	8(%esp), %esp
-	RESTORE_BX
-	ret
-
+	RETURN

 	.p2align	4
 L(large_args):
@@ -281,10 +278,6 @@ L(very_large_skip2):

 	jmp	L(reconstruction)	/* end of very_large_args peth */

-
-
-
-
 	.p2align	4
 L(arg_less_pio4):
 	/* Here if |x|<Pi/4 */
@@ -320,8 +313,7 @@ L(epilogue):
 	flds	0(%esp)			/* ...to FPU.  */
 	/* Return back 4 bytes of stack frame */
 	lea	4(%esp), %esp
-	RESTORE_BX
-	ret
+	RETURN

 	.p2align	4
 L(arg_less_2pn5):
@@ -376,7 +368,6 @@ L(skip_errno_setting):
 	jmp	L(epilogue)
 END(__sinf_sse2)

-
 	.section .rodata, "a"
 	.p2align 3
 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -569,7 +560,7 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 	.p2align 3
 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 	.long	0x00000000,0xffffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+	.type L(DP_HI_MASK), @object
+	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))

 weak_alias (__sinf, sinf)
--- a/sysdeps/x86_64/fpu/s_cosf.S
+++ b/sysdeps/x86_64/fpu/s_cosf.S
@@ -50,12 +50,12 @@
 *  9) if x is NaN, return x-x.
 *
 * Special cases:
- *  cos(+-0)==+-0 not raising inexact/underflow,
- *  cos(subnormal) raises inexact/underflow
- *  cos(min_normalized) raises inexact/underflow
- *  cos(normalized) raises inexact
- *  cos(Inf) = NaN, raises invalid, sets errno to EDOM
- *  cos(NaN) = NaN
+ *  cos(+-0) = 1 not raising inexact,
+ *  cos(subnormal) raises inexact,
+ *  cos(min_normalized) raises inexact,
+ *  cos(normalized) raises inexact,
+ *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  cos(NaN) = NaN.
 */

 	.text
@@ -163,10 +163,6 @@ L(sin_poly):
 	cvtsd2ss %xmm3, %xmm0		/* SP result */
 	ret

-
-
-
-
 	.p2align	4
 L(large_args):
 	/* Here if |x|>=9*Pi/4 */
@@ -257,7 +253,6 @@ L(very_large_skip2):

 	jmp	L(reconstruction)	/* end of very_large_args peth */

-
 	.p2align	4
 L(arg_less_pio4):
 	/* Here if |x|<Pi/4 */
@@ -317,7 +312,6 @@ L(arg_inf_or_nan):

 	/* Here if x is Inf. Set errno to EDOM.  */
 	call	JUMPTARGET(__errno_location)
-	lea	(%rax), %rax
 	movl	$EDOM, (%rax)

 	.p2align	4
@@ -328,8 +322,6 @@ L(skip_errno_setting):
 	ret
 END(__cosf)

-
-
 	.section .rodata, "a"
 	.p2align 3
 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -516,8 +508,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 	.p2align 3
 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 	.long	0x00000000,0xffffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+	.type L(DP_HI_MASK), @object
+	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))

 	.p2align 4
 L(SP_ABS_MASK): /* Mask for getting SP absolute value */
--- a/sysdeps/x86_64/fpu/s_sinf.S
+++ b/sysdeps/x86_64/fpu/s_sinf.S
@@ -50,12 +50,12 @@
 *  9) if x is NaN, return x-x.
 *
 * Special cases:
- *  sin(+-0)==+-0 not raising inexact/underflow,
- *  sin(subnormal) raises inexact/underflow
- *  sin(min_normalized) raises inexact/underflow
- *  sin(normalized) raises inexact
- *  sin(Inf) = NaN, raises invalid, sets errno to EDOM
- *  sin(NaN) = NaN
+ *  sin(+-0) = +-0 not raising inexact/underflow,
+ *  sin(subnormal) raises inexact/underflow,
+ *  sin(min_normalized) raises inexact/underflow,
+ *  sin(normalized) raises inexact,
+ *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  sin(NaN) = NaN.
 */

 	.text
@@ -168,7 +168,6 @@ L(sin_poly):
 	cvtsd2ss %xmm3, %xmm0 		/* SP result */
 	ret

-
 	.p2align	4
 L(large_args):
 	/* Here if |x|>=9*Pi/4 */
@@ -262,7 +261,6 @@ L(very_large_skip2):

 	jmp	L(reconstruction)	/* end of very_large_args peth */

-
 	.p2align	4
 L(arg_less_pio4):
 	/* Here if |x|<Pi/4 */
@@ -340,7 +338,6 @@ L(arg_inf_or_nan):

 	/* Here if x is Inf. Set errno to EDOM.  */
 	call	JUMPTARGET(__errno_location)
-	lea	(%rax), %rax
 	movl	$EDOM, (%rax)

 	.p2align	4
@@ -351,8 +348,6 @@ L(skip_errno_setting):
 	ret
 END(__sinf)

-
-
 	.section .rodata, "a"
 	.p2align 3
 L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -545,8 +540,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
 	.p2align 3
 L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
 	.long	0x00000000,0xffffffff
-	.type L(DP_ABS_MASK),@object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+	.type L(DP_HI_MASK),@object
+	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))

 	.p2align 4
 L(SP_ABS_MASK): /* Mask for getting SP absolute value */