mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-23 01:33:36 +08:00
math: Use tanf from CORE-MATH
The CORE-MATH implementation is correctly rounded (for any rounding mode) and shows better performance than the generic tanf. The code was adapted to glibc style, to use the definitions of math_config.h, to remove errno handling, and to use a generic 128 bit routine for ABIs that do not support it natively. Benchtest on x86_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (neoverse1, gcc 13.2.1), and powerpc (POWER10, gcc 13.2.1): latency master patched improvement x86_64 82.3961 54.8052 33.49% x86_64v2 82.3415 54.8052 33.44% x86_64v3 69.3661 50.4864 27.22% i686 219.271 45.5396 79.23% aarch64 29.2127 19.1951 34.29% power10 19.5060 16.2760 16.56% reciprocal-throughput master patched improvement x86_64 28.3976 19.7334 30.51% x86_64v2 28.4568 19.7334 30.65% x86_64v3 21.1815 16.1811 23.61% i686 105.016 15.1426 85.58% aarch64 18.1573 10.7681 40.70% power10 8.7207 8.7097 0.13% Signed-off-by: Alexei Sibidanov <sibid@uvic.ca> Signed-off-by: Paul Zimmermann <Paul.Zimmermann@inria.fr> Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> Reviewed-by: DJ Delorie <dj@redhat.com>
This commit is contained in:
parent
d846f4c12d
commit
bccb0648ea
@ -288,3 +288,9 @@ sysdeps/ieee754/flt-32/e_lgammaf_r.c:
|
||||
- remove the errno stuff (this is done by the wrapper)
|
||||
- replace 0x1p127f * 0x1p127f by math_narrow_eval (x * 0x1p127f)
|
||||
- add libm_alias_finite (__ieee754_lgammaf_r, __lgammaf_r) at the end
|
||||
sysdeps/ieee754/flt-32/s_tanf.c:
|
||||
(src/binary32/tan/tanf.c in CORE-MATH)
|
||||
- The code was adapted to use glibc code style and internal
|
||||
functions to handle errno, overflow, and underflow. It was changed
|
||||
to use an internal wrapper for 128 bit unsigned integer operations
|
||||
for ABIs that do not support the type natively.
|
||||
|
@ -1561,7 +1561,6 @@ float: 3
|
||||
ldouble: 4
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_advsimd":
|
||||
@ -1570,7 +1569,6 @@ float: 2
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_sve":
|
||||
@ -1579,12 +1577,10 @@ float: 2
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tanh":
|
||||
|
@ -1342,22 +1342,18 @@ float: 3
|
||||
ldouble: 4
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tanh":
|
||||
|
@ -1081,19 +1081,15 @@ float: 3
|
||||
|
||||
Function: "tan":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 2
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 2
|
||||
|
||||
Function: "tanh":
|
||||
double: 3
|
||||
|
@ -259,9 +259,6 @@ Function: "sinh":
|
||||
double: 2
|
||||
float: 2
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
float: 2
|
||||
|
@ -1078,20 +1078,14 @@ Function: "sinh_upward":
|
||||
double: 3
|
||||
float: 3
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
|
@ -1000,20 +1000,14 @@ Function: "sinh_upward":
|
||||
double: 3
|
||||
float: 3
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
|
@ -1031,20 +1031,14 @@ Function: "sinh_upward":
|
||||
double: 3
|
||||
float: 3
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
|
150
sysdeps/generic/math_uint128.h
Normal file
150
sysdeps/generic/math_uint128.h
Normal file
@ -0,0 +1,150 @@
|
||||
/* Internal 128 bit int support.
|
||||
Copyright (C) 2024 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _MATH_INT128_H
|
||||
#define _MATH_INT128_H
|
||||
|
||||
/* Limited support for internal 128 bit integer, used on some math
|
||||
implementations. It uses compiler builtin type if supported, otherwise
|
||||
it is emulated. Only unsigned and some operations are currently supported:
|
||||
|
||||
- u128: the 128 bit unsigned type.
|
||||
- u128_high: return the high part of the number.
|
||||
- u128_low: return the low part of the number.
|
||||
- u128_from_u64: create a 128 bit number from a 64 bit one.
|
||||
- u128_mul: multiply two 128 bit numbers.
|
||||
- u128_add: add two 128 bit numbers.
|
||||
- u128_lshift: left shift a number.
|
||||
- u128_rshift: right shift a number.
|
||||
*/
|
||||
|
||||
#if defined __BITINT_MAXWIDTH__ && __BITINT_MAXWIDTH__ >= 128
|
||||
typedef unsigned _BitInt(128) u128;
|
||||
# define __MATH_INT128_BUILTIN_TYPE 1
|
||||
#elif defined __SIZEOF_INT128__
|
||||
typedef unsigned __int128 u128;
|
||||
# define __MATH_INT128_BUILTIN_TYPE 1
|
||||
#else
|
||||
# define __MATH_INT128_BUILTIN_TYPE 0
|
||||
#endif
|
||||
|
||||
#if __MATH_INT128_BUILTIN_TYPE
|
||||
/* Operations for the case where the compiler provides a builtin 128 bit
   unsigned type: each one maps directly onto a C operator.  Every
   expansion is parenthesized as a whole so the macros compose safely
   inside larger expressions; otherwise u128_add (a, b) * c would be
   parsed as a + (b * c) and u128_lshift (a, n) + 1 as a << (n + 1).  */
# define u128_high(__x)         ((uint64_t) ((__x) >> 64))
# define u128_low(__x)          ((uint64_t) (__x))
# define u128_from_u64(__x)     ((u128) (__x))
# define u128_mul(__x, __y)     ((__x) * (__y))
# define u128_add(__x, __y)     ((__x) + (__y))
# define u128_lshift(__x, __y)  ((__x) << (__y))
# define u128_rshift(__x, __y)  ((__x) >> (__y))
|
||||
#else
|
||||
typedef struct
|
||||
{
|
||||
uint64_t low;
|
||||
uint64_t high;
|
||||
} u128;
|
||||
|
||||
# define u128_high(__x) (__x).high
|
||||
# define u128_low(__x) (__x).low
|
||||
# define u128_from_u64(__x) (u128){.low = (__x), .high = 0}
|
||||
|
||||
# define MASK32 (UINT64_C(0xffffffff))
|
||||
|
||||
/* Return X + Y modulo 2^128 for the emulated (two 64 bit words) type.  */
static u128 u128_add (u128 x, u128 y)
{
  u128 sum;
  sum.low = x.low + y.low;
  /* The low-word addition wrapped around exactly when its result is
     smaller than one of the operands; propagate that carry upwards.  */
  sum.high = x.high + y.high + (sum.low < x.low);
  return sum;
}
|
||||
|
||||
/* Return X shifted left by N bits.  A shift of 128 bits or more yields
   zero.  */
static u128 u128_lshift (u128 x, unsigned int n)
{
  u128 shifted = { .high = 0, .low = 0 };

  if (n == 0)
    return x;

  if (n < 64)
    {
      /* The top N bits of the low word move into the high word.  N is
         in [1, 63] here, so neither shift count reaches 64.  */
      shifted.high = (x.high << n) | (x.low >> (64 - n));
      shifted.low = x.low << n;
    }
  else if (n < 128)
    /* Only bits of the low word survive, now in the high word.  */
    shifted.high = x.low << (n - 64);

  return shifted;
}
|
||||
|
||||
/* Return X shifted right (logically) by N bits.  A shift of 128 bits or
   more yields zero.  */
static u128 u128_rshift (u128 x, unsigned int n)
{
  u128 shifted = { .high = 0, .low = 0 };

  if (n == 0)
    return x;

  if (n < 64)
    {
      /* The bottom N bits of the high word move into the low word.  N
         is in [1, 63] here, so neither shift count reaches 64.  */
      shifted.high = x.high >> n;
      shifted.low = (x.high << (64 - n)) | (x.low >> n);
    }
  else if (n < 128)
    /* Only bits of the high word survive, now in the low word.  */
    shifted.low = x.high >> (n - 64);

  return shifted;
}
|
||||
|
||||
/* Return X * Y modulo 2^128.  Both operands are split into 32 bit limbs
   so that every limb product fits in 64 bits; the partial products are
   then aligned with u128_lshift and accumulated with u128_add.  */
static u128 u128_mul (u128 x, u128 y)
{
  /* Limbs of the low words, least significant first.  */
  const uint64_t xa = x.low & MASK32;
  const uint64_t xb = x.low >> 32;
  const uint64_t ya = y.low & MASK32;
  const uint64_t yb = y.low >> 32;

  /* Partial products of weight 2^0 and 2^32, needed on both paths.  */
  const u128 pa_a = { .high = 0, .low = xa * ya };
  const u128 pa_b = { .high = 0, .low = xa * yb };
  const u128 pb_a = { .high = 0, .low = xb * ya };
  const u128 bottom = u128_add (pa_a,
                                u128_lshift (u128_add (pa_b, pb_a), 32));

  if (x.high == 0 && y.high == 0)
    {
      /* 64x64 -> 128 bit product: the only remaining term is xb * yb,
         which has weight 2^64 and therefore lands in the high word.  */
      const u128 pb_b = { .high = xb * yb, .low = 0 };
      return u128_add (bottom, pb_b);
    }

  /* General case: also split the high words.  */
  const uint64_t xc = x.high & MASK32;
  const uint64_t xd = x.high >> 32;
  const uint64_t yc = y.high & MASK32;
  const uint64_t yd = y.high >> 32;

  /* Partial products of weight 2^64.  */
  const u128 pa_c = { .high = 0, .low = xa * yc };
  const u128 pb_b = { .high = 0, .low = xb * yb };
  const u128 pc_a = { .high = 0, .low = xc * ya };
  /* Partial products of weight 2^96.  Terms of weight 2^128 and above
     vanish modulo 2^128 and are never formed.  */
  const u128 pa_d = { .high = 0, .low = xa * yd };
  const u128 pb_c = { .high = 0, .low = xb * yc };
  const u128 pc_b = { .high = 0, .low = xc * yb };
  const u128 pd_a = { .high = 0, .low = xd * ya };

  const u128 weight64 = u128_lshift (u128_add (u128_add (pa_c, pb_b), pc_a),
                                     64);
  const u128 weight96 = u128_lshift (u128_add (u128_add (pa_d, pb_c),
                                               u128_add (pc_b, pd_a)),
                                     96);

  return u128_add (bottom, u128_add (weight64, weight96));
}
|
||||
#endif /* __MATH_INT128_BUILTIN_TYPE */
|
||||
|
||||
#endif
|
@ -1107,20 +1107,16 @@ float: 3
|
||||
|
||||
Function: "tan":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
|
@ -1614,25 +1614,21 @@ float128: 4
|
||||
ldouble: 5
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
float128: 1
|
||||
ldouble: 2
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
float128: 1
|
||||
ldouble: 3
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 2
|
||||
float128: 1
|
||||
ldouble: 3
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 2
|
||||
float128: 1
|
||||
ldouble: 2
|
||||
|
||||
|
@ -1619,25 +1619,21 @@ float128: 4
|
||||
ldouble: 5
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
float128: 1
|
||||
ldouble: 2
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
float128: 1
|
||||
ldouble: 3
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 2
|
||||
float128: 1
|
||||
ldouble: 3
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 2
|
||||
float128: 1
|
||||
ldouble: 2
|
||||
|
||||
|
@ -1,101 +1 @@
|
||||
/* k_tanf.c -- float version of k_tan.c
|
||||
*/
|
||||
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: k_tanf.c,v 1.4 1995/05/10 20:46:39 jtc Exp $";
|
||||
#endif
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <math_private.h>
|
||||
#include <math-underflow.h>
|
||||
static const float
|
||||
one = 1.0000000000e+00, /* 0x3f800000 */
|
||||
pio4 = 7.8539812565e-01, /* 0x3f490fda */
|
||||
pio4lo= 3.7748947079e-08, /* 0x33222168 */
|
||||
T[] = {
|
||||
3.3333334327e-01, /* 0x3eaaaaab */
|
||||
1.3333334029e-01, /* 0x3e088889 */
|
||||
5.3968254477e-02, /* 0x3d5d0dd1 */
|
||||
2.1869488060e-02, /* 0x3cb327a4 */
|
||||
8.8632395491e-03, /* 0x3c11371f */
|
||||
3.5920790397e-03, /* 0x3b6b6916 */
|
||||
1.4562094584e-03, /* 0x3abede48 */
|
||||
5.8804126456e-04, /* 0x3a1a26c8 */
|
||||
2.4646313977e-04, /* 0x398137b9 */
|
||||
7.8179444245e-05, /* 0x38a3f445 */
|
||||
7.1407252108e-05, /* 0x3895c07a */
|
||||
-1.8558637748e-05, /* 0xb79bae5f */
|
||||
2.5907305826e-05, /* 0x37d95384 */
|
||||
};
|
||||
|
||||
float __kernel_tanf(float x, float y, int iy)
|
||||
{
|
||||
float z,r,v,w,s;
|
||||
int32_t ix,hx;
|
||||
GET_FLOAT_WORD(hx,x);
|
||||
ix = hx&0x7fffffff; /* high word of |x| */
|
||||
if(ix<0x39000000) /* x < 2**-13 */
|
||||
{if((int)x==0) { /* generate inexact */
|
||||
if((ix|(iy+1))==0) return one/fabsf(x);
|
||||
else if (iy == 1)
|
||||
{
|
||||
math_check_force_underflow (x);
|
||||
return x;
|
||||
}
|
||||
else
|
||||
return -one / x;
|
||||
}
|
||||
}
|
||||
if(ix>=0x3f2ca140) { /* |x|>=0.6744 */
|
||||
if(hx<0) {x = -x; y = -y;}
|
||||
z = pio4-x;
|
||||
w = pio4lo-y;
|
||||
x = z+w; y = 0.0;
|
||||
if (fabsf (x) < 0x1p-13f)
|
||||
return (1 - ((hx >> 30) & 2)) * iy * (1.0f - 2 * iy * x);
|
||||
}
|
||||
z = x*x;
|
||||
w = z*z;
|
||||
/* Break x^5*(T[1]+x^2*T[2]+...) into
|
||||
* x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
|
||||
* x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
|
||||
*/
|
||||
r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
|
||||
v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
|
||||
s = z*x;
|
||||
r = y + z*(s*(r+v)+y);
|
||||
r += T[0]*s;
|
||||
w = x+r;
|
||||
if(ix>=0x3f2ca140) {
|
||||
v = (float)iy;
|
||||
return (float)(1-((hx>>30)&2))*(v-(float)2.0*(x-(w*w/(w+v)-r)));
|
||||
}
|
||||
if(iy==1) return w;
|
||||
else { /* if allow error up to 2 ulp,
|
||||
simply return -1.0/(x+r) here */
|
||||
/* compute -1.0/(x+r) accurately */
|
||||
float a,t;
|
||||
int32_t i;
|
||||
z = w;
|
||||
GET_FLOAT_WORD(i,z);
|
||||
SET_FLOAT_WORD(z,i&0xfffff000);
|
||||
v = r-(z - x); /* z+v = r+x */
|
||||
t = a = -(float)1.0/w; /* a = -1.0/w */
|
||||
GET_FLOAT_WORD(i,t);
|
||||
SET_FLOAT_WORD(t,i&0xfffff000);
|
||||
s = (float)1.0+t*z;
|
||||
return t+a*(s+t*v);
|
||||
}
|
||||
}
|
||||
/* Not needed. */
|
||||
|
@ -1,76 +1,180 @@
|
||||
/* s_tanf.c -- float version of s_tan.c.
|
||||
*/
|
||||
/* Correctly-rounded tangent of binary32 value.
|
||||
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
Copyright (c) 2022-2024 Alexei Sibidanov.
|
||||
|
||||
#if defined(LIBM_SCCS) && !defined(lint)
|
||||
static char rcsid[] = "$NetBSD: s_tanf.c,v 1.4 1995/05/10 20:48:20 jtc Exp $";
|
||||
#endif
|
||||
The original version of this file was copied from the CORE-MATH
|
||||
project (file src/binary32/tan/tanf.c, revision 59d21d7).
|
||||
|
||||
#include <errno.h>
|
||||
#include <math.h>
|
||||
#include <math_private.h>
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <array_length.h>
|
||||
#include <stdint.h>
|
||||
#include <libm-alias-float.h>
|
||||
#include "s_sincosf.h"
|
||||
#include "math_config.h"
|
||||
#include <math_uint128.h>
|
||||
|
||||
/* Reduce range of X to a multiple of PI/2. The modulo result is between
|
||||
-PI/4 and PI/4 and returned as a high part y[0] and a low part y[1].
|
||||
The low bit in the return value indicates the first or 2nd half of tanf. */
|
||||
static inline int32_t
|
||||
rem_pio2f (float x, float *y)
|
||||
/* argument reduction
|
||||
for |z| < 2^28, return r such that 2/pi*x = q + r */
|
||||
static inline double
|
||||
rltl (float z, int *q)
|
||||
{
|
||||
double dx = x;
|
||||
int n;
|
||||
const sincos_t *p = &__sincosf_table[0];
|
||||
double x = z;
|
||||
double idl = -0x1.b1bbead603d8bp-32 * x;
|
||||
double idh = 0x1.45f306ep-1 * x;
|
||||
double id = roundeven (idh);
|
||||
*q = (int64_t) id;
|
||||
return (idh - id) + idl;
|
||||
}
|
||||
|
||||
if (__glibc_likely (abstop12 (x) < abstop12 (120.0f)))
|
||||
dx = reduce_fast (dx, p, &n);
|
||||
/* argument reduction
|
||||
same as rltl, but for |x| >= 2^28 */
|
||||
static double __attribute__ ((noinline))
|
||||
rbig (uint32_t u, int *q)
|
||||
{
|
||||
static const uint64_t ipi[] =
|
||||
{
|
||||
0xfe5163abdebbc562, 0xdb6295993c439041,
|
||||
0xfc2757d1f534ddc0, 0xa2f9836e4e441529
|
||||
};
|
||||
int e = (u >> 23) & 0xff, i;
|
||||
uint64_t m = (u & (~0u >> 9)) | 1 << 23;
|
||||
u128 p0 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[0]));
|
||||
u128 p1 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[1]));
|
||||
p1 = u128_add (p1, u128_rshift (p0, 64));
|
||||
u128 p2 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[2]));
|
||||
p2 = u128_add (p2, u128_rshift (p1, 64));
|
||||
u128 p3 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[3]));
|
||||
p3 = u128_add (p3, u128_rshift (p2, 64));
|
||||
uint64_t p3h = u128_high (p3);
|
||||
uint64_t p3l = u128_low (p3);
|
||||
uint64_t p2l = u128_low (p2);
|
||||
uint64_t p1l = u128_low (p1);
|
||||
int64_t a;
|
||||
int k = e - 127, s = k - 23;
|
||||
/* in __tanf(), rbig() is called in the case 127+28 <= e < 0xff
|
||||
thus 155 <= e <= 254, which yields 28 <= k <= 127 and 5 <= s <= 104 */
|
||||
if (s < 64)
|
||||
{
|
||||
i = p3h << s | p3l >> (64 - s);
|
||||
a = p3l << s | p2l >> (64 - s);
|
||||
}
|
||||
else if (s == 64)
|
||||
{
|
||||
i = p3l;
|
||||
a = p2l;
|
||||
}
|
||||
else
|
||||
{ /* s > 64 */
|
||||
i = p3l << (s - 64) | p2l >> (128 - s);
|
||||
a = p2l << (s - 64) | p1l >> (128 - s);
|
||||
}
|
||||
int sgn = u;
|
||||
sgn >>= 31;
|
||||
int64_t sm = a >> 63;
|
||||
i -= sm;
|
||||
double z = (a ^ sgn) * 0x1p-64;
|
||||
i = (i ^ sgn) - sgn;
|
||||
*q = i;
|
||||
return z;
|
||||
}
|
||||
|
||||
/* Tangent of the binary32 value X (the file header describes the result
   as correctly rounded).

   The argument is reduced to z and an integer i (see rltl and rbig; per
   their comments 2/pi*x = i + z), then tan is evaluated in double
   precision as a rational function of z.  Results that land too close
   to a binary32 value are looked up in a small table of exceptional
   inputs.  */
float
__tanf (float x)
{
  uint32_t t = asuint (x);
  int e = (t >> 23) & 0xff;   /* Biased exponent of x.  */
  int i;
  double z;
  if (__glibc_likely (e < 127 + 28)) /* |x| < 2^28 */
    {
      if (__glibc_unlikely (e < 115)) /* |x| < 2^-12 */
        {
          if (__glibc_unlikely (e < 102)) /* |x| < 2^-25 */
            /* x + x*|x|: rounds like tan(x) ~ x for such tiny inputs.  */
            return fmaf (x, fabsf (x), x);
          /* x + x^3/3, the first two terms of the Taylor series.  */
          float x2 = x * x;
          return fmaf (x, 0x1.555556p-2f * x2, x);
        }
      z = rltl (x, &i);
    }
  else if (e < 0xff)
    z = rbig (t, &i);
  else
    {
      /* Exponent all ones: a non-zero mantissa means NaN, otherwise x
         is an infinity.  */
      if (t << 9)
        return x + x; /* nan */
      /* NOTE(review): __math_invalidf is defined elsewhere; presumably
         it raises the invalid exception and returns NaN.  */
      return __math_invalidf (x);
    }
  double z2 = z * z;
  double z4 = z2 * z2;
  /* Numerator (odd in z, cn[0] = pi/2) and denominator (even in z)
     coefficients of the rational approximation.  */
  static const double cn[] =
    {
      0x1.921fb54442d18p+0, -0x1.fd226e573289fp-2,
      0x1.b7a60c8dac9f6p-6, -0x1.725beb40f33e5p-13
    };
  static const double cd[] =
    {
      0x1p+0, -0x1.2395347fb829dp+0,
      0x1.2313660f29c36p-3, -0x1.9a707ab98d1c1p-9
    };
  static const double s[] = { 0, 1 };
  double n = cn[0] + z2 * cn[1];
  double n2 = cn[2] + z2 * cn[3];
  n += z4 * n2;
  double d = cd[0] + z2 * cd[1];
  double d2 = cd[2] + z2 * cd[3];
  d += z4 * d2;
  n *= z;
  /* Select n/d when i is even and -d/n when i is odd, without a
     branch: (s0, s1) is (0, 1) for even i and (1, 0) for odd i.  */
  double s0 = s[i & 1];
  double s1 = s[1 - (i & 1)];
  double r1 = (n * s1 - d * s0) / (n * s0 + d * s1);
  /* The mask keeps the low 29 bits of r1's encoding, i.e. the bits below
     binary32 precision.  tail <= 14 means they are within 7 units of
     zero (mod 2^29), so r1 is too close to a binary32 value for the
     final conversion to be trusted.  */
  uint64_t tail = (asuint64 (r1) + 7) & (~UINT64_C(0) >> 35);
  if (__glibc_unlikely (tail <= 14))
    {
      /* Exceptional inputs: |x| and the result as a high part plus a
         tiny correction whose sum rounds in the right direction.  */
      static const struct
      {
        float arg;
        float rh;
        float rl;
      } st[] = {
        { 0x1.143ec4p+0f, 0x1.ddf9f6p+0f, -0x1.891d24p-52f },
        { 0x1.ada6aap+27f, 0x1.e80304p-3f, 0x1.419f46p-58f },
        { 0x1.af61dap+48f, 0x1.60d1c8p-2f, -0x1.2d6c3ap-55f },
        { 0x1.0088bcp+52f, 0x1.ca1edp+0f, 0x1.f6053p-53f },
        { 0x1.f90dfcp+72f, 0x1.597f9cp-1f, 0x1.925978p-53f },
        { 0x1.cc4e22p+85f, -0x1.f33584p+1f, 0x1.d7254ap-51f },
        { 0x1.a6ce12p+86f, -0x1.c5612ep-1f, -0x1.26c33ep-53f },
        { 0x1.6a0b76p+102f, -0x1.e42a1ep+0f, -0x1.1dc906p-52f },
      };
      uint32_t ax = t & (~0u >> 1);   /* Encoding of |x|.  */
      uint32_t sgn = t >> 31;
      for (int j = 0; j < array_length (st); j++)
        {
          /* ax is a raw bit pattern, so the table argument must be
             compared through its encoding (asuint).  Passing the float
             to asfloat would value-convert it to an integer first and
             the entry would never match.  */
          if (__glibc_unlikely (asuint (st[j].arg) == ax))
            {
              if (sgn)
                return -st[j].rh - st[j].rl;
              else
                return st[j].rh + st[j].rl;
            }
        }
    }
  return r1;
}
|
||||
libm_alias_float (__tan, tan)
|
||||
|
@ -1349,22 +1349,18 @@ ldouble: 4
|
||||
|
||||
Function: "tan":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tanh":
|
||||
|
@ -241,9 +241,6 @@ Function: "sinh":
|
||||
double: 2
|
||||
float: 2
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
float: 2
|
||||
|
@ -1079,20 +1079,14 @@ Function: "sinh_upward":
|
||||
double: 3
|
||||
float: 3
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
|
@ -1360,22 +1360,18 @@ float: 3
|
||||
ldouble: 4
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tanh":
|
||||
|
@ -250,9 +250,6 @@ Function: "sinh":
|
||||
double: 2
|
||||
float: 2
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
float: 2
|
||||
|
@ -1013,20 +1013,14 @@ Function: "sinh_upward":
|
||||
double: 3
|
||||
float: 3
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
|
@ -1003,20 +1003,14 @@ Function: "sinh_upward":
|
||||
double: 3
|
||||
float: 3
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
|
@ -1737,25 +1737,21 @@ double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tan":
|
||||
float: 3
|
||||
float128: 1
|
||||
ldouble: 2
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 3
|
||||
float128: 1
|
||||
ldouble: 3
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 3
|
||||
float128: 1
|
||||
ldouble: 2
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 3
|
||||
float128: 1
|
||||
ldouble: 3
|
||||
|
||||
|
@ -1476,22 +1476,18 @@ double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
ldouble: 2
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
ldouble: 3
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 2
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 3
|
||||
|
||||
Function: "tanh":
|
||||
|
@ -1289,22 +1289,18 @@ float: 3
|
||||
ldouble: 4
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tanh":
|
||||
|
@ -1347,22 +1347,18 @@ float: 3
|
||||
ldouble: 4
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tanh":
|
||||
|
@ -1346,22 +1346,18 @@ float: 3
|
||||
ldouble: 4
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tanh":
|
||||
|
@ -501,12 +501,8 @@ Function: "sinh_towardzero":
|
||||
double: 3
|
||||
float: 2
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
|
||||
Function: "tanh":
|
||||
double: 2
|
||||
|
@ -1360,22 +1360,18 @@ float: 3
|
||||
ldouble: 4
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
ldouble: 1
|
||||
|
||||
Function: "tanh":
|
||||
|
@ -2136,25 +2136,21 @@ Function: "sinh_vlen8_avx2":
|
||||
float: 1
|
||||
|
||||
Function: "tan":
|
||||
float: 1
|
||||
float128: 1
|
||||
ldouble: 2
|
||||
|
||||
Function: "tan_downward":
|
||||
double: 1
|
||||
float: 2
|
||||
float128: 1
|
||||
ldouble: 3
|
||||
|
||||
Function: "tan_towardzero":
|
||||
double: 1
|
||||
float: 1
|
||||
float128: 1
|
||||
ldouble: 3
|
||||
|
||||
Function: "tan_upward":
|
||||
double: 1
|
||||
float: 1
|
||||
float128: 1
|
||||
ldouble: 2
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user