mirror of
https://git.code.sf.net/p/mingw-w64/mingw-w64
synced 2024-11-28 04:13:26 +08:00
fd78dd54e3
FMA of `double` shall not be implemented using `long double`, as the result has 106 bits and cannot be stored accurately in a `long double`, whose mantissa only has 64 bits. Let's calculate `FMA(0x1.0000000000001p0, 1, 0x0.00000000000007FFp0)`: First, we multiply `0x1.0000000000001p0` and `1`, which yields `0x1.0000000000001p0`. Then we add `0x0.00000000000007FFp0` to it: 0x1.0000 0000 0000 1 p0 +) 0x0.0000 0000 0000 07FF p0 --------------------------------- 0x1.0000 0000 0000 17FF p0 (long double) This result has 65 significant bits. When it is stored into a `long double`, the last bit is rounded to even, as follows: 0x1.0000 0000 0000 17FF p0 1 <= rounded UP as this bit is set --------------------------------- 0x1.0000 0000 0000 1800 p0 (long double) If we attempt to round this value into `double` again, we get: 0x1.0000 0000 0000 1800 p0 8 <= rounded UP as this bit is set --------------------------------- 0x1.0000 0000 0000 2000 p0 (double) This is wrong. Because FMA shall round the result exactly once. If we round the first result to double we get a different result: 0x0.0000 0000 0000 17FF p0 8 <= rounded DOWN as this bit is clear --------------------------------- 0x1.0000 0000 0000 1000 p0 (double) `float` suffers from a similar problem. This patch fixes such issues. Testcase for `double`: #include <assert.h> #include <math.h> int main(void) { volatile double a = 0x1.0000000000001p0; volatile double b = 1; volatile double c = 0x0.00000000000007FFp0; assert(a * b + c == 0x1.0000000000001p0); assert(fma(a, b, c) == 0x1.0000000000001p0); assert((double)((long double)a * b + c) == 0x1.0000000000002p0); } Testcase for `float`: #include <assert.h> #include <math.h> int main(void) { volatile float a = 0x0.800001p0f; volatile float b = 0x0.5FFFFFp0f;; volatile float c = 0x0.000000000000FFFFFFp0f; assert(a * b + c == 0x0.2FFFFFCp0); assert(fmaf(a, b, c) == 0x0.2FFFFFCp0); assert((float)((double)a * b + c) == 0x0.3000000p0); } Reference: https://www.exploringbinary.com/double-rounding-errors-in-floating Signed-off-by: Liu Hao <lh_mouse@126.com>
98 lines
3.1 KiB
C
98 lines
3.1 KiB
C
/**
|
|
* This file has no copyright assigned and is placed in the Public Domain.
|
|
* This file is part of the mingw-w64 runtime package.
|
|
* No warranty is given; refer to the file DISCLAIMER.PD within this package.
|
|
*/
|
|
double fma(double x, double y, double z);
|
|
|
|
#if defined(_ARM_) || defined(__arm__)
|
|
|
|
/* Use hardware FMA on ARM. */
|
|
double fma(double x, double y, double z){
|
|
__asm__ (
|
|
"fmacd %0, %1, %2 \n"
|
|
: "+w"(z)
|
|
: "w"(x), "w"(y)
|
|
);
|
|
return z;
|
|
}
|
|
|
|
#elif defined(_ARM64_) || defined(__aarch64__)
|
|
|
|
/* Use hardware FMA on ARM64. */
|
|
double fma(double x, double y, double z){
|
|
__asm__ (
|
|
"fmadd %d0, %d1, %d2, %d0 \n"
|
|
: "+w"(z)
|
|
: "w"(x), "w"(y)
|
|
);
|
|
return z;
|
|
}
|
|
|
|
#elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
|
|
|
|
#include <math.h>
|
|
#include <stdint.h>
|
|
|
|
/* This is in accordance with the IEC 559 double-precision format.
|
|
* Be advised that due to the hidden bit, the higher half actually has 26 bits.
|
|
* Multiplying two 27-bit numbers will cause a 1-ULP error, which we cannot
|
|
* avoid. It is kept in the very last position.
|
|
*/
|
|
typedef union iec559_double_ {
|
|
struct __attribute__((__packed__)) {
|
|
uint64_t mlo : 27;
|
|
uint64_t mhi : 25;
|
|
uint64_t exp : 11;
|
|
uint64_t sgn : 1;
|
|
};
|
|
double f;
|
|
} iec559_double;
|
|
|
|
static inline void break_down(iec559_double *restrict lo, iec559_double *restrict hi, double x) {
|
|
hi->f = x;
|
|
/* Erase low-order significant bits. `hi->f` now has only 26 significant bits. */
|
|
hi->mlo = 0;
|
|
/* Store the low-order half. It will be normalized by the hardware. */
|
|
lo->f = x - hi->f;
|
|
/* Preserve signness in case of zero. */
|
|
lo->sgn = hi->sgn;
|
|
}
|
|
|
|
double fma(double x, double y, double z) {
|
|
/*
|
|
POSIX-2013:
|
|
1. If x or y are NaN, a NaN shall be returned.
|
|
2. If x multiplied by y is an exact infinity and z is also an infinity
|
|
but with the opposite sign, a domain error shall occur, and either a NaN
|
|
(if supported), or an implementation-defined value shall be returned.
|
|
3. If one of x and y is infinite, the other is zero, and z is not a NaN,
|
|
a domain error shall occur, and either a NaN (if supported), or an
|
|
implementation-defined value shall be returned.
|
|
4. If one of x and y is infinite, the other is zero, and z is a NaN, a NaN
|
|
shall be returned and a domain error may occur.
|
|
5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be returned.
|
|
*/
|
|
/* Check whether the result is finite. */
|
|
double ret = x * y + z;
|
|
if(!isfinite(ret)) {
|
|
return ret; /* If this naive check doesn't yield a finite value, the FMA isn't
|
|
likely to return one either. Forward the value as is. */
|
|
}
|
|
iec559_double xlo, xhi, ylo, yhi;
|
|
break_down(&xlo, &xhi, x);
|
|
break_down(&ylo, &yhi, y);
|
|
/* The order of these four statements is essential. Don't move them around. */
|
|
ret = z;
|
|
ret += xhi.f * yhi.f; /* The most significant item comes first. */
|
|
ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */
|
|
ret += xlo.f * ylo.f; /* The least significant item comes last. */
|
|
return ret;
|
|
}
|
|
|
|
#else
|
|
|
|
#error Please add FMA implementation for this platform.
|
|
|
|
#endif
|