Fix powf inaccuracy (bug 21112).

Bug 21112 reports a case where powf is substantially inaccurate.  This
results from a multiplication where cp_h*p_h is required to be exact,
and p_h is masked to have only 12 leading nonzero bits in its
mantissa, but the value of cp_h has the 13th bit nonzero, leading to
inexact multiplication results in some cases that can result in large
errors in the final result of powf.  This patch fixes this by using a
value of cp_h correctly rounded to nearest to 12 bits, with a
corresponding updated value of cp_l.

Tested for x86_64 and x86.

	[BZ #21112]
	* sysdeps/ieee754/flt-32/e_powf.c (cp_h): Use value with trailing
	12 bits zero.
	(cp_l): Update for new value of cp_h.
	* math/auto-libm-test-in: Add another test of pow.
	* math/auto-libm-test-out-pow: Regenerated.
2024-11-23 09:43:32 +08:00 · 2017-02-07 17:15:47 +00:00 · 2017-02-07 17:15:47 +00:00 · edbbdb1855
commit edbbdb1855
parent 43ce02c6ec
4 changed files with 37 additions and 2 deletions
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2017-02-07  Joseph Myers  <joseph@codesourcery.com>
+
+	[BZ #21112]
+	* sysdeps/ieee754/flt-32/e_powf.c (cp_h): Use value with trailing
+	12 bits zero.
+	(cp_l): Update for new value of cp_h.
+	* math/auto-libm-test-in: Add another test of pow.
+	* math/auto-libm-test-out-pow: Regenerated.
+
 2017-02-07  Siddhesh Poyarekar  <siddhesh@sourceware.org>

 	* manual/contrib.texi: Fix typo.
--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@@ -3749,6 +3749,7 @@ pow 0xf.fffffp+124 -0x5.b5b648p+0
 pow 0x1.430d4cp+0 0x5.0e462p+4
 pow 0x9.8b82ap-4 -0x1.99907ap+12
 pow 0xd.73035p-4 -0x1.47bb8p+8
+pow 0x1.059c76p+0 0x1.ff80bep+11

 sin 0
 sin -0
--- a/math/auto-libm-test-out-pow
+++ b/math/auto-libm-test-out-pow
@@ -44146,3 +44146,28 @@ pow 0xd.73035p-4 -0x1.47bb8p+8
 = pow tonearest ibm128 0xd.73035p-4 -0x1.47bb8p+8 : 0x4.523987c590d3192757b32fb92cp+80 : inexact-ok
 = pow towardzero ibm128 0xd.73035p-4 -0x1.47bb8p+8 : 0x4.523987c590d3192757b32fb92cp+80 : inexact-ok
 = pow upward ibm128 0xd.73035p-4 -0x1.47bb8p+8 : 0x4.523987c590d3192757b32fb92ep+80 : inexact-ok
+pow 0x1.059c76p+0 0x1.ff80bep+11
+= pow downward binary32 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe55p+124 : inexact-ok
+= pow tonearest binary32 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe55p+124 : inexact-ok
+= pow towardzero binary32 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe55p+124 : inexact-ok
+= pow upward binary32 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe56p+124 : inexact-ok
+= pow downward binary64 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f98p+124 : inexact-ok
+= pow tonearest binary64 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f98p+124 : inexact-ok
+= pow towardzero binary64 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f98p+124 : inexact-ok
+= pow upward binary64 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38fap+124 : inexact-ok
+= pow downward intel96 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be6p+124 : inexact-ok
+= pow tonearest intel96 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be6p+124 : inexact-ok
+= pow towardzero intel96 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be6p+124 : inexact-ok
+= pow upward intel96 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be7p+124 : inexact-ok
+= pow downward m68k96 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be6p+124 : inexact-ok
+= pow tonearest m68k96 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be6p+124 : inexact-ok
+= pow towardzero m68k96 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be6p+124 : inexact-ok
+= pow upward m68k96 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be7p+124 : inexact-ok
+= pow downward binary128 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be648255c105d56p+124 : inexact-ok
+= pow tonearest binary128 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be648255c105d568p+124 : inexact-ok
+= pow towardzero binary128 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be648255c105d56p+124 : inexact-ok
+= pow upward binary128 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be648255c105d568p+124 : inexact-ok
+= pow downward ibm128 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be648255c105d4p+124 : inexact-ok
+= pow tonearest ibm128 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be648255c105d4p+124 : inexact-ok
+= pow towardzero ibm128 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be648255c105d4p+124 : inexact-ok
+= pow upward ibm128 0x1.059c76p+0 0xf.fc05fp+8 : 0xf.ffe5535a38f9be648255c105d8p+124 : inexact-ok
--- a/sysdeps/ieee754/flt-32/e_powf.c
+++ b/sysdeps/ieee754/flt-32/e_powf.c
@@ -43,8 +43,8 @@ lg2_h  =  6.93145752e-01, /* 0x3f317200 */
 lg2_l  =  1.42860654e-06, /* 0x35bfbe8c */
 ovt =  4.2995665694e-08, /* -(128-log2(ovfl+.5ulp)) */
 cp    =  9.6179670095e-01, /* 0x3f76384f =2/(3ln2) */
-cp_h  =  9.6179199219e-01, /* 0x3f763800 =head of cp */
-cp_l  =  4.7017383622e-06, /* 0x369dc3a0 =tail of cp_h */
+cp_h  =  0xf.64p-4, /* cp high 12 bits.  */
+cp_l  =  -0x7.b11e3p-16, /* 2/(3ln2) - cp_h.  */
 ivln2    =  1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */
 ivln2_h  =  1.4426879883e+00, /* 0x3fb8aa00 =16b 1/ln2*/
 ivln2_l  =  7.0526075433e-06; /* 0x36eca570 =1/ln2 tail*/