Fix x86 pow inaccuracy for large integer exponents (bug 706).

2012-04-09 09:42:05 +00:00 · 2012-04-09 09:42:05 +00:00 · c483f6b4a4
parent d2de7579f2
commit c483f6b4a4
5 changed files with 112 additions and 16 deletions
--- a/10
+++ b/10
@ -1,3 +1,13 @@
+2012-04-09  Joseph Myers  <joseph@codesourcery.com>
+
+	[BZ #706]
+	* sysdeps/i386/fpu/e_pow.S (p10): New object.
+	(__ieee754_pow): Use iterative multiplication algorithm only for
+	integer exponents with absolute value below 1024.  Check for odd
+	integer exponents when using algorithm for real exponents.
+	* math/libm-test.inc (pow_test): Add more tests.
+	* sysdeps/x86_64/fpu/libm-test-ulps: Update.
+
 2012-04-08  Joseph Myers  <joseph@codesourcery.com>

 	[BZ #13705]
--- a/24
+++ b/24
@ -9,18 +9,18 @@ Version 2.16

 * The following bugs are resolved with this release:

-  174, 350, 369, 411, 2541, 2547, 2548, 2551, 2552, 2553, 2554, 2562, 2563,
-  2565, 2566, 2576, 2678, 3335, 3866, 3868, 3976, 3992, 4026, 4108, 4596,
-  4822, 5077, 5461, 5805, 5993, 6471, 6486, 6578, 6649, 6730, 6770, 6884,
-  6890, 6895, 6907, 6911, 9739, 9902, 10110, 10135, 10140, 10153, 10210,
-  10346, 10545, 10716, 11174, 11322, 11365, 11451, 11494, 12047, 12340,
-  13058, 13525, 13526, 13527, 13528, 13529, 13530, 13531, 13532, 13533,
-  13547, 13551, 13552, 13553, 13555, 13559, 13566, 13583, 13592, 13618,
-  13637, 13656, 13658, 13673, 13691, 13695, 13704, 13705, 13706, 13726,
-  13738, 13760, 13761, 13786, 13792, 13806, 13824, 13840, 13841, 13844,
-  13846, 13851, 13852, 13854, 13871, 13879, 13883, 13892, 13895, 13908,
-  13910, 13911, 13912, 13913, 13915, 13916, 13917, 13918, 13919, 13920,
-  13921, 13926, 13928, 13938
+  174, 350, 369, 411, 706, 2541, 2547, 2548, 2551, 2552, 2553, 2554, 2562,
+  2563, 2565, 2566, 2576, 2678, 3335, 3866, 3868, 3976, 3992, 4026, 4108,
+  4596, 4822, 5077, 5461, 5805, 5993, 6471, 6486, 6578, 6649, 6730, 6770,
+  6884, 6890, 6895, 6907, 6911, 9739, 9902, 10110, 10135, 10140, 10153,
+  10210, 10346, 10545, 10716, 11174, 11322, 11365, 11451, 11494, 12047,
+  12340, 13058, 13525, 13526, 13527, 13528, 13529, 13530, 13531, 13532,
+  13533, 13547, 13551, 13552, 13553, 13555, 13559, 13566, 13583, 13592,
+  13618, 13637, 13656, 13658, 13673, 13691, 13695, 13704, 13705, 13706,
+  13726, 13738, 13760, 13761, 13786, 13792, 13806, 13824, 13840, 13841,
+  13844, 13846, 13851, 13852, 13854, 13871, 13879, 13883, 13892, 13895,
+  13908, 13910, 13911, 13912, 13913, 13915, 13916, 13917, 13918, 13919,
+  13920, 13921, 13926, 13928, 13938

 * ISO C11 support:

--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@ -6070,6 +6070,32 @@ pow_test (void)
  /* Bug 13872: spurious OVERFLOW exception may be present.  */
  TEST_ff_f (pow, -min_value, max_value, plus_zero, OVERFLOW_EXCEPTION_OK);

+#ifndef TEST_LDOUBLE /* Bug 13881.  */
+  TEST_ff_f (pow, 0x0.ffffffp0, 10, 0.999999403953712118183885036774764444747L);
+  TEST_ff_f (pow, 0x0.ffffffp0, 100, 0.999994039553108359406305079606228341585L);
+  TEST_ff_f (pow, 0x0.ffffffp0, 1000, 0.9999403971297699052276650144650733772182L);
+  TEST_ff_f (pow, 0x0.ffffffp0, 0x1p24, 0.3678794302077803437135155590023422899744L);
+  TEST_ff_f (pow, 0x0.ffffffp0, 0x1p30, 1.603807831524924233828134753069728224044e-28L);
+  TEST_ff_f (pow, 0x0.ffffffp0, 0x1.234566p30, 2.374884712135295099971443365381007297732e-32L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -10, 1.000000596046643153205170848674671339688L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -100, 1.000005960482418779499387594989252621451L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -1000, 1.000059606422943986382898964231519867906L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -0x1p24, 2.7182819094701610539628664526874952929416L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -0x1p30, 6.2351609734265057988914412331288163636075e+27L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -0x1.234566p30, 4.2107307141696353498921307077142537353515e+31L);
+  TEST_ff_f (pow, 0x1.000002p0, 0x1p24, 7.3890552180866447284268641248075832310141L);
+  TEST_ff_f (pow, 0x1.000002p0, 0x1.234566p29, 4.2107033006507495188536371520637025716256e+31L);
+  TEST_ff_f (pow, 0x1.000002p0, -0x1.234566p29, 2.3749001736727769098946062325205705312166e-32L);
+#endif
+
+  /* Bug 13881: powl inaccurate so these tests disabled for long double.  */
+#if !defined TEST_FLOAT && !defined TEST_LDOUBLE
+  TEST_ff_f (pow, 0x0.fffffffffffff8p0L, 0x1.23456789abcdfp62L, 1.0118762747827252817436395051178295138220e-253L);
+  TEST_ff_f (pow, 0x0.fffffffffffff8p0L, -0x1.23456789abcdfp62L, 9.8826311568054561811190162420900667121992e+252L);
+  TEST_ff_f (pow, 0x1.0000000000001p0L, 0x1.23456789abcdfp61L, 9.8826311568044974397135026217687399395481e+252L);
+  TEST_ff_f (pow, 0x1.0000000000001p0L, -0x1.23456789abcdfp61L, 1.0118762747828234466621210689458255908670e-253L);
+#endif
+
  END (pow);
 }

--- a/sysdeps/i386/fpu/e_pow.S
+++ b/sysdeps/i386/fpu/e_pow.S
@ -32,6 +32,9 @@ limit:	.double 0.29
 	ASM_TYPE_DIRECTIVE(p63,@object)
 p63:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
 	ASM_SIZE_DIRECTIVE(p63)
+	ASM_TYPE_DIRECTIVE(p10,@object)
+p10:	.byte 0, 0, 0, 0, 0, 0, 0x90, 0x40
+	ASM_SIZE_DIRECTIVE(p10)

 	.section .rodata.cst16,"aM",@progbits,16

@ -116,7 +119,15 @@ ENTRY(__ieee754_pow)
 	sahf
 	jne	3f

-	/* OK, we have an integer value for y.  */
+	/* OK, we have an integer value for y.  If large enough that
+	   errors may propagate out of the 11 bits excess precision, use
+	   the algorithm for real exponent instead.  */
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(p10)		// y : x
+	fnstsw
+	sahf
+	jnc	2f
 	popl	%eax
 	cfi_adjust_cfa_offset (-4)
 	popl	%edx
@ -157,7 +168,9 @@ ENTRY(__ieee754_pow)

 	cfi_adjust_cfa_offset (8)
 	.align ALIGNARG(4)
-2:	/* y is a large integer (so even).  */
+2:	// y is a large integer (absolute value at least 1L<<10), but
+	// may be odd unless at least 1L<<64.  So it may be necessary
+	// to adjust the sign of a negative result afterwards.
 	fxch			// x : y
 	fabs			// |x| : y
 	fxch			// y : x
@ -187,9 +200,41 @@ ENTRY(__ieee754_pow)
 	f2xm1			// 2^fract(y*log2(x))-1 : int(y*log2(x))
 	faddl	MO(one)		// 2^fract(y*log2(x)) : int(y*log2(x))
 	fscale			// 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
-	addl	$8, %esp
-	cfi_adjust_cfa_offset (-8)
 	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
+	testb	$2, %dh
+	jz	292f
+	// x is negative.  If y is an odd integer, negate the result.
+	fldl	20(%esp)	// y : abs(result)
+	fld	%st		// y : y : abs(result)
+	fabs			// |y| : y : abs(result)
+	fcompl	MO(p63)		// y : abs(result)
+	fnstsw
+	sahf
+	jnc	291f
+
+	// We must find out whether y is an odd integer.
+	fld	%st		// y : y : abs(result)
+	fistpll	(%esp)		// y : abs(result)
+	fildll	(%esp)		// int(y) : y : abs(result)
+	fucompp			// abs(result)
+	fnstsw
+	sahf
+	jne	292f
+
+	// OK, the value is an integer, but is it odd?
+	popl	%eax
+	cfi_adjust_cfa_offset (-4)
+	popl	%edx
+	cfi_adjust_cfa_offset (-4)
+	andb	$1, %al
+	jz	290f		// jump if not odd
+	// It's an odd integer.
+	fchs
+290:	ret
+	cfi_adjust_cfa_offset (8)
+291:	fstp	%st(0)		// abs(result)
+292:	addl	$8, %esp
+	cfi_adjust_cfa_offset (-8)
 	ret


--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@ -1444,6 +1444,17 @@ Test "log1p (-0.25) == -0.287682072451780927439219005993827432":
 float: 1
 ifloat: 1

+# pow
+Test "pow (0x0.ffffffp0, -0x1p24) == 2.7182819094701610539628664526874952929416":
+float: 1
+ifloat: 1
+Test "pow (0x0.ffffffp0, 0x1p24) == 0.3678794302077803437135155590023422899744":
+float: 1
+ifloat: 1
+Test "pow (0x1.000002p0, 0x1p24) == 7.3890552180866447284268641248075832310141":
+float: 1
+ifloat: 1
+
 # pow_downward
 Test "pow_downward (1.0625, 1.125) == 1.070582293028761362162622578677070098674":
 ildouble: 1
@ -2413,6 +2424,10 @@ Function: "log1p":
 float: 1
 ifloat: 1

+Function: "pow":
+float: 1
+ifloat: 1
+
 Function: "pow_downward":
 float: 1
 ifloat: 1