alpha: fix corner cases in ceil, floor, rint.

Partial revert of bebc49030c. Even with the revert, ceil and floor are still faster than libcpml's equivalent. Fixes bug 5350. Signed-off-by: Matt Turner <mattst88@gmail.com>
2010-05-03 23:25:05 -04:00 · 2010-05-03 23:25:05 -04:00 · 116ff9ad18
parent 6a84c77c71
commit 116ff9ad18
7 changed files with 136 additions and 71 deletions
--- a/ChangeLog.alpha
+++ b/ChangeLog.alpha
@@ -1,3 +1,12 @@
+2010-05-03  Aurelien Jarno  <aurelien@aurel32.net>
+
+	* sysdeps/alpha/fpu/s_ceil.c: Fix corner cases.
+	* sysdeps/alpha/fpu/s_ceilf.c: Likewise.
+	* sysdeps/alpha/fpu/s_floor.c: Likewise.
+	* sysdeps/alpha/fpu/s_floorf.c: Likewise.
+	* sysdeps/alpha/fpu/s_rint.c: Likewise.
+	* sysdeps/alpha/fpu/s_rintf.c: Likewise.
+
 2010-05-03  GOTO Masanori  <gotom@debian.or.jp>

 	* sysdeps/unix/sysv/linux/alpha/kernel-features.h: Define
--- a/sysdeps/alpha/fpu/s_ceil.c
+++ b/sysdeps/alpha/fpu/s_ceil.c
@@ -27,20 +27,25 @@
 double
 __ceil (double x)
 {
-  double two52 = copysign (0x1.0p52, x);
-  double r, tmp;
+  if (isless (fabs (x), 9007199254740992.0))	/* 1 << DBL_MANT_DIG */
+    {
+      double tmp1, new_x;

+      new_x = -x;
      __asm (
 #ifdef _IEEE_FP_INEXACT
-	 "addt/suim %2, %3, %1\n\tsubt/suim %1, %3, %0"
+	     "cvttq/svim %2,%1\n\t"
 #else
-	 "addt/sum %2, %3, %1\n\tsubt/sum %1, %3, %0"
+	     "cvttq/svm %2,%1\n\t"
 #endif
-	 : "=&f"(r), "=&f"(tmp)
-	 : "f"(-x), "f"(-two52));
+	     "cvtqt/m %1,%0\n\t"
+	     : "=f"(new_x), "=&f"(tmp1)
+	     : "f"(new_x));

      /* Fix up the negation we did above, as well as handling -0 properly. */
-  return copysign (r, x);
+      x = copysign(new_x, x);
+    }
+  return x;
 }

 weak_alias (__ceil, ceil)
--- a/sysdeps/alpha/fpu/s_ceilf.c
+++ b/sysdeps/alpha/fpu/s_ceilf.c
@@ -26,20 +26,30 @@
 float
 __ceilf (float x)
 {
-  float two23 = copysignf (0x1.0p23, x);
-  float r, tmp;
+  if (isless (fabsf (x), 16777216.0f))	/* 1 << FLT_MANT_DIG */
+    {
+      /* Note that Alpha S_Floating is stored in registers in a
+	 restricted T_Floating format, so we don't even need to
+	 convert back to S_Floating in the end.  The initial
+	 conversion to T_Floating is needed to handle denormals.  */

-  __asm (
+      float tmp1, tmp2, new_x;
+
+      new_x = -x;
+      __asm ("cvtst/s %3,%2\n\t"
 #ifdef _IEEE_FP_INEXACT
-	 "adds/suim %2, %3, %1\n\tsubs/suim %1, %3, %0"
+	     "cvttq/svim %2,%1\n\t"
 #else
-	 "adds/sum %2, %3, %1\n\tsubs/sum %1, %3, %0"
+	     "cvttq/svm %2,%1\n\t"
 #endif
-	 : "=&f"(r), "=&f"(tmp)
-	 : "f"(-x), "f"(-two23));
+	     "cvtqt/m %1,%0\n\t"
+	     : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2)
+	     : "f"(new_x));

      /* Fix up the negation we did above, as well as handling -0 properly. */
-  return copysignf (r, x);
+      x = copysignf(new_x, x);
+    }
+  return x;
 }

 weak_alias (__ceilf, ceilf)
--- a/sysdeps/alpha/fpu/s_floor.c
+++ b/sysdeps/alpha/fpu/s_floor.c
@@ -21,26 +21,32 @@
 #include <math_ldbl_opt.h>


-/* Use the -inf rounding mode conversion instructions to implement floor.  */
+/* Use the -inf rounding mode conversion instructions to implement
+   floor.  We note when the exponent is large enough that the value
+   must be integral, as this avoids unpleasant integer overflows.  */

 double
 __floor (double x)
 {
-  double two52 = copysign (0x1.0p52, x);
-  double r, tmp;
+  if (isless (fabs (x), 9007199254740992.0))	/* 1 << DBL_MANT_DIG */
+    {
+      double tmp1, new_x;

      __asm (
 #ifdef _IEEE_FP_INEXACT
-	 "addt/suim %2, %3, %1\n\tsubt/suim %1, %3, %0"
+	     "cvttq/svim %2,%1\n\t"
 #else
-	 "addt/sum %2, %3, %1\n\tsubt/sum %1, %3, %0"
+	     "cvttq/svm %2,%1\n\t"
 #endif
-	 : "=&f"(r), "=&f"(tmp)
-	 : "f"(x), "f"(two52));
+	     "cvtqt/m %1,%0\n\t"
+	     : "=f"(new_x), "=&f"(tmp1)
+	     : "f"(x));

      /* floor(-0) == -0, and in general we'll always have the same
 	 sign as our input.  */
-  return copysign (r, x);
+      x = copysign(new_x, x);
+    }
+  return x;
 }

 weak_alias (__floor, floor)
--- a/sysdeps/alpha/fpu/s_floorf.c
+++ b/sysdeps/alpha/fpu/s_floorf.c
@@ -20,26 +20,37 @@
 #include <math.h>


-/* Use the -inf rounding mode conversion instructions to implement floor.  */
+/* Use the -inf rounding mode conversion instructions to implement
+   floor.  We note when the exponent is large enough that the value
+   must be integral, as this avoids unpleasant integer overflows.  */

 float
 __floorf (float x)
 {
-  float two23 = copysignf (0x1.0p23, x);
-  float r, tmp;
+  if (isless (fabsf (x), 16777216.0f))	/* 1 << FLT_MANT_DIG */
+    {
+      /* Note that Alpha S_Floating is stored in registers in a
+	 restricted T_Floating format, so we don't even need to
+	 convert back to S_Floating in the end.  The initial
+	 conversion to T_Floating is needed to handle denormals.  */

-  __asm (
+      float tmp1, tmp2, new_x;
+
+      __asm ("cvtst/s %3,%2\n\t"
 #ifdef _IEEE_FP_INEXACT
-	 "adds/suim %2, %3, %1\n\tsubs/suim %1, %3, %0"
+	     "cvttq/svim %2,%1\n\t"
 #else
-	 "adds/sum %2, %3, %1\n\tsubs/sum %1, %3, %0"
+	     "cvttq/svm %2,%1\n\t"
 #endif
-	 : "=&f"(r), "=&f"(tmp)
-	 : "f"(x), "f"(two23));
+	     "cvtqt/m %1,%0\n\t"
+	     : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2)
+	     : "f"(x));

      /* floor(-0) == -0, and in general we'll always have the same
 	 sign as our input.  */
-  return copysignf (r, x);
+      x = copysignf(new_x, x);
+    }
+  return x;
 }

 weak_alias (__floorf, floorf)
--- a/sysdeps/alpha/fpu/s_rint.c
+++ b/sysdeps/alpha/fpu/s_rint.c
@@ -24,15 +24,24 @@
 double
 __rint (double x)
 {
-  double two52 = copysign (0x1.0p52, x);
-  double r;
+  if (isless (fabs (x), 9007199254740992.0))	/* 1 << DBL_MANT_DIG */
+    {
+      double tmp1, new_x;
+      __asm (
+#ifdef _IEEE_FP_INEXACT
+	     "cvttq/svid %2,%1\n\t"
+#else
+	     "cvttq/svd %2,%1\n\t"
+#endif
+	     "cvtqt/d %1,%0\n\t"
+	     : "=f"(new_x), "=&f"(tmp1)
+	     : "f"(x));

-  r = x + two52;
-  r = r - two52;
-
-  /* rint(-0.1) == -0, and in general we'll always have the same sign
-     as our input.  */
-  return copysign (r, x);
+      /* rint(-0.1) == -0, and in general we'll always have the same
+	 sign as our input.  */
+      x = copysign(new_x, x);
+    }
+  return x;
 }

 weak_alias (__rint, rint)
--- a/sysdeps/alpha/fpu/s_rintf.c
+++ b/sysdeps/alpha/fpu/s_rintf.c
@@ -23,15 +23,30 @@
 float
 __rintf (float x)
 {
-  float two23 = copysignf (0x1.0p23, x);
-  float r;
+  if (isless (fabsf (x), 16777216.0f))	/* 1 << FLT_MANT_DIG */
+    {
+      /* Note that Alpha S_Floating is stored in registers in a
+	 restricted T_Floating format, so we don't even need to
+	 convert back to S_Floating in the end.  The initial
+	 conversion to T_Floating is needed to handle denormals.  */

-  r = x + two23;
-  r = r - two23;
+      float tmp1, tmp2, new_x;

-  /* rint(-0.1) == -0, and in general we'll always have the same sign
-     as our input.  */
-  return copysign (r, x);
+      __asm ("cvtst/s %3,%2\n\t"
+#ifdef _IEEE_FP_INEXACT
+	     "cvttq/svid %2,%1\n\t"
+#else
+	     "cvttq/svd %2,%1\n\t"
+#endif
+	     "cvtqt/d %1,%0\n\t"
+	     : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2)
+	     : "f"(x));
+
+      /* rint(-0.1) == -0, and in general we'll always have the same
+	 sign as our input.  */
+      x = copysignf(new_x, x);
+    }
+  return x;
 }

 weak_alias (__rintf, rintf)