alpha: fix corner cases in ceil, floor, rint.

Partial revert of bebc49030c. Even with the revert, ceil and floor are still faster than libcpml's equivalent. Fixes bug 5350.

Signed-off-by: Matt Turner <mattst88@gmail.com>
2010-05-03 23:25:05 -04:00 · 2010-05-03 23:25:05 -04:00 · 116ff9ad18
parent 6a84c77c71
commit 116ff9ad18
7 changed files with 136 additions and 71 deletions
--- a/ChangeLog.alpha
+++ b/ChangeLog.alpha
@@ -1,3 +1,12 @@
 2010-05-03  Aurelien Jarno  <aurelien@aurel32.net>
 	* sysdeps/alpha/fpu/s_ceil.c: Fix corner cases.
 	* sysdeps/alpha/fpu/s_ceilf.c: Likewise.
 	* sysdeps/alpha/fpu/s_floor.c: Likewise.
 	* sysdeps/alpha/fpu/s_floorf.c: Likewise.
 	* sysdeps/alpha/fpu/s_rint.c: Likewise.
 	* sysdeps/alpha/fpu/s_rintf.c: Likewise.
 2010-05-03  GOTO Masanori  <gotom@debian.or.jp>
 	* sysdeps/unix/sysv/linux/alpha/kernel-features.h: Define
--- a/sysdeps/alpha/fpu/s_ceil.c
+++ b/sysdeps/alpha/fpu/s_ceil.c
@@ -27,20 +27,25 @@
 double
 __ceil (double x)
 {
-  double two52 = copysign (0x1.0p52, x);
+  if (isless (fabs (x), 9007199254740992.0))	/* 1 << DBL_MANT_DIG */
-  double r, tmp;
+    {
-  
+      double tmp1, new_x;
  __asm (
 #ifdef _IEEE_FP_INEXACT
 	 "addt/suim %2, %3, %1\n\tsubt/suim %1, %3, %0"
 #else
 	 "addt/sum %2, %3, %1\n\tsubt/sum %1, %3, %0"
 #endif
 	 : "=&f"(r), "=&f"(tmp)
 	 : "f"(-x), "f"(-two52));
-  /* Fix up the negation we did above, as well as handling -0 properly. */
+      new_x = -x;
-  return copysign (r, x);
+      __asm (
 #ifdef _IEEE_FP_INEXACT
 	     "cvttq/svim %2,%1\n\t"
 #else
 	     "cvttq/svm %2,%1\n\t"
 #endif
 	     "cvtqt/m %1,%0\n\t"
 	     : "=f"(new_x), "=&f"(tmp1)
 	     : "f"(new_x));
      /* Fix up the negation we did above, as well as handling -0 properly. */
      x = copysign(new_x, x);
    }
  return x;
 }
 weak_alias (__ceil, ceil)
--- a/sysdeps/alpha/fpu/s_ceilf.c
+++ b/sysdeps/alpha/fpu/s_ceilf.c
@@ -26,20 +26,30 @@
 float
 __ceilf (float x)
 {
-  float two23 = copysignf (0x1.0p23, x);
+  if (isless (fabsf (x), 16777216.0f))	/* 1 << FLT_MANT_DIG */
-  float r, tmp;
+    {
-  
+      /* Note that Alpha S_Floating is stored in registers in a
-  __asm (
+	 restricted T_Floating format, so we don't even need to
-#ifdef _IEEE_FP_INEXACT
+	 convert back to S_Floating in the end.  The initial
-	 "adds/suim %2, %3, %1\n\tsubs/suim %1, %3, %0"
+	 conversion to T_Floating is needed to handle denormals.  */
 #else
 	 "adds/sum %2, %3, %1\n\tsubs/sum %1, %3, %0"
 #endif
 	 : "=&f"(r), "=&f"(tmp)
 	 : "f"(-x), "f"(-two23));
-  /* Fix up the negation we did above, as well as handling -0 properly. */
+      float tmp1, tmp2, new_x;
-  return copysignf (r, x);
+
      new_x = -x;
      __asm ("cvtst/s %3,%2\n\t"
 #ifdef _IEEE_FP_INEXACT
 	     "cvttq/svim %2,%1\n\t"
 #else
 	     "cvttq/svm %2,%1\n\t"
 #endif
 	     "cvtqt/m %1,%0\n\t"
 	     : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2)
 	     : "f"(new_x));
      /* Fix up the negation we did above, as well as handling -0 properly. */
      x = copysignf(new_x, x);
    }
  return x;
 }
 weak_alias (__ceilf, ceilf)
--- a/sysdeps/alpha/fpu/s_floor.c
+++ b/sysdeps/alpha/fpu/s_floor.c
@@ -21,26 +21,32 @@
 #include <math_ldbl_opt.h>
-/* Use the -inf rounding mode conversion instructions to implement floor.  */
+/* Use the -inf rounding mode conversion instructions to implement
   floor.  We note when the exponent is large enough that the value
   must be integral, as this avoids unpleasant integer overflows.  */
 double
 __floor (double x)
 {
-  double two52 = copysign (0x1.0p52, x);
+  if (isless (fabs (x), 9007199254740992.0))	/* 1 << DBL_MANT_DIG */
-  double r, tmp;
+    {
-  
+      double tmp1, new_x;
  __asm (
 #ifdef _IEEE_FP_INEXACT
 	 "addt/suim %2, %3, %1\n\tsubt/suim %1, %3, %0"
 #else
 	 "addt/sum %2, %3, %1\n\tsubt/sum %1, %3, %0"
 #endif
 	 : "=&f"(r), "=&f"(tmp)
 	 : "f"(x), "f"(two52));
-  /* floor(-0) == -0, and in general we'll always have the same
+      __asm (
-     sign as our input.  */
+#ifdef _IEEE_FP_INEXACT
-  return copysign (r, x);
+	     "cvttq/svim %2,%1\n\t"
 #else
 	     "cvttq/svm %2,%1\n\t"
 #endif
 	     "cvtqt/m %1,%0\n\t"
 	     : "=f"(new_x), "=&f"(tmp1)
 	     : "f"(x));
      /* floor(-0) == -0, and in general we'll always have the same
 	 sign as our input.  */
      x = copysign(new_x, x);
    }
  return x;
 }
 weak_alias (__floor, floor)
--- a/sysdeps/alpha/fpu/s_floorf.c
+++ b/sysdeps/alpha/fpu/s_floorf.c
@@ -20,26 +20,37 @@
 #include <math.h>
-/* Use the -inf rounding mode conversion instructions to implement floor.  */
+/* Use the -inf rounding mode conversion instructions to implement
   floor.  We note when the exponent is large enough that the value
   must be integral, as this avoids unpleasant integer overflows.  */
 float
 __floorf (float x)
 {
-  float two23 = copysignf (0x1.0p23, x);
+  if (isless (fabsf (x), 16777216.0f))	/* 1 << FLT_MANT_DIG */
-  float r, tmp;
+    {
-  
+      /* Note that Alpha S_Floating is stored in registers in a
-  __asm (
+	 restricted T_Floating format, so we don't even need to
-#ifdef _IEEE_FP_INEXACT
+	 convert back to S_Floating in the end.  The initial
-	 "adds/suim %2, %3, %1\n\tsubs/suim %1, %3, %0"
+	 conversion to T_Floating is needed to handle denormals.  */
 #else
 	 "adds/sum %2, %3, %1\n\tsubs/sum %1, %3, %0"
 #endif
 	 : "=&f"(r), "=&f"(tmp)
 	 : "f"(x), "f"(two23));
-  /* floor(-0) == -0, and in general we'll always have the same
+      float tmp1, tmp2, new_x;
-     sign as our input.  */
+
-  return copysignf (r, x);
+      __asm ("cvtst/s %3,%2\n\t"
 #ifdef _IEEE_FP_INEXACT
 	     "cvttq/svim %2,%1\n\t"
 #else
 	     "cvttq/svm %2,%1\n\t"
 #endif
 	     "cvtqt/m %1,%0\n\t"
 	     : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2)
 	     : "f"(x));
      /* floor(-0) == -0, and in general we'll always have the same
 	 sign as our input.  */
      x = copysignf(new_x, x);
    }
  return x;
 }
 weak_alias (__floorf, floorf)
--- a/sysdeps/alpha/fpu/s_rint.c
+++ b/sysdeps/alpha/fpu/s_rint.c
@@ -24,15 +24,24 @@
 double
 __rint (double x)
 {
-  double two52 = copysign (0x1.0p52, x);
+  if (isless (fabs (x), 9007199254740992.0))	/* 1 << DBL_MANT_DIG */
-  double r;
+    {
-  
+      double tmp1, new_x;
-  r = x + two52;
+      __asm (
-  r = r - two52;
+#ifdef _IEEE_FP_INEXACT
 	     "cvttq/svid %2,%1\n\t"
 #else
 	     "cvttq/svd %2,%1\n\t"
 #endif
 	     "cvtqt/d %1,%0\n\t"
 	     : "=f"(new_x), "=&f"(tmp1)
 	     : "f"(x));
-  /* rint(-0.1) == -0, and in general we'll always have the same sign
+      /* rint(-0.1) == -0, and in general we'll always have the same
-     as our input.  */
+	 sign as our input.  */
-  return copysign (r, x);
+      x = copysign(new_x, x);
    }
  return x;
 }
 weak_alias (__rint, rint)
--- a/sysdeps/alpha/fpu/s_rintf.c
+++ b/sysdeps/alpha/fpu/s_rintf.c
@@ -23,15 +23,30 @@
 float
 __rintf (float x)
 {
-  float two23 = copysignf (0x1.0p23, x);
+  if (isless (fabsf (x), 16777216.0f))	/* 1 << FLT_MANT_DIG */
-  float r;
+    {
      /* Note that Alpha S_Floating is stored in registers in a
 	 restricted T_Floating format, so we don't even need to
 	 convert back to S_Floating in the end.  The initial
 	 conversion to T_Floating is needed to handle denormals.  */
-  r = x + two23;
+      float tmp1, tmp2, new_x;
  r = r - two23;
-  /* rint(-0.1) == -0, and in general we'll always have the same sign
+      __asm ("cvtst/s %3,%2\n\t"
-     as our input.  */
+#ifdef _IEEE_FP_INEXACT
-  return copysign (r, x);
+	     "cvttq/svid %2,%1\n\t"
 #else
 	     "cvttq/svd %2,%1\n\t"
 #endif
 	     "cvtqt/d %1,%0\n\t"
 	     : "=f"(new_x), "=&f"(tmp1), "=&f"(tmp2)
 	     : "f"(x));
      /* rint(-0.1) == -0, and in general we'll always have the same
 	 sign as our input.  */
      x = copysignf(new_x, x);
    }
  return x;
 }
 weak_alias (__rintf, rintf)