* sysdeps/alpha/divq.S: Save t3 before it gets clobbered. * sysdeps/alpha/remq.S: Likewise. * sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the new division algorithms in divl.S and divq.S respectively.

2004-07-30  Richard Henderson  <rth@redhat.com>

	* sysdeps/alpha/divq.S: Save t3 before it gets clobbered.
	* sysdeps/alpha/remq.S: Likewise.
	* sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the
	new division algorithms in divl.S and divq.S respectively.
2004-07-30 18:15:57 +00:00 · 2004-07-30 18:15:57 +00:00 · 80574c92d7
parent d1091a21f5
commit 80574c92d7
5 changed files with 214 additions and 129 deletions
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2004-07-30  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/alpha/divq.S: Save t3 before it gets clobbered.
+	* sysdeps/alpha/remq.S: Likewise.
+	* sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the
+	new division algorithms in divl.S and divq.S respectively.
+
 2004-07-28  GOTO Masanori  <gotom@debian.or.jp>
 
 	* timezone/asia: Update from tzdata2004b.
--- a/sysdeps/alpha/div.S
+++ b/sysdeps/alpha/div.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 2004 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson <rth@tamu.edu>.
@@ -17,13 +17,13 @@
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
-#include <sysdep.h>
+#include "div_libc.h"
-#ifdef __linux__
+#undef FRAME
-# include <asm/gentrap.h>
+#ifdef __alpha_fix__
-# include <asm/pal.h>
+#define FRAME 0
 #else
-# include <machine/pal.h>
+#define FRAME 16
 #endif
 	.set noat
@@ -32,78 +32,54 @@
 	.globl div
 	.ent div
 div:
-	.frame sp, 0, ra
+	.frame sp, FRAME, ra
 #if FRAME > 0
 	lda	sp, -FRAME(sp)
 #endif
 #ifdef PROF
 	.set	macro
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.set	nomacro
 	.prologue 1
 #else
 	.prologue 0
 #endif
-#define divisor   t1
+	beq	$18, $divbyzero
 #define mask      t2
 #define quotient  t3
 #define modulus   t4
 #define tmp1      t5
 #define tmp2      t6
 #define compare   t7
-	/* find correct sign for input to unsigned divide loop. */
+	_ITOFT2	$17, $f0, 0, $18, $f1, 8
 	negl	a1, modulus			# e0    :
 	negl	a2, divisor			# .. e1 :
 	sextl	a1, a1				# e0    :
 	sextl	a2, a2				# .. e1 :
 	mov	zero, quotient			# e0    :
 	mov	1, mask				# .. e1 :
 	cmovge	a1, a1, modulus			# e0    :
 	cmovge	a2, a2, divisor			# .. e1 :
 	beq	a2, $divbyzero			# e1    :
 	unop					#       :
-	/* shift divisor left, using 3-bit shifts for 32-bit divides as we
+	cvtqt	$f0, $f0
-	   can't overflow.  Three-bit shifts will result in looping three
+	cvtqt	$f1, $f1
-	   times less here, but can result in two loops more later.  Thus
+	divt/c	$f0, $f1, $f0
-	   using a large shift isn't worth it (and s8addq pairs better than
+	cvttq/c	$f0, $f0
 	   a shift).  */
-1:	cmpult	divisor, modulus, compare	# e0    :
+	_FTOIT	$f0, $0, 0
 	s8addq	divisor, zero, divisor		# .. e1 :
 	s8addq	mask, zero, mask		# e0    :
 	bne	compare, 1b			# .. e1 :
-	/* start to go right again. */
+	mull	$0, $18, $1
-2:	addq	quotient, mask, tmp2		# e1    :
+	subl	$17, $1, $1
 	srl	mask, 1, mask			# .. e0 :
 	cmpule	divisor, modulus, compare	# e0    :
 	subq	modulus, divisor, tmp1		# .. e1 :
 	cmovne	compare, tmp2, quotient		# e1    :
 	srl	divisor, 1, divisor		# .. e0 :
 	cmovne	compare, tmp1, modulus		# e0    :
 	bne	mask, 2b			# .. e1 :
-	/* find correct sign for result.  */
+	stl	$0, 0(a0)
-	xor	a1, a2, compare			# e0    :
+	stl	$1, 4(a0)
-	negl	quotient, tmp1			# .. e1 :
+	mov	a0, v0
 	negl	modulus, tmp2			# e0    :
 	cmovlt	compare, tmp1, quotient		# .. e1 :
 	cmovlt	a1, tmp2, modulus		# e1    :
-	/* and store it away in the structure.  */
+#if FRAME > 0
-	stl	quotient, 0(a0)			# .. e0 :
+	lda	sp, FRAME(sp)
-	mov	a0, v0				# e1    :
+#endif
-	stl	modulus, 4(a0)			# .. e0 :
+	ret
 	ret					# e1    :
 $divbyzero:
 	mov	a0, v0
-	ldiq	a0, GEN_INTDIV
+	lda	a0, GEN_INTDIV
 	call_pal PAL_gentrap
 	/* if trap returns, return zero.  */
 	stl	zero, 0(v0)
 	stl	zero, 4(v0)
 #if FRAME > 0
 	lda	sp, FRAME(sp)
 #endif
 	ret
 	.end div
--- a/sysdeps/alpha/divq.S
+++ b/sysdeps/alpha/divq.S
@@ -115,16 +115,16 @@ $fix_sign_in_ret1:
 	_FTOIT	$f0, Q, 8
 	.align	3
 $fix_sign_in_ret2:
 	mulq	Q, Y, QY
 	stq	t4, 8(sp)
 	ldt	$f0, 0(sp)
 	stq	t3, 0(sp)
 	cfi_restore ($f0)
 	cfi_rel_offset (t3, 0)
 	mulq	Q, Y, QY
 	unop
 	stq	t4, 8(sp)
 	unop
 	cfi_rel_offset (t4, 8)
 	cfi_restore ($f0)
 	stq	t3, 0(sp)
 	unop
 	cfi_rel_offset (t3, 0)
 	subq	QY, X, R
 	mov	Y, SY
--- a/sysdeps/alpha/ldiv.S
+++ b/sysdeps/alpha/ldiv.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 2001 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 2001, 2004 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson <rth@tamu.edu>.
@@ -17,93 +17,195 @@
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
-#include <sysdep.h>
+#include "div_libc.h"
-#ifdef __linux__
+#undef FRAME
-# include <asm/gentrap.h>
+#ifdef __alpha_fix__
-# include <asm/pal.h>
+#define FRAME 0
 #else
-# include <machine/pal.h>
+#define FRAME 16
 #endif
 #undef X
 #undef Y
 #define X $17
 #define Y $18
 	.set noat
 	.align 4
 	.globl ldiv
 	.ent ldiv
 ldiv:
-	.frame sp, 0, ra
+	.frame sp, FRAME, ra
 #if FRAME > 0
 	lda	sp, -FRAME(sp)
 #endif
 #ifdef PROF
 	.set	macro
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
 	.set	nomacro
 	.prologue 1
 #else
 	.prologue 0
 #endif
-#define divisor   t1
+	beq	Y, $divbyzero
 #define mask      t2
 #define quotient  t3
 #define modulus   t4
 #define tmp1      t5
 #define tmp2      t6
 #define compare   t7
-	/* find correct sign for input to unsigned divide loop. */
+	_ITOFT2	X, $f0, 0, Y, $f1, 8
 	mov	a1, modulus			# e0    :
 	mov	a2, divisor			# .. e1 :
 	negq	a1, tmp1			# e0    :
 	negq	a2, tmp2			# .. e1 :
 	mov	zero, quotient			# e0    :
 	mov	1, mask				# .. e1 :
 	cmovlt	a1, tmp1, modulus		# e0    :
 	cmovlt	a2, tmp2, divisor		# .. e1 :
 	beq	a2, $divbyzero			# e1    :
 	unop					#       :
-	/* shift divisor left.  */
+	.align	4
-1:	cmpult	divisor, modulus, compare	# e0    :
+	cvtqt	$f0, $f0
-	blt	divisor, 2f			# .. e1 :
+	cvtqt	$f1, $f1
-	addq	divisor, divisor, divisor	# e0    :
+	divt/c	$f0, $f1, $f0
-	addq	mask, mask, mask		# .. e1 :
+	unop
 	bne	compare, 1b			# e1    :
 	unop					#       :
-	/* start to go right again. */
+	/* Check to see if X fit in the double as an exact value.  */
-2:	addq	quotient, mask, tmp2		# e1    :
+	sll	X, (64-53), AT
-	srl	mask, 1, mask			# .. e0 :
+	sra	AT, (64-53), AT
-	cmpule	divisor, modulus, compare	# e0    :
+	cmpeq	X, AT, AT
-	subq	modulus, divisor, tmp1		# .. e1 :
+	beq	AT, $x_big
 	cmovne	compare, tmp2, quotient		# e1    :
 	srl	divisor, 1, divisor		# .. e0 :
 	cmovne	compare, tmp1, modulus		# e0    :
 	bne	mask, 2b			# .. e1 :
-	/* find correct sign for result.  */
+	/* If we get here, we're expecting exact results from the division.
-	xor	a1, a2, compare			# e0    :
+	   Do nothing else besides convert and clean up.  */
-	negq	quotient, tmp1			# .. e1 :
+	cvttq/c	$f0, $f0
-	negq	modulus, tmp2			# e0    :
+	_FTOIT	$f0, $0, 0
 	cmovlt	compare, tmp1, quotient		# .. e1 :
 	cmovlt	a1, tmp2, modulus		# e1    :
-	/* and store it away in the structure.  */
+$egress:
-9:	stq	quotient, 0(a0)			# .. e0 :
+	mulq	$0, Y, $1
-	mov	a0, v0				# e1    :
+	subq	X, $1, $1
-	stq	modulus, 8(a0)			# .. e0 :
+
-	ret					# e1    :
+	stq	$0, 0($16)
 	stq	$1, 8($16)
 	mov	$16, $0
 #if FRAME > 0
 	lda	sp, FRAME(sp)
 #endif
 	ret
 	.align	4
 $x_big:
 	/* If we get here, X is large enough that we don't expect exact
 	   results, and neither X nor Y got mis-translated for the fp
 	   division.  Our task is to take the fp result, figure out how
 	   far it's off from the correct result and compute a fixup.  */
 #define Q	v0		/* quotient */
 #define R	t0		/* remainder */
 #define SY	t1		/* scaled Y */
 #define S	t2		/* scalar */
 #define QY	t3		/* Q*Y */
 	/* The fixup code below can only handle unsigned values.  */
 	or	X, Y, AT
 	mov	$31, t5
 	blt	AT, $fix_sign_in
 $fix_sign_in_ret1:
 	cvttq/c	$f0, $f0
 	_FTOIT	$f0, Q, 8
 	.align	3
 $fix_sign_in_ret2:
 	mulq	Q, Y, QY
 	.align	4
 	subq	QY, X, R
 	mov	Y, SY
 	mov	1, S
 	bgt	R, $q_high
 $q_high_ret:
 	subq	X, QY, R
 	mov	Y, SY
 	mov	1, S
 	bgt	R, $q_low
 $q_low_ret:
 	negq	Q, t4
 	cmovlbs	t5, t4, Q
 	br	$egress
 	.align	4
 	/* The quotient that we computed was too large.  We need to reduce
 	   it by S such that Y*S >= R.  Obviously the closer we get to the
 	   correct value the better, but overshooting high is ok, as we'll
 	   fix that up later.  */
 0:
 	addq	SY, SY, SY
 	addq	S, S, S
 $q_high:
 	cmpult	SY, R, AT
 	bne	AT, 0b
 	subq	Q, S, Q
 	unop
 	subq	QY, SY, QY
 	br	$q_high_ret
 	.align	4
 	/* The quotient that we computed was too small.  Divide Y by the 
 	   current remainder (R) and add that to the existing quotient (Q).
 	   The expectation, of course, is that R is much smaller than X.  */
 	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
 	   already have a copy of Y in SY and the value 1 in S.  */
 0:
 	addq	SY, SY, SY
 	addq	S, S, S
 $q_low:
 	cmpult	SY, R, AT
 	bne	AT, 0b
 	/* Shift-down and subtract loop.  Each iteration compares our scaled
 	   Y (SY) with the remainder (R); if SY <= R then X is divisible by
 	   Y's scalar (S) so add it to the quotient (Q).  */
 2:	addq	Q, S, t3
 	srl	S, 1, S
 	cmpule	SY, R, AT
 	subq	R, SY, t4
 	cmovne	AT, t3, Q
 	cmovne	AT, t4, R
 	srl	SY, 1, SY
 	bne	S, 2b
 	br	$q_low_ret
 	.align	4
 $fix_sign_in:
 	/* If we got here, then X|Y is negative.  Need to adjust everything
 	   such that we're doing unsigned division in the fixup loop.  */
 	/* T5 is true if result should be negative.  */
 	xor	X, Y, AT
 	cmplt	AT, 0, t5
 	cmplt	X, 0, AT
 	negq	X, t0
 	cmovne	AT, t0, X
 	cmplt	Y, 0, AT
 	negq	Y, t0
 	cmovne	AT, t0, Y
 	blbc	t5, $fix_sign_in_ret1
 	cvttq/c	$f0, $f0
 	_FTOIT	$f0, Q, 8
 	.align	3
 	negq	Q, Q
 	br	$fix_sign_in_ret2
 $divbyzero:
 	mov	a0, v0
 	lda	a0, GEN_INTDIV
 	call_pal PAL_gentrap
 	/* if trap returns, return zero.  */
 	stq	zero, 0(v0)
 	stq	zero, 8(v0)
 #if FRAME > 0
 	lda	sp, FRAME(sp)
 #endif
 	ret
-	.end ldiv
+	.end	ldiv
 weak_alias(ldiv, lldiv)
 weak_alias(ldiv, imaxdiv)
--- a/sysdeps/alpha/remq.S
+++ b/sysdeps/alpha/remq.S
@@ -116,16 +116,16 @@ $fix_sign_in_ret1:
 	_FTOIT	$f0, Q, 8
 	.align	3
 $fix_sign_in_ret2:
 	mulq	Q, Y, QY
 	stq	t4, 8(sp)
 	ldt	$f0, 0(sp)
 	stq	t3, 0(sp)
 	cfi_restore ($f0)
 	cfi_rel_offset (t3, 0)
 	mulq	Q, Y, QY
 	unop
 	stq	t4, 8(sp)
 	unop
 	cfi_rel_offset (t4, 8)
 	cfi_restore ($f0)
 	stq	t3, 0(sp)
 	unop
 	cfi_rel_offset (t3, 0)
 	subq	QY, X, R
 	mov	Y, SY