* sysdeps/alpha/divq.S: Save t3 before it gets clobbered. * sysdeps/alpha/remq.S: Likewise. * sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the new division algorithms in divl.S and divq.S respectively.

2004-07-30  Richard Henderson  <rth@redhat.com>

	* sysdeps/alpha/divq.S: Save t3 before it gets clobbered.
	* sysdeps/alpha/remq.S: Likewise.
	* sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the
	new division algorithms in divl.S and divq.S respectively.
This commit is contained in:
Richard Henderson 2004-07-30 18:15:57 +00:00
parent d1091a21f5
commit 80574c92d7
5 changed files with 214 additions and 129 deletions

View File

@ -1,3 +1,10 @@
2004-07-30 Richard Henderson <rth@redhat.com>
* sysdeps/alpha/divq.S: Save t3 before it gets clobbered.
* sysdeps/alpha/remq.S: Likewise.
* sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the
new division algorithms in divl.S and divq.S respectively.
2004-07-28 GOTO Masanori <gotom@debian.or.jp> 2004-07-28 GOTO Masanori <gotom@debian.or.jp>
* timezone/asia: Update from tzdata2004b. * timezone/asia: Update from tzdata2004b.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. /* Copyright (C) 1996, 1997, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Richard Henderson <rth@tamu.edu>. Contributed by Richard Henderson <rth@tamu.edu>.
@ -17,13 +17,13 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */ 02111-1307 USA. */
#include <sysdep.h> #include "div_libc.h"
#ifdef __linux__ #undef FRAME
# include <asm/gentrap.h> #ifdef __alpha_fix__
# include <asm/pal.h> #define FRAME 0
#else #else
# include <machine/pal.h> #define FRAME 16
#endif #endif
.set noat .set noat
@ -32,78 +32,54 @@
.globl div .globl div
.ent div .ent div
div: div:
.frame sp, 0, ra .frame sp, FRAME, ra
#if FRAME > 0
lda sp, -FRAME(sp)
#endif
#ifdef PROF #ifdef PROF
.set macro
ldgp gp, 0(pv) ldgp gp, 0(pv)
lda AT, _mcount lda AT, _mcount
jsr AT, (AT), _mcount jsr AT, (AT), _mcount
.set nomacro
.prologue 1 .prologue 1
#else #else
.prologue 0 .prologue 0
#endif #endif
#define divisor t1 beq $18, $divbyzero
#define mask t2
#define quotient t3
#define modulus t4
#define tmp1 t5
#define tmp2 t6
#define compare t7
/* find correct sign for input to unsigned divide loop. */ _ITOFT2 $17, $f0, 0, $18, $f1, 8
negl a1, modulus # e0 :
negl a2, divisor # .. e1 :
sextl a1, a1 # e0 :
sextl a2, a2 # .. e1 :
mov zero, quotient # e0 :
mov 1, mask # .. e1 :
cmovge a1, a1, modulus # e0 :
cmovge a2, a2, divisor # .. e1 :
beq a2, $divbyzero # e1 :
unop # :
/* shift divisor left, using 3-bit shifts for 32-bit divides as we cvtqt $f0, $f0
can't overflow. Three-bit shifts will result in looping three cvtqt $f1, $f1
times less here, but can result in two loops more later. Thus divt/c $f0, $f1, $f0
using a large shift isn't worth it (and s8addq pairs better than cvttq/c $f0, $f0
a shift). */
1: cmpult divisor, modulus, compare # e0 : _FTOIT $f0, $0, 0
s8addq divisor, zero, divisor # .. e1 :
s8addq mask, zero, mask # e0 :
bne compare, 1b # .. e1 :
/* start to go right again. */ mull $0, $18, $1
2: addq quotient, mask, tmp2 # e1 : subl $17, $1, $1
srl mask, 1, mask # .. e0 :
cmpule divisor, modulus, compare # e0 :
subq modulus, divisor, tmp1 # .. e1 :
cmovne compare, tmp2, quotient # e1 :
srl divisor, 1, divisor # .. e0 :
cmovne compare, tmp1, modulus # e0 :
bne mask, 2b # .. e1 :
/* find correct sign for result. */ stl $0, 0(a0)
xor a1, a2, compare # e0 : stl $1, 4(a0)
negl quotient, tmp1 # .. e1 : mov a0, v0
negl modulus, tmp2 # e0 :
cmovlt compare, tmp1, quotient # .. e1 :
cmovlt a1, tmp2, modulus # e1 :
/* and store it away in the structure. */ #if FRAME > 0
stl quotient, 0(a0) # .. e0 : lda sp, FRAME(sp)
mov a0, v0 # e1 : #endif
stl modulus, 4(a0) # .. e0 : ret
ret # e1 :
$divbyzero: $divbyzero:
mov a0, v0 mov a0, v0
ldiq a0, GEN_INTDIV lda a0, GEN_INTDIV
call_pal PAL_gentrap call_pal PAL_gentrap
/* if trap returns, return zero. */
stl zero, 0(v0) stl zero, 0(v0)
stl zero, 4(v0) stl zero, 4(v0)
#if FRAME > 0
lda sp, FRAME(sp)
#endif
ret ret
.end div .end div

View File

@ -115,16 +115,16 @@ $fix_sign_in_ret1:
_FTOIT $f0, Q, 8 _FTOIT $f0, Q, 8
.align 3 .align 3
$fix_sign_in_ret2: $fix_sign_in_ret2:
mulq Q, Y, QY
stq t4, 8(sp)
ldt $f0, 0(sp) ldt $f0, 0(sp)
stq t3, 0(sp)
cfi_restore ($f0)
cfi_rel_offset (t3, 0)
mulq Q, Y, QY
unop
stq t4, 8(sp)
unop unop
cfi_rel_offset (t4, 8) cfi_rel_offset (t4, 8)
cfi_restore ($f0)
stq t3, 0(sp)
unop
cfi_rel_offset (t3, 0)
subq QY, X, R subq QY, X, R
mov Y, SY mov Y, SY

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1996, 1997, 2001 Free Software Foundation, Inc. /* Copyright (C) 1996, 1997, 2001, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Richard Henderson <rth@tamu.edu>. Contributed by Richard Henderson <rth@tamu.edu>.
@ -17,93 +17,195 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */ 02111-1307 USA. */
#include <sysdep.h> #include "div_libc.h"
#ifdef __linux__ #undef FRAME
# include <asm/gentrap.h> #ifdef __alpha_fix__
# include <asm/pal.h> #define FRAME 0
#else #else
# include <machine/pal.h> #define FRAME 16
#endif #endif
#undef X
#undef Y
#define X $17
#define Y $18
.set noat .set noat
.align 4 .align 4
.globl ldiv .globl ldiv
.ent ldiv .ent ldiv
ldiv: ldiv:
.frame sp, 0, ra .frame sp, FRAME, ra
#if FRAME > 0
lda sp, -FRAME(sp)
#endif
#ifdef PROF #ifdef PROF
.set macro
ldgp gp, 0(pv) ldgp gp, 0(pv)
lda AT, _mcount lda AT, _mcount
jsr AT, (AT), _mcount jsr AT, (AT), _mcount
.set nomacro
.prologue 1 .prologue 1
#else #else
.prologue 0 .prologue 0
#endif #endif
#define divisor t1 beq Y, $divbyzero
#define mask t2
#define quotient t3
#define modulus t4
#define tmp1 t5
#define tmp2 t6
#define compare t7
/* find correct sign for input to unsigned divide loop. */ _ITOFT2 X, $f0, 0, Y, $f1, 8
mov a1, modulus # e0 :
mov a2, divisor # .. e1 :
negq a1, tmp1 # e0 :
negq a2, tmp2 # .. e1 :
mov zero, quotient # e0 :
mov 1, mask # .. e1 :
cmovlt a1, tmp1, modulus # e0 :
cmovlt a2, tmp2, divisor # .. e1 :
beq a2, $divbyzero # e1 :
unop # :
/* shift divisor left. */ .align 4
1: cmpult divisor, modulus, compare # e0 : cvtqt $f0, $f0
blt divisor, 2f # .. e1 : cvtqt $f1, $f1
addq divisor, divisor, divisor # e0 : divt/c $f0, $f1, $f0
addq mask, mask, mask # .. e1 : unop
bne compare, 1b # e1 :
unop # :
/* start to go right again. */ /* Check to see if X fit in the double as an exact value. */
2: addq quotient, mask, tmp2 # e1 : sll X, (64-53), AT
srl mask, 1, mask # .. e0 : sra AT, (64-53), AT
cmpule divisor, modulus, compare # e0 : cmpeq X, AT, AT
subq modulus, divisor, tmp1 # .. e1 : beq AT, $x_big
cmovne compare, tmp2, quotient # e1 :
srl divisor, 1, divisor # .. e0 :
cmovne compare, tmp1, modulus # e0 :
bne mask, 2b # .. e1 :
/* find correct sign for result. */ /* If we get here, we're expecting exact results from the division.
xor a1, a2, compare # e0 : Do nothing else besides convert and clean up. */
negq quotient, tmp1 # .. e1 : cvttq/c $f0, $f0
negq modulus, tmp2 # e0 : _FTOIT $f0, $0, 0
cmovlt compare, tmp1, quotient # .. e1 :
cmovlt a1, tmp2, modulus # e1 :
/* and store it away in the structure. */ $egress:
9: stq quotient, 0(a0) # .. e0 : mulq $0, Y, $1
mov a0, v0 # e1 : subq X, $1, $1
stq modulus, 8(a0) # .. e0 :
ret # e1 : stq $0, 0($16)
stq $1, 8($16)
mov $16, $0
#if FRAME > 0
lda sp, FRAME(sp)
#endif
ret
.align 4
$x_big:
/* If we get here, X is large enough that we don't expect exact
results, and neither X nor Y got mis-translated for the fp
division. Our task is to take the fp result, figure out how
far it's off from the correct result and compute a fixup. */
#define Q v0 /* quotient */
#define R t0 /* remainder */
#define SY t1 /* scaled Y */
#define S t2 /* scalar */
#define QY t3 /* Q*Y */
/* The fixup code below can only handle unsigned values. */
or X, Y, AT
mov $31, t5
blt AT, $fix_sign_in
$fix_sign_in_ret1:
cvttq/c $f0, $f0
_FTOIT $f0, Q, 8
.align 3
$fix_sign_in_ret2:
mulq Q, Y, QY
.align 4
subq QY, X, R
mov Y, SY
mov 1, S
bgt R, $q_high
$q_high_ret:
subq X, QY, R
mov Y, SY
mov 1, S
bgt R, $q_low
$q_low_ret:
negq Q, t4
cmovlbs t5, t4, Q
br $egress
.align 4
/* The quotient that we computed was too large. We need to reduce
it by S such that Y*S >= R. Obviously the closer we get to the
correct value the better, but overshooting high is ok, as we'll
fix that up later. */
0:
addq SY, SY, SY
addq S, S, S
$q_high:
cmpult SY, R, AT
bne AT, 0b
subq Q, S, Q
unop
subq QY, SY, QY
br $q_high_ret
.align 4
/* The quotient that we computed was too small. Divide the current
remainder (R) by Y and add that to the existing quotient (Q).
The expectation, of course, is that R is much smaller than X. */
/* Begin with a shift-up loop. Compute S such that Y*S >= R. We
already have a copy of Y in SY and the value 1 in S. */
0:
addq SY, SY, SY
addq S, S, S
$q_low:
cmpult SY, R, AT
bne AT, 0b
/* Shift-down and subtract loop. Each iteration compares our scaled
Y (SY) with the remainder (R); if SY <= R then add the scalar (S)
to the quotient (Q) and subtract SY from the remainder (R). */
2: addq Q, S, t3
srl S, 1, S
cmpule SY, R, AT
subq R, SY, t4
cmovne AT, t3, Q
cmovne AT, t4, R
srl SY, 1, SY
bne S, 2b
br $q_low_ret
.align 4
$fix_sign_in:
/* If we got here, then X|Y is negative. Need to adjust everything
such that we're doing unsigned division in the fixup loop. */
/* T5 is true if result should be negative. */
xor X, Y, AT
cmplt AT, 0, t5
cmplt X, 0, AT
negq X, t0
cmovne AT, t0, X
cmplt Y, 0, AT
negq Y, t0
cmovne AT, t0, Y
blbc t5, $fix_sign_in_ret1
cvttq/c $f0, $f0
_FTOIT $f0, Q, 8
.align 3
negq Q, Q
br $fix_sign_in_ret2
$divbyzero: $divbyzero:
mov a0, v0 mov a0, v0
lda a0, GEN_INTDIV lda a0, GEN_INTDIV
call_pal PAL_gentrap call_pal PAL_gentrap
/* if trap returns, return zero. */
stq zero, 0(v0) stq zero, 0(v0)
stq zero, 8(v0) stq zero, 8(v0)
#if FRAME > 0
lda sp, FRAME(sp)
#endif
ret ret
.end ldiv .end ldiv
weak_alias(ldiv, lldiv)
weak_alias(ldiv, imaxdiv)

View File

@ -116,16 +116,16 @@ $fix_sign_in_ret1:
_FTOIT $f0, Q, 8 _FTOIT $f0, Q, 8
.align 3 .align 3
$fix_sign_in_ret2: $fix_sign_in_ret2:
mulq Q, Y, QY
stq t4, 8(sp)
ldt $f0, 0(sp) ldt $f0, 0(sp)
stq t3, 0(sp)
cfi_restore ($f0)
cfi_rel_offset (t3, 0)
mulq Q, Y, QY
unop
stq t4, 8(sp)
unop unop
cfi_rel_offset (t4, 8) cfi_rel_offset (t4, 8)
cfi_restore ($f0)
stq t3, 0(sp)
unop
cfi_rel_offset (t3, 0)
subq QY, X, R subq QY, X, R
mov Y, SY mov Y, SY