mirror of git://sourceware.org/git/glibc.git
* sysdeps/alpha/divq.S: Save t3 before it gets clobbered. * sysdeps/alpha/remq.S: Likewise. * sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the new division algorithms in divl.S and divq.S respectively.
2004-07-30 Richard Henderson <rth@redhat.com> * sysdeps/alpha/divq.S: Save t3 before it gets clobbered. * sysdeps/alpha/remq.S: Likewise. * sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the new division algorithms in divl.S and divq.S respectively.
This commit is contained in:
parent
d1091a21f5
commit
80574c92d7
|
@ -1,3 +1,10 @@
|
||||||
|
2004-07-30 Richard Henderson <rth@redhat.com>
|
||||||
|
|
||||||
|
* sysdeps/alpha/divq.S: Save t3 before it gets clobbered.
|
||||||
|
* sysdeps/alpha/remq.S: Likewise.
|
||||||
|
* sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the
|
||||||
|
new division algorithms in divl.S and divq.S respectively.
|
||||||
|
|
||||||
2004-07-28 GOTO Masanori <gotom@debian.or.jp>
|
2004-07-28 GOTO Masanori <gotom@debian.or.jp>
|
||||||
|
|
||||||
* timezone/asia: Update from tzdata2004b.
|
* timezone/asia: Update from tzdata2004b.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
|
/* Copyright (C) 1996, 1997, 2004 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Richard Henderson <rth@tamu.edu>.
|
Contributed by Richard Henderson <rth@tamu.edu>.
|
||||||
|
|
||||||
|
@ -17,13 +17,13 @@
|
||||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
02111-1307 USA. */
|
02111-1307 USA. */
|
||||||
|
|
||||||
#include <sysdep.h>
|
#include "div_libc.h"
|
||||||
|
|
||||||
#ifdef __linux__
|
#undef FRAME
|
||||||
# include <asm/gentrap.h>
|
#ifdef __alpha_fix__
|
||||||
# include <asm/pal.h>
|
#define FRAME 0
|
||||||
#else
|
#else
|
||||||
# include <machine/pal.h>
|
#define FRAME 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.set noat
|
.set noat
|
||||||
|
@ -32,78 +32,54 @@
|
||||||
.globl div
|
.globl div
|
||||||
.ent div
|
.ent div
|
||||||
div:
|
div:
|
||||||
.frame sp, 0, ra
|
.frame sp, FRAME, ra
|
||||||
|
#if FRAME > 0
|
||||||
|
lda sp, -FRAME(sp)
|
||||||
|
#endif
|
||||||
#ifdef PROF
|
#ifdef PROF
|
||||||
|
.set macro
|
||||||
ldgp gp, 0(pv)
|
ldgp gp, 0(pv)
|
||||||
lda AT, _mcount
|
lda AT, _mcount
|
||||||
jsr AT, (AT), _mcount
|
jsr AT, (AT), _mcount
|
||||||
|
.set nomacro
|
||||||
.prologue 1
|
.prologue 1
|
||||||
#else
|
#else
|
||||||
.prologue 0
|
.prologue 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define divisor t1
|
beq $18, $divbyzero
|
||||||
#define mask t2
|
|
||||||
#define quotient t3
|
|
||||||
#define modulus t4
|
|
||||||
#define tmp1 t5
|
|
||||||
#define tmp2 t6
|
|
||||||
#define compare t7
|
|
||||||
|
|
||||||
/* find correct sign for input to unsigned divide loop. */
|
_ITOFT2 $17, $f0, 0, $18, $f1, 8
|
||||||
negl a1, modulus # e0 :
|
|
||||||
negl a2, divisor # .. e1 :
|
|
||||||
sextl a1, a1 # e0 :
|
|
||||||
sextl a2, a2 # .. e1 :
|
|
||||||
mov zero, quotient # e0 :
|
|
||||||
mov 1, mask # .. e1 :
|
|
||||||
cmovge a1, a1, modulus # e0 :
|
|
||||||
cmovge a2, a2, divisor # .. e1 :
|
|
||||||
beq a2, $divbyzero # e1 :
|
|
||||||
unop # :
|
|
||||||
|
|
||||||
/* shift divisor left, using 3-bit shifts for 32-bit divides as we
|
cvtqt $f0, $f0
|
||||||
can't overflow. Three-bit shifts will result in looping three
|
cvtqt $f1, $f1
|
||||||
times less here, but can result in two loops more later. Thus
|
divt/c $f0, $f1, $f0
|
||||||
using a large shift isn't worth it (and s8addq pairs better than
|
cvttq/c $f0, $f0
|
||||||
a shift). */
|
|
||||||
|
|
||||||
1: cmpult divisor, modulus, compare # e0 :
|
_FTOIT $f0, $0, 0
|
||||||
s8addq divisor, zero, divisor # .. e1 :
|
|
||||||
s8addq mask, zero, mask # e0 :
|
|
||||||
bne compare, 1b # .. e1 :
|
|
||||||
|
|
||||||
/* start to go right again. */
|
mull $0, $18, $1
|
||||||
2: addq quotient, mask, tmp2 # e1 :
|
subl $17, $1, $1
|
||||||
srl mask, 1, mask # .. e0 :
|
|
||||||
cmpule divisor, modulus, compare # e0 :
|
|
||||||
subq modulus, divisor, tmp1 # .. e1 :
|
|
||||||
cmovne compare, tmp2, quotient # e1 :
|
|
||||||
srl divisor, 1, divisor # .. e0 :
|
|
||||||
cmovne compare, tmp1, modulus # e0 :
|
|
||||||
bne mask, 2b # .. e1 :
|
|
||||||
|
|
||||||
/* find correct sign for result. */
|
stl $0, 0(a0)
|
||||||
xor a1, a2, compare # e0 :
|
stl $1, 4(a0)
|
||||||
negl quotient, tmp1 # .. e1 :
|
mov a0, v0
|
||||||
negl modulus, tmp2 # e0 :
|
|
||||||
cmovlt compare, tmp1, quotient # .. e1 :
|
|
||||||
cmovlt a1, tmp2, modulus # e1 :
|
|
||||||
|
|
||||||
/* and store it away in the structure. */
|
#if FRAME > 0
|
||||||
stl quotient, 0(a0) # .. e0 :
|
lda sp, FRAME(sp)
|
||||||
mov a0, v0 # e1 :
|
#endif
|
||||||
stl modulus, 4(a0) # .. e0 :
|
ret
|
||||||
ret # e1 :
|
|
||||||
|
|
||||||
$divbyzero:
|
$divbyzero:
|
||||||
mov a0, v0
|
mov a0, v0
|
||||||
ldiq a0, GEN_INTDIV
|
lda a0, GEN_INTDIV
|
||||||
call_pal PAL_gentrap
|
call_pal PAL_gentrap
|
||||||
|
|
||||||
/* if trap returns, return zero. */
|
|
||||||
stl zero, 0(v0)
|
stl zero, 0(v0)
|
||||||
stl zero, 4(v0)
|
stl zero, 4(v0)
|
||||||
|
|
||||||
|
#if FRAME > 0
|
||||||
|
lda sp, FRAME(sp)
|
||||||
|
#endif
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.end div
|
.end div
|
||||||
|
|
|
@ -115,16 +115,16 @@ $fix_sign_in_ret1:
|
||||||
_FTOIT $f0, Q, 8
|
_FTOIT $f0, Q, 8
|
||||||
.align 3
|
.align 3
|
||||||
$fix_sign_in_ret2:
|
$fix_sign_in_ret2:
|
||||||
mulq Q, Y, QY
|
|
||||||
stq t4, 8(sp)
|
|
||||||
|
|
||||||
ldt $f0, 0(sp)
|
ldt $f0, 0(sp)
|
||||||
|
stq t3, 0(sp)
|
||||||
|
cfi_restore ($f0)
|
||||||
|
cfi_rel_offset (t3, 0)
|
||||||
|
|
||||||
|
mulq Q, Y, QY
|
||||||
|
unop
|
||||||
|
stq t4, 8(sp)
|
||||||
unop
|
unop
|
||||||
cfi_rel_offset (t4, 8)
|
cfi_rel_offset (t4, 8)
|
||||||
cfi_restore ($f0)
|
|
||||||
stq t3, 0(sp)
|
|
||||||
unop
|
|
||||||
cfi_rel_offset (t3, 0)
|
|
||||||
|
|
||||||
subq QY, X, R
|
subq QY, X, R
|
||||||
mov Y, SY
|
mov Y, SY
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* Copyright (C) 1996, 1997, 2001 Free Software Foundation, Inc.
|
/* Copyright (C) 1996, 1997, 2001, 2004 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Richard Henderson <rth@tamu.edu>.
|
Contributed by Richard Henderson <rth@tamu.edu>.
|
||||||
|
|
||||||
|
@ -17,93 +17,195 @@
|
||||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
02111-1307 USA. */
|
02111-1307 USA. */
|
||||||
|
|
||||||
#include <sysdep.h>
|
#include "div_libc.h"
|
||||||
|
|
||||||
#ifdef __linux__
|
#undef FRAME
|
||||||
# include <asm/gentrap.h>
|
#ifdef __alpha_fix__
|
||||||
# include <asm/pal.h>
|
#define FRAME 0
|
||||||
#else
|
#else
|
||||||
# include <machine/pal.h>
|
#define FRAME 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#undef X
|
||||||
|
#undef Y
|
||||||
|
#define X $17
|
||||||
|
#define Y $18
|
||||||
|
|
||||||
.set noat
|
.set noat
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
.globl ldiv
|
.globl ldiv
|
||||||
.ent ldiv
|
.ent ldiv
|
||||||
ldiv:
|
ldiv:
|
||||||
.frame sp, 0, ra
|
.frame sp, FRAME, ra
|
||||||
|
#if FRAME > 0
|
||||||
|
lda sp, -FRAME(sp)
|
||||||
|
#endif
|
||||||
#ifdef PROF
|
#ifdef PROF
|
||||||
|
.set macro
|
||||||
ldgp gp, 0(pv)
|
ldgp gp, 0(pv)
|
||||||
lda AT, _mcount
|
lda AT, _mcount
|
||||||
jsr AT, (AT), _mcount
|
jsr AT, (AT), _mcount
|
||||||
|
.set nomacro
|
||||||
.prologue 1
|
.prologue 1
|
||||||
#else
|
#else
|
||||||
.prologue 0
|
.prologue 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define divisor t1
|
beq Y, $divbyzero
|
||||||
#define mask t2
|
|
||||||
#define quotient t3
|
|
||||||
#define modulus t4
|
|
||||||
#define tmp1 t5
|
|
||||||
#define tmp2 t6
|
|
||||||
#define compare t7
|
|
||||||
|
|
||||||
/* find correct sign for input to unsigned divide loop. */
|
_ITOFT2 X, $f0, 0, Y, $f1, 8
|
||||||
mov a1, modulus # e0 :
|
|
||||||
mov a2, divisor # .. e1 :
|
|
||||||
negq a1, tmp1 # e0 :
|
|
||||||
negq a2, tmp2 # .. e1 :
|
|
||||||
mov zero, quotient # e0 :
|
|
||||||
mov 1, mask # .. e1 :
|
|
||||||
cmovlt a1, tmp1, modulus # e0 :
|
|
||||||
cmovlt a2, tmp2, divisor # .. e1 :
|
|
||||||
beq a2, $divbyzero # e1 :
|
|
||||||
unop # :
|
|
||||||
|
|
||||||
/* shift divisor left. */
|
.align 4
|
||||||
1: cmpult divisor, modulus, compare # e0 :
|
cvtqt $f0, $f0
|
||||||
blt divisor, 2f # .. e1 :
|
cvtqt $f1, $f1
|
||||||
addq divisor, divisor, divisor # e0 :
|
divt/c $f0, $f1, $f0
|
||||||
addq mask, mask, mask # .. e1 :
|
unop
|
||||||
bne compare, 1b # e1 :
|
|
||||||
unop # :
|
|
||||||
|
|
||||||
/* start to go right again. */
|
/* Check to see if X fit in the double as an exact value. */
|
||||||
2: addq quotient, mask, tmp2 # e1 :
|
sll X, (64-53), AT
|
||||||
srl mask, 1, mask # .. e0 :
|
sra AT, (64-53), AT
|
||||||
cmpule divisor, modulus, compare # e0 :
|
cmpeq X, AT, AT
|
||||||
subq modulus, divisor, tmp1 # .. e1 :
|
beq AT, $x_big
|
||||||
cmovne compare, tmp2, quotient # e1 :
|
|
||||||
srl divisor, 1, divisor # .. e0 :
|
|
||||||
cmovne compare, tmp1, modulus # e0 :
|
|
||||||
bne mask, 2b # .. e1 :
|
|
||||||
|
|
||||||
/* find correct sign for result. */
|
/* If we get here, we're expecting exact results from the division.
|
||||||
xor a1, a2, compare # e0 :
|
Do nothing else besides convert and clean up. */
|
||||||
negq quotient, tmp1 # .. e1 :
|
cvttq/c $f0, $f0
|
||||||
negq modulus, tmp2 # e0 :
|
_FTOIT $f0, $0, 0
|
||||||
cmovlt compare, tmp1, quotient # .. e1 :
|
|
||||||
cmovlt a1, tmp2, modulus # e1 :
|
|
||||||
|
|
||||||
/* and store it away in the structure. */
|
$egress:
|
||||||
9: stq quotient, 0(a0) # .. e0 :
|
mulq $0, Y, $1
|
||||||
mov a0, v0 # e1 :
|
subq X, $1, $1
|
||||||
stq modulus, 8(a0) # .. e0 :
|
|
||||||
ret # e1 :
|
stq $0, 0($16)
|
||||||
|
stq $1, 8($16)
|
||||||
|
mov $16, $0
|
||||||
|
|
||||||
|
#if FRAME > 0
|
||||||
|
lda sp, FRAME(sp)
|
||||||
|
#endif
|
||||||
|
ret
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
$x_big:
|
||||||
|
/* If we get here, X is large enough that we don't expect exact
|
||||||
|
results, and neither X nor Y got mis-translated for the fp
|
||||||
|
division. Our task is to take the fp result, figure out how
|
||||||
|
far it's off from the correct result and compute a fixup. */
|
||||||
|
|
||||||
|
#define Q v0 /* quotient */
|
||||||
|
#define R t0 /* remainder */
|
||||||
|
#define SY t1 /* scaled Y */
|
||||||
|
#define S t2 /* scalar */
|
||||||
|
#define QY t3 /* Q*Y */
|
||||||
|
|
||||||
|
/* The fixup code below can only handle unsigned values. */
|
||||||
|
or X, Y, AT
|
||||||
|
mov $31, t5
|
||||||
|
blt AT, $fix_sign_in
|
||||||
|
$fix_sign_in_ret1:
|
||||||
|
cvttq/c $f0, $f0
|
||||||
|
|
||||||
|
_FTOIT $f0, Q, 8
|
||||||
|
.align 3
|
||||||
|
$fix_sign_in_ret2:
|
||||||
|
mulq Q, Y, QY
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
subq QY, X, R
|
||||||
|
mov Y, SY
|
||||||
|
mov 1, S
|
||||||
|
bgt R, $q_high
|
||||||
|
|
||||||
|
$q_high_ret:
|
||||||
|
subq X, QY, R
|
||||||
|
mov Y, SY
|
||||||
|
mov 1, S
|
||||||
|
bgt R, $q_low
|
||||||
|
|
||||||
|
$q_low_ret:
|
||||||
|
negq Q, t4
|
||||||
|
cmovlbs t5, t4, Q
|
||||||
|
br $egress
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
/* The quotient that we computed was too large. We need to reduce
|
||||||
|
it by S such that Y*S >= R. Obviously the closer we get to the
|
||||||
|
correct value the better, but overshooting high is ok, as we'll
|
||||||
|
fix that up later. */
|
||||||
|
0:
|
||||||
|
addq SY, SY, SY
|
||||||
|
addq S, S, S
|
||||||
|
$q_high:
|
||||||
|
cmpult SY, R, AT
|
||||||
|
bne AT, 0b
|
||||||
|
|
||||||
|
subq Q, S, Q
|
||||||
|
unop
|
||||||
|
subq QY, SY, QY
|
||||||
|
br $q_high_ret
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
/* The quotient that we computed was too small. Divide the current
|
||||||
|
remainder (R) by Y and add that to the existing quotient (Q).
|
||||||
|
The expectation, of course, is that R is much smaller than X. */
|
||||||
|
/* Begin with a shift-up loop. Compute S such that Y*S >= R. We
|
||||||
|
already have a copy of Y in SY and the value 1 in S. */
|
||||||
|
0:
|
||||||
|
addq SY, SY, SY
|
||||||
|
addq S, S, S
|
||||||
|
$q_low:
|
||||||
|
cmpult SY, R, AT
|
||||||
|
bne AT, 0b
|
||||||
|
|
||||||
|
/* Shift-down and subtract loop. Each iteration compares our scaled
|
||||||
|
Y (SY) with the remainder (R); if SY <= R then SY is subtracted
|
||||||
|
from R and its scalar (S) is added to the quotient (Q). */
|
||||||
|
2: addq Q, S, t3
|
||||||
|
srl S, 1, S
|
||||||
|
cmpule SY, R, AT
|
||||||
|
subq R, SY, t4
|
||||||
|
|
||||||
|
cmovne AT, t3, Q
|
||||||
|
cmovne AT, t4, R
|
||||||
|
srl SY, 1, SY
|
||||||
|
bne S, 2b
|
||||||
|
|
||||||
|
br $q_low_ret
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
$fix_sign_in:
|
||||||
|
/* If we got here, then X|Y is negative. Need to adjust everything
|
||||||
|
such that we're doing unsigned division in the fixup loop. */
|
||||||
|
/* T5 is true if result should be negative. */
|
||||||
|
xor X, Y, AT
|
||||||
|
cmplt AT, 0, t5
|
||||||
|
cmplt X, 0, AT
|
||||||
|
negq X, t0
|
||||||
|
|
||||||
|
cmovne AT, t0, X
|
||||||
|
cmplt Y, 0, AT
|
||||||
|
negq Y, t0
|
||||||
|
|
||||||
|
cmovne AT, t0, Y
|
||||||
|
blbc t5, $fix_sign_in_ret1
|
||||||
|
|
||||||
|
cvttq/c $f0, $f0
|
||||||
|
_FTOIT $f0, Q, 8
|
||||||
|
.align 3
|
||||||
|
negq Q, Q
|
||||||
|
br $fix_sign_in_ret2
|
||||||
|
|
||||||
$divbyzero:
|
$divbyzero:
|
||||||
mov a0, v0
|
mov a0, v0
|
||||||
lda a0, GEN_INTDIV
|
lda a0, GEN_INTDIV
|
||||||
call_pal PAL_gentrap
|
call_pal PAL_gentrap
|
||||||
|
|
||||||
/* if trap returns, return zero. */
|
|
||||||
stq zero, 0(v0)
|
stq zero, 0(v0)
|
||||||
stq zero, 8(v0)
|
stq zero, 8(v0)
|
||||||
|
|
||||||
|
#if FRAME > 0
|
||||||
|
lda sp, FRAME(sp)
|
||||||
|
#endif
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.end ldiv
|
.end ldiv
|
||||||
|
|
||||||
weak_alias(ldiv, lldiv)
|
|
||||||
weak_alias(ldiv, imaxdiv)
|
|
||||||
|
|
|
@ -116,16 +116,16 @@ $fix_sign_in_ret1:
|
||||||
_FTOIT $f0, Q, 8
|
_FTOIT $f0, Q, 8
|
||||||
.align 3
|
.align 3
|
||||||
$fix_sign_in_ret2:
|
$fix_sign_in_ret2:
|
||||||
mulq Q, Y, QY
|
|
||||||
stq t4, 8(sp)
|
|
||||||
|
|
||||||
ldt $f0, 0(sp)
|
ldt $f0, 0(sp)
|
||||||
|
stq t3, 0(sp)
|
||||||
|
cfi_restore ($f0)
|
||||||
|
cfi_rel_offset (t3, 0)
|
||||||
|
|
||||||
|
mulq Q, Y, QY
|
||||||
|
unop
|
||||||
|
stq t4, 8(sp)
|
||||||
unop
|
unop
|
||||||
cfi_rel_offset (t4, 8)
|
cfi_rel_offset (t4, 8)
|
||||||
cfi_restore ($f0)
|
|
||||||
stq t3, 0(sp)
|
|
||||||
unop
|
|
||||||
cfi_rel_offset (t3, 0)
|
|
||||||
|
|
||||||
subq QY, X, R
|
subq QY, X, R
|
||||||
mov Y, SY
|
mov Y, SY
|
||||||
|
|
Loading…
Reference in New Issue