mirror of git://sourceware.org/git/glibc.git
Updated from /src/gmp-1.937
This commit is contained in:
parent
f860256b2e
commit
3de9f02e92
|
@ -26,16 +26,7 @@
|
||||||
# size r18
|
# size r18
|
||||||
# s2_limb r19
|
# s2_limb r19
|
||||||
|
|
||||||
# This code runs at 42 cycles/limb on the 21064.
|
# This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
|
||||||
|
|
||||||
# To improve performance for long multiplications, we would use
|
|
||||||
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
|
|
||||||
# these instructions without slowing down the general code: 1. We can
|
|
||||||
# only have two prefetches in operation at any time in the Alpha
|
|
||||||
# architecture. 2. There will seldom be any special alignment
|
|
||||||
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
|
|
||||||
# loop into an inner and outer loop, having the inner loop handle
|
|
||||||
# exactly one prefetch block?
|
|
||||||
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
.set noat
|
.set noat
|
||||||
|
@ -52,7 +43,7 @@ __mpn_addmul_1:
|
||||||
mulq $2,$19,$3 # $3 = prod_low
|
mulq $2,$19,$3 # $3 = prod_low
|
||||||
ldq $5,0($16) # $5 = *res_ptr
|
ldq $5,0($16) # $5 = *res_ptr
|
||||||
umulh $2,$19,$0 # $0 = prod_high
|
umulh $2,$19,$0 # $0 = prod_high
|
||||||
beq $18,Lend1 # jump if size was == 1
|
beq $18,.Lend1 # jump if size was == 1
|
||||||
ldq $2,0($17) # $2 = s1_limb
|
ldq $2,0($17) # $2 = s1_limb
|
||||||
addq $17,8,$17 # s1_ptr++
|
addq $17,8,$17 # s1_ptr++
|
||||||
subq $18,1,$18 # size--
|
subq $18,1,$18 # size--
|
||||||
|
@ -60,10 +51,10 @@ __mpn_addmul_1:
|
||||||
cmpult $3,$5,$4
|
cmpult $3,$5,$4
|
||||||
stq $3,0($16)
|
stq $3,0($16)
|
||||||
addq $16,8,$16 # res_ptr++
|
addq $16,8,$16 # res_ptr++
|
||||||
beq $18,Lend2 # jump if size was == 2
|
beq $18,.Lend2 # jump if size was == 2
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
Loop: mulq $2,$19,$3 # $3 = prod_low
|
.Loop: mulq $2,$19,$3 # $3 = prod_low
|
||||||
ldq $5,0($16) # $5 = *res_ptr
|
ldq $5,0($16) # $5 = *res_ptr
|
||||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||||
subq $18,1,$18 # size--
|
subq $18,1,$18 # size--
|
||||||
|
@ -77,9 +68,9 @@ Loop: mulq $2,$19,$3 # $3 = prod_low
|
||||||
stq $3,0($16)
|
stq $3,0($16)
|
||||||
addq $16,8,$16 # res_ptr++
|
addq $16,8,$16 # res_ptr++
|
||||||
addq $5,$0,$0 # combine carries
|
addq $5,$0,$0 # combine carries
|
||||||
bne $18,Loop
|
bne $18,.Loop
|
||||||
|
|
||||||
Lend2: mulq $2,$19,$3 # $3 = prod_low
|
.Lend2: mulq $2,$19,$3 # $3 = prod_low
|
||||||
ldq $5,0($16) # $5 = *res_ptr
|
ldq $5,0($16) # $5 = *res_ptr
|
||||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||||
umulh $2,$19,$4 # $4 = cy_limb
|
umulh $2,$19,$4 # $4 = cy_limb
|
||||||
|
@ -91,7 +82,7 @@ Lend2: mulq $2,$19,$3 # $3 = prod_low
|
||||||
addq $5,$0,$0 # combine carries
|
addq $5,$0,$0 # combine carries
|
||||||
addq $4,$0,$0 # cy_limb = prod_high + cy
|
addq $4,$0,$0 # cy_limb = prod_high + cy
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
Lend1: addq $5,$3,$3
|
.Lend1: addq $5,$3,$3
|
||||||
cmpult $3,$5,$5
|
cmpult $3,$5,$5
|
||||||
stq $3,0($16)
|
stq $3,0($16)
|
||||||
addq $0,$5,$0
|
addq $0,$5,$0
|
||||||
|
|
|
@ -35,84 +35,113 @@
|
||||||
__mpn_add_n:
|
__mpn_add_n:
|
||||||
.frame $30,0,$26,0
|
.frame $30,0,$26,0
|
||||||
|
|
||||||
ldq $3,0($17)
|
or $31,$31,$25 # clear cy
|
||||||
ldq $4,0($18)
|
subq $19,4,$19 # decr loop cnt
|
||||||
|
blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
|
||||||
subq $19,1,$19
|
# Start software pipeline for 1st loop
|
||||||
and $19,4-1,$2 # number of limbs in first loop
|
ldq $0,0($18)
|
||||||
bis $31,$31,$0
|
ldq $1,8($18)
|
||||||
beq $2,.L0 # if multiple of 4 limbs, skip first loop
|
ldq $4,0($17)
|
||||||
|
|
||||||
subq $19,$2,$19
|
|
||||||
|
|
||||||
.Loop0: subq $2,1,$2
|
|
||||||
ldq $5,8($17)
|
ldq $5,8($17)
|
||||||
addq $4,$0,$4
|
addq $17,32,$17 # update s1_ptr
|
||||||
ldq $6,8($18)
|
ldq $2,16($18)
|
||||||
cmpult $4,$0,$1
|
addq $0,$4,$20 # 1st main add
|
||||||
addq $3,$4,$4
|
ldq $3,24($18)
|
||||||
cmpult $4,$3,$0
|
subq $19,4,$19 # decr loop cnt
|
||||||
stq $4,0($16)
|
ldq $6,-16($17)
|
||||||
or $0,$1,$0
|
cmpult $20,$0,$25 # compute cy from last add
|
||||||
|
ldq $7,-8($17)
|
||||||
addq $17,8,$17
|
addq $1,$25,$28 # cy add
|
||||||
addq $18,8,$18
|
addq $18,32,$18 # update s2_ptr
|
||||||
bis $5,$5,$3
|
addq $5,$28,$21 # 2nd main add
|
||||||
bis $6,$6,$4
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
addq $16,8,$16
|
blt $19,.Lend1 # if less than 4 limbs remain, jump
|
||||||
bne $2,.Loop0
|
# 1st loop handles groups of 4 limbs in a software pipeline
|
||||||
|
|
||||||
.L0: beq $19,.Lend
|
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
.Loop: subq $19,4,$19
|
.Loop: cmpult $21,$28,$25 # compute cy from last add
|
||||||
unop
|
ldq $0,0($18)
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
ldq $6,8($18)
|
ldq $1,8($18)
|
||||||
addq $4,$0,$0
|
addq $2,$25,$28 # cy add
|
||||||
|
ldq $4,0($17)
|
||||||
|
addq $28,$6,$22 # 3rd main add
|
||||||
ldq $5,8($17)
|
ldq $5,8($17)
|
||||||
cmpult $0,$4,$1
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
ldq $4,16($18)
|
cmpult $22,$28,$25 # compute cy from last add
|
||||||
addq $3,$0,$20
|
|
||||||
cmpult $20,$3,$0
|
|
||||||
ldq $3,16($17)
|
|
||||||
or $0,$1,$0
|
|
||||||
addq $6,$0,$0
|
|
||||||
cmpult $0,$6,$1
|
|
||||||
ldq $6,24($18)
|
|
||||||
addq $5,$0,$21
|
|
||||||
cmpult $21,$5,$0
|
|
||||||
ldq $5,24($17)
|
|
||||||
or $0,$1,$0
|
|
||||||
addq $4,$0,$0
|
|
||||||
cmpult $0,$4,$1
|
|
||||||
ldq $4,32($18)
|
|
||||||
addq $3,$0,$22
|
|
||||||
cmpult $22,$3,$0
|
|
||||||
ldq $3,32($17)
|
|
||||||
or $0,$1,$0
|
|
||||||
addq $6,$0,$0
|
|
||||||
cmpult $0,$6,$1
|
|
||||||
addq $5,$0,$23
|
|
||||||
cmpult $23,$5,$0
|
|
||||||
or $0,$1,$0
|
|
||||||
|
|
||||||
stq $20,0($16)
|
stq $20,0($16)
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
stq $21,8($16)
|
stq $21,8($16)
|
||||||
stq $22,16($16)
|
addq $3,$25,$28 # cy add
|
||||||
stq $23,24($16)
|
addq $28,$7,$23 # 4th main add
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
cmpult $23,$28,$25 # compute cy from last add
|
||||||
|
addq $17,32,$17 # update s1_ptr
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
addq $16,32,$16 # update res_ptr
|
||||||
|
addq $0,$25,$28 # cy add
|
||||||
|
ldq $2,16($18)
|
||||||
|
addq $4,$28,$20 # 1st main add
|
||||||
|
ldq $3,24($18)
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
ldq $6,-16($17)
|
||||||
|
cmpult $20,$28,$25 # compute cy from last add
|
||||||
|
ldq $7,-8($17)
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
subq $19,4,$19 # decr loop cnt
|
||||||
|
stq $22,-16($16)
|
||||||
|
addq $1,$25,$28 # cy add
|
||||||
|
stq $23,-8($16)
|
||||||
|
addq $5,$28,$21 # 2nd main add
|
||||||
|
addq $18,32,$18 # update s2_ptr
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
bge $19,.Loop
|
||||||
|
# Finish software pipeline for 1st loop
|
||||||
|
.Lend1: cmpult $21,$28,$25 # compute cy from last add
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
addq $2,$25,$28 # cy add
|
||||||
|
addq $28,$6,$22 # 3rd main add
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
cmpult $22,$28,$25 # compute cy from last add
|
||||||
|
stq $20,0($16)
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
stq $21,8($16)
|
||||||
|
addq $3,$25,$28 # cy add
|
||||||
|
addq $28,$7,$23 # 4th main add
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
cmpult $23,$28,$25 # compute cy from last add
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
addq $16,32,$16 # update res_ptr
|
||||||
|
stq $22,-16($16)
|
||||||
|
stq $23,-8($16)
|
||||||
|
.Lend2: addq $19,4,$19 # restore loop cnt
|
||||||
|
beq $19,.Lret
|
||||||
|
# Start software pipeline for 2nd loop
|
||||||
|
ldq $0,0($18)
|
||||||
|
ldq $4,0($17)
|
||||||
|
subq $19,1,$19
|
||||||
|
beq $19,.Lend0
|
||||||
|
# 2nd loop handles remaining 1-3 limbs
|
||||||
|
.align 4
|
||||||
|
.Loop0: addq $0,$25,$28 # cy add
|
||||||
|
ldq $0,8($18)
|
||||||
|
addq $4,$28,$20 # main add
|
||||||
|
ldq $4,8($17)
|
||||||
|
addq $18,8,$18
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
addq $17,8,$17
|
||||||
|
stq $20,0($16)
|
||||||
|
cmpult $20,$28,$25 # compute cy from last add
|
||||||
|
subq $19,1,$19 # decr loop cnt
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
addq $16,8,$16
|
||||||
|
bne $19,.Loop0
|
||||||
|
.Lend0: addq $0,$25,$28 # cy add
|
||||||
|
addq $4,$28,$20 # main add
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
cmpult $20,$28,$25 # compute cy from last add
|
||||||
|
stq $20,0($16)
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
|
||||||
addq $17,32,$17
|
.Lret: or $25,$31,$0 # return cy
|
||||||
addq $18,32,$18
|
|
||||||
addq $16,32,$16
|
|
||||||
bne $19,.Loop
|
|
||||||
|
|
||||||
.Lend: addq $4,$0,$4
|
|
||||||
cmpult $4,$0,$1
|
|
||||||
addq $3,$4,$4
|
|
||||||
cmpult $4,$3,$0
|
|
||||||
stq $4,0($16)
|
|
||||||
or $0,$1,$0
|
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
|
|
||||||
.end __mpn_add_n
|
.end __mpn_add_n
|
||||||
|
|
|
@ -25,7 +25,7 @@
|
||||||
# size r18
|
# size r18
|
||||||
# cnt r19
|
# cnt r19
|
||||||
|
|
||||||
# This code runs at 4.25 cycles/limb on the EV5.
|
# This code runs at 3.25 cycles/limb on the EV5.
|
||||||
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
.set noat
|
.set noat
|
||||||
|
@ -44,11 +44,11 @@ __mpn_lshift:
|
||||||
and $18,4-1,$28 # number of limbs in first loop
|
and $18,4-1,$28 # number of limbs in first loop
|
||||||
srl $4,$20,$0 # compute function result
|
srl $4,$20,$0 # compute function result
|
||||||
|
|
||||||
beq $28,L0
|
beq $28,.L0
|
||||||
subq $18,$28,$18
|
subq $18,$28,$18
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
Loop0: ldq $3,-16($17)
|
.Loop0: ldq $3,-16($17)
|
||||||
subq $16,8,$16
|
subq $16,8,$16
|
||||||
sll $4,$19,$5
|
sll $4,$19,$5
|
||||||
subq $17,8,$17
|
subq $17,8,$17
|
||||||
|
@ -57,17 +57,17 @@ Loop0: ldq $3,-16($17)
|
||||||
or $3,$3,$4
|
or $3,$3,$4
|
||||||
or $5,$6,$8
|
or $5,$6,$8
|
||||||
stq $8,0($16)
|
stq $8,0($16)
|
||||||
bne $28,Loop0
|
bne $28,.Loop0
|
||||||
|
|
||||||
L0: sll $4,$19,$24
|
.L0: sll $4,$19,$24
|
||||||
beq $18,Lend
|
beq $18,.Lend
|
||||||
# warm up phase 1
|
# warm up phase 1
|
||||||
ldq $1,-16($17)
|
ldq $1,-16($17)
|
||||||
subq $18,4,$18
|
subq $18,4,$18
|
||||||
ldq $2,-24($17)
|
ldq $2,-24($17)
|
||||||
ldq $3,-32($17)
|
ldq $3,-32($17)
|
||||||
ldq $4,-40($17)
|
ldq $4,-40($17)
|
||||||
beq $18,Lcool1
|
beq $18,.Lend1
|
||||||
# warm up phase 2
|
# warm up phase 2
|
||||||
srl $1,$20,$7
|
srl $1,$20,$7
|
||||||
sll $1,$19,$21
|
sll $1,$19,$21
|
||||||
|
@ -84,10 +84,10 @@ L0: sll $4,$19,$24
|
||||||
sll $4,$19,$24
|
sll $4,$19,$24
|
||||||
ldq $4,-72($17)
|
ldq $4,-72($17)
|
||||||
subq $18,4,$18
|
subq $18,4,$18
|
||||||
beq $18,Lcool1
|
beq $18,.Lend2
|
||||||
.align 4
|
.align 4
|
||||||
# main loop
|
# main loop
|
||||||
Loop: stq $7,-8($16)
|
.Loop: stq $7,-8($16)
|
||||||
or $5,$22,$5
|
or $5,$22,$5
|
||||||
stq $8,-16($16)
|
stq $8,-16($16)
|
||||||
or $6,$23,$6
|
or $6,$23,$6
|
||||||
|
@ -113,16 +113,14 @@ Loop: stq $7,-8($16)
|
||||||
subq $16,32,$16
|
subq $16,32,$16
|
||||||
|
|
||||||
srl $4,$20,$6
|
srl $4,$20,$6
|
||||||
ldq $3,-96($17
|
ldq $3,-96($17)
|
||||||
sll $4,$19,$24
|
sll $4,$19,$24
|
||||||
ldq $4,-104($17)
|
ldq $4,-104($17)
|
||||||
|
|
||||||
subq $17,32,$17
|
subq $17,32,$17
|
||||||
bne $18,Loop
|
bne $18,.Loop
|
||||||
unop
|
|
||||||
unop
|
|
||||||
# cool down phase 2/1
|
# cool down phase 2/1
|
||||||
Lcool1: stq $7,-8($16)
|
.Lend2: stq $7,-8($16)
|
||||||
or $5,$22,$5
|
or $5,$22,$5
|
||||||
stq $8,-16($16)
|
stq $8,-16($16)
|
||||||
or $6,$23,$6
|
or $6,$23,$6
|
||||||
|
@ -150,7 +148,7 @@ Lcool1: stq $7,-8($16)
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
|
|
||||||
# cool down phase 1/1
|
# cool down phase 1/1
|
||||||
Lcool1: srl $1,$20,$7
|
.Lend1: srl $1,$20,$7
|
||||||
sll $1,$19,$21
|
sll $1,$19,$21
|
||||||
srl $2,$20,$8
|
srl $2,$20,$8
|
||||||
sll $2,$19,$22
|
sll $2,$19,$22
|
||||||
|
@ -170,6 +168,6 @@ Lcool1: srl $1,$20,$7
|
||||||
stq $24,-40($16)
|
stq $24,-40($16)
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
|
|
||||||
Lend stq $24,-8($16)
|
.Lend: stq $24,-8($16)
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
.end __mpn_lshift
|
.end __mpn_lshift
|
||||||
|
|
|
@ -25,7 +25,7 @@
|
||||||
# size r18
|
# size r18
|
||||||
# cnt r19
|
# cnt r19
|
||||||
|
|
||||||
# This code runs at 4.25 cycles/limb on the EV5.
|
# This code runs at 3.25 cycles/limb on the EV5.
|
||||||
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
.set noat
|
.set noat
|
||||||
|
@ -42,11 +42,11 @@ __mpn_rshift:
|
||||||
and $18,4-1,$28 # number of limbs in first loop
|
and $18,4-1,$28 # number of limbs in first loop
|
||||||
sll $4,$20,$0 # compute function result
|
sll $4,$20,$0 # compute function result
|
||||||
|
|
||||||
beq $28,L0
|
beq $28,.L0
|
||||||
subq $18,$28,$18
|
subq $18,$28,$18
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
Loop0: ldq $3,8($17)
|
.Loop0: ldq $3,8($17)
|
||||||
addq $16,8,$16
|
addq $16,8,$16
|
||||||
srl $4,$19,$5
|
srl $4,$19,$5
|
||||||
addq $17,8,$17
|
addq $17,8,$17
|
||||||
|
@ -55,17 +55,17 @@ Loop0: ldq $3,8($17)
|
||||||
or $3,$3,$4
|
or $3,$3,$4
|
||||||
or $5,$6,$8
|
or $5,$6,$8
|
||||||
stq $8,-8($16)
|
stq $8,-8($16)
|
||||||
bne $28,Loop0
|
bne $28,.Loop0
|
||||||
|
|
||||||
L0: srl $4,$19,$24
|
.L0: srl $4,$19,$24
|
||||||
beq $18,Lend
|
beq $18,.Lend
|
||||||
# warm up phase 1
|
# warm up phase 1
|
||||||
ldq $1,8($17)
|
ldq $1,8($17)
|
||||||
subq $18,4,$18
|
subq $18,4,$18
|
||||||
ldq $2,16($17)
|
ldq $2,16($17)
|
||||||
ldq $3,24($17)
|
ldq $3,24($17)
|
||||||
ldq $4,32($17)
|
ldq $4,32($17)
|
||||||
beq $18,Lcool1
|
beq $18,.Lend1
|
||||||
# warm up phase 2
|
# warm up phase 2
|
||||||
sll $1,$20,$7
|
sll $1,$20,$7
|
||||||
srl $1,$19,$21
|
srl $1,$19,$21
|
||||||
|
@ -82,10 +82,10 @@ L0: srl $4,$19,$24
|
||||||
srl $4,$19,$24
|
srl $4,$19,$24
|
||||||
ldq $4,64($17)
|
ldq $4,64($17)
|
||||||
subq $18,4,$18
|
subq $18,4,$18
|
||||||
beq $18,Lcool2
|
beq $18,.Lend2
|
||||||
.align 4
|
.align 4
|
||||||
# main loop
|
# main loop
|
||||||
Loop: stq $7,0($16)
|
.Loop: stq $7,0($16)
|
||||||
or $5,$22,$5
|
or $5,$22,$5
|
||||||
stq $8,8($16)
|
stq $8,8($16)
|
||||||
or $6,$23,$6
|
or $6,$23,$6
|
||||||
|
@ -116,11 +116,9 @@ Loop: stq $7,0($16)
|
||||||
ldq $4,96($17)
|
ldq $4,96($17)
|
||||||
|
|
||||||
addq $17,32,$17
|
addq $17,32,$17
|
||||||
bne $18,Loop
|
bne $18,.Loop
|
||||||
unop
|
|
||||||
unop
|
|
||||||
# cool down phase 2/1
|
# cool down phase 2/1
|
||||||
Lcool2: stq $7,0($16)
|
.Lend2: stq $7,0($16)
|
||||||
or $5,$22,$5
|
or $5,$22,$5
|
||||||
stq $8,8($16)
|
stq $8,8($16)
|
||||||
or $6,$23,$6
|
or $6,$23,$6
|
||||||
|
@ -148,7 +146,7 @@ Lcool2: stq $7,0($16)
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
|
|
||||||
# cool down phase 1/1
|
# cool down phase 1/1
|
||||||
Lcool1: sll $1,$20,$7
|
.Lend1: sll $1,$20,$7
|
||||||
srl $1,$19,$21
|
srl $1,$19,$21
|
||||||
sll $2,$20,$8
|
sll $2,$20,$8
|
||||||
srl $2,$19,$22
|
srl $2,$19,$22
|
||||||
|
@ -168,6 +166,6 @@ Lcool1: sll $1,$20,$7
|
||||||
stq $24,32($16)
|
stq $24,32($16)
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
|
|
||||||
Lend: stq $24,0($16)
|
.Lend: stq $24,0($16)
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
.end __mpn_rshift
|
.end __mpn_rshift
|
||||||
|
|
|
@ -0,0 +1,148 @@
|
||||||
|
# Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
|
# store difference in a third limb vector.
|
||||||
|
|
||||||
|
# Copyright (C) 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
# The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Library General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
|
||||||
|
# The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
# License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Library General Public License
|
||||||
|
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
# INPUT PARAMETERS
|
||||||
|
# res_ptr $16
|
||||||
|
# s1_ptr $17
|
||||||
|
# s2_ptr $18
|
||||||
|
# size $19
|
||||||
|
|
||||||
|
.set noreorder
|
||||||
|
.set noat
|
||||||
|
.text
|
||||||
|
.align 3
|
||||||
|
.globl __mpn_sub_n
|
||||||
|
.ent __mpn_sub_n
|
||||||
|
__mpn_sub_n:
|
||||||
|
.frame $30,0,$26,0
|
||||||
|
|
||||||
|
or $31,$31,$25 # clear cy
|
||||||
|
subq $19,4,$19 # decr loop cnt
|
||||||
|
blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
|
||||||
|
# Start software pipeline for 1st loop
|
||||||
|
ldq $0,0($18)
|
||||||
|
ldq $1,8($18)
|
||||||
|
ldq $4,0($17)
|
||||||
|
ldq $5,8($17)
|
||||||
|
addq $17,32,$17 # update s1_ptr
|
||||||
|
ldq $2,16($18)
|
||||||
|
subq $4,$0,$20 # 1st main sub
|
||||||
|
ldq $3,24($18)
|
||||||
|
subq $19,4,$19 # decr loop cnt
|
||||||
|
ldq $6,-16($17)
|
||||||
|
cmpult $4,$20,$25 # compute cy from last sub
|
||||||
|
ldq $7,-8($17)
|
||||||
|
addq $1,$25,$28 # cy add
|
||||||
|
addq $18,32,$18 # update s2_ptr
|
||||||
|
subq $5,$28,$21 # 2nd main sub
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
blt $19,.Lend1 # if less than 4 limbs remain, jump
|
||||||
|
# 1st loop handles groups of 4 limbs in a software pipeline
|
||||||
|
.align 4
|
||||||
|
.Loop: cmpult $5,$21,$25 # compute cy from last sub
|
||||||
|
ldq $0,0($18)
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
ldq $1,8($18)
|
||||||
|
addq $2,$25,$28 # cy add
|
||||||
|
ldq $4,0($17)
|
||||||
|
subq $6,$28,$22 # 3rd main sub
|
||||||
|
ldq $5,8($17)
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
cmpult $6,$22,$25 # compute cy from last sub
|
||||||
|
stq $20,0($16)
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
stq $21,8($16)
|
||||||
|
addq $3,$25,$28 # cy add
|
||||||
|
subq $7,$28,$23 # 4th main sub
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
cmpult $7,$23,$25 # compute cy from last sub
|
||||||
|
addq $17,32,$17 # update s1_ptr
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
addq $16,32,$16 # update res_ptr
|
||||||
|
addq $0,$25,$28 # cy add
|
||||||
|
ldq $2,16($18)
|
||||||
|
subq $4,$28,$20 # 1st main sub
|
||||||
|
ldq $3,24($18)
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
ldq $6,-16($17)
|
||||||
|
cmpult $4,$20,$25 # compute cy from last sub
|
||||||
|
ldq $7,-8($17)
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
subq $19,4,$19 # decr loop cnt
|
||||||
|
stq $22,-16($16)
|
||||||
|
addq $1,$25,$28 # cy add
|
||||||
|
stq $23,-8($16)
|
||||||
|
subq $5,$28,$21 # 2nd main sub
|
||||||
|
addq $18,32,$18 # update s2_ptr
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
bge $19,.Loop
|
||||||
|
# Finish software pipeline for 1st loop
|
||||||
|
.Lend1: cmpult $5,$21,$25 # compute cy from last sub
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
addq $2,$25,$28 # cy add
|
||||||
|
subq $6,$28,$22 # 3rd main sub
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
cmpult $6,$22,$25 # compute cy from last sub
|
||||||
|
stq $20,0($16)
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
stq $21,8($16)
|
||||||
|
addq $3,$25,$28 # cy add
|
||||||
|
subq $7,$28,$23 # 4th main sub
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
cmpult $7,$23,$25 # compute cy from last sub
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
addq $16,32,$16 # update res_ptr
|
||||||
|
stq $22,-16($16)
|
||||||
|
stq $23,-8($16)
|
||||||
|
.Lend2: addq $19,4,$19 # restore loop cnt
|
||||||
|
beq $19,.Lret
|
||||||
|
# Start software pipeline for 2nd loop
|
||||||
|
ldq $0,0($18)
|
||||||
|
ldq $4,0($17)
|
||||||
|
subq $19,1,$19
|
||||||
|
beq $19,.Lend0
|
||||||
|
# 2nd loop handles remaining 1-3 limbs
|
||||||
|
.align 4
|
||||||
|
.Loop0: addq $0,$25,$28 # cy add
|
||||||
|
ldq $0,8($18)
|
||||||
|
subq $4,$28,$20 # main sub
|
||||||
|
ldq $1,8($17)
|
||||||
|
addq $18,8,$18
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
addq $17,8,$17
|
||||||
|
stq $20,0($16)
|
||||||
|
cmpult $4,$20,$25 # compute cy from last sub
|
||||||
|
subq $19,1,$19 # decr loop cnt
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
addq $16,8,$16
|
||||||
|
or $1,$31,$4
|
||||||
|
bne $19,.Loop0
|
||||||
|
.Lend0: addq $0,$25,$28 # cy add
|
||||||
|
subq $4,$28,$20 # main sub
|
||||||
|
cmpult $28,$25,$8 # compute cy from last add
|
||||||
|
cmpult $4,$20,$25 # compute cy from last sub
|
||||||
|
stq $20,0($16)
|
||||||
|
or $8,$25,$25 # combine cy from the two adds
|
||||||
|
|
||||||
|
.Lret: or $25,$31,$0 # return cy
|
||||||
|
ret $31,($26),1
|
||||||
|
.end __mpn_sub_n
|
|
@ -53,11 +53,11 @@ __mpn_lshift:
|
||||||
and $18,4-1,$20 # number of limbs in first loop
|
and $18,4-1,$20 # number of limbs in first loop
|
||||||
srl $4,$7,$0 # compute function result
|
srl $4,$7,$0 # compute function result
|
||||||
|
|
||||||
beq $20,L0
|
beq $20,.L0
|
||||||
subq $18,$20,$18
|
subq $18,$20,$18
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
Loop0:
|
.Loop0:
|
||||||
ldq $3,-8($17)
|
ldq $3,-8($17)
|
||||||
subq $16,8,$16
|
subq $16,8,$16
|
||||||
subq $17,8,$17
|
subq $17,8,$17
|
||||||
|
@ -67,12 +67,12 @@ Loop0:
|
||||||
bis $3,$3,$4
|
bis $3,$3,$4
|
||||||
bis $5,$6,$8
|
bis $5,$6,$8
|
||||||
stq $8,0($16)
|
stq $8,0($16)
|
||||||
bne $20,Loop0
|
bne $20,.Loop0
|
||||||
|
|
||||||
L0: beq $18,Lend
|
.L0: beq $18,.Lend
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
Loop: ldq $3,-8($17)
|
.Loop: ldq $3,-8($17)
|
||||||
subq $16,32,$16
|
subq $16,32,$16
|
||||||
subq $18,4,$18
|
subq $18,4,$18
|
||||||
sll $4,$19,$5
|
sll $4,$19,$5
|
||||||
|
@ -100,9 +100,9 @@ Loop: ldq $3,-8($17)
|
||||||
bis $1,$2,$8
|
bis $1,$2,$8
|
||||||
stq $8,0($16)
|
stq $8,0($16)
|
||||||
|
|
||||||
bgt $18,Loop
|
bgt $18,.Loop
|
||||||
|
|
||||||
Lend: sll $4,$19,$8
|
.Lend: sll $4,$19,$8
|
||||||
stq $8,-8($16)
|
stq $8,-8($16)
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
.end __mpn_lshift
|
.end __mpn_lshift
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
# Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||||
# the result in a second limb vector.
|
# the result in a second limb vector.
|
||||||
|
|
||||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This file is part of the GNU MP Library.
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
|
|
@ -51,11 +51,11 @@ __mpn_rshift:
|
||||||
and $18,4-1,$20 # number of limbs in first loop
|
and $18,4-1,$20 # number of limbs in first loop
|
||||||
sll $4,$7,$0 # compute function result
|
sll $4,$7,$0 # compute function result
|
||||||
|
|
||||||
beq $20,L0
|
beq $20,.L0
|
||||||
subq $18,$20,$18
|
subq $18,$20,$18
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
Loop0:
|
.Loop0:
|
||||||
ldq $3,0($17)
|
ldq $3,0($17)
|
||||||
addq $16,8,$16
|
addq $16,8,$16
|
||||||
addq $17,8,$17
|
addq $17,8,$17
|
||||||
|
@ -65,12 +65,12 @@ Loop0:
|
||||||
bis $3,$3,$4
|
bis $3,$3,$4
|
||||||
bis $5,$6,$8
|
bis $5,$6,$8
|
||||||
stq $8,-8($16)
|
stq $8,-8($16)
|
||||||
bne $20,Loop0
|
bne $20,.Loop0
|
||||||
|
|
||||||
L0: beq $18,Lend
|
.L0: beq $18,.Lend
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
Loop: ldq $3,0($17)
|
.Loop: ldq $3,0($17)
|
||||||
addq $16,32,$16
|
addq $16,32,$16
|
||||||
subq $18,4,$18
|
subq $18,4,$18
|
||||||
srl $4,$19,$5
|
srl $4,$19,$5
|
||||||
|
@ -98,9 +98,9 @@ Loop: ldq $3,0($17)
|
||||||
bis $1,$2,$8
|
bis $1,$2,$8
|
||||||
stq $8,-8($16)
|
stq $8,-8($16)
|
||||||
|
|
||||||
bgt $18,Loop
|
bgt $18,.Loop
|
||||||
|
|
||||||
Lend: srl $4,$19,$8
|
.Lend: srl $4,$19,$8
|
||||||
stq $8,0($16)
|
stq $8,0($16)
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
.end __mpn_rshift
|
.end __mpn_rshift
|
||||||
|
|
|
@ -26,16 +26,7 @@
|
||||||
# size r18
|
# size r18
|
||||||
# s2_limb r19
|
# s2_limb r19
|
||||||
|
|
||||||
# This code runs at 42 cycles/limb on the 21064.
|
# This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
|
||||||
|
|
||||||
# To improve performance for long multiplications, we would use
|
|
||||||
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
|
|
||||||
# these instructions without slowing down the general code: 1. We can
|
|
||||||
# only have two prefetches in operation at any time in the Alpha
|
|
||||||
# architecture. 2. There will seldom be any special alignment
|
|
||||||
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
|
|
||||||
# loop into an inner and outer loop, having the inner loop handle
|
|
||||||
# exactly one prefetch block?
|
|
||||||
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
.set noat
|
.set noat
|
||||||
|
@ -52,7 +43,7 @@ __mpn_submul_1:
|
||||||
mulq $2,$19,$3 # $3 = prod_low
|
mulq $2,$19,$3 # $3 = prod_low
|
||||||
ldq $5,0($16) # $5 = *res_ptr
|
ldq $5,0($16) # $5 = *res_ptr
|
||||||
umulh $2,$19,$0 # $0 = prod_high
|
umulh $2,$19,$0 # $0 = prod_high
|
||||||
beq $18,Lend1 # jump if size was == 1
|
beq $18,.Lend1 # jump if size was == 1
|
||||||
ldq $2,0($17) # $2 = s1_limb
|
ldq $2,0($17) # $2 = s1_limb
|
||||||
addq $17,8,$17 # s1_ptr++
|
addq $17,8,$17 # s1_ptr++
|
||||||
subq $18,1,$18 # size--
|
subq $18,1,$18 # size--
|
||||||
|
@ -60,10 +51,10 @@ __mpn_submul_1:
|
||||||
cmpult $5,$3,$4
|
cmpult $5,$3,$4
|
||||||
stq $3,0($16)
|
stq $3,0($16)
|
||||||
addq $16,8,$16 # res_ptr++
|
addq $16,8,$16 # res_ptr++
|
||||||
beq $18,Lend2 # jump if size was == 2
|
beq $18,.Lend2 # jump if size was == 2
|
||||||
|
|
||||||
.align 3
|
.align 3
|
||||||
Loop: mulq $2,$19,$3 # $3 = prod_low
|
.Loop: mulq $2,$19,$3 # $3 = prod_low
|
||||||
ldq $5,0($16) # $5 = *res_ptr
|
ldq $5,0($16) # $5 = *res_ptr
|
||||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||||
subq $18,1,$18 # size--
|
subq $18,1,$18 # size--
|
||||||
|
@ -77,9 +68,9 @@ Loop: mulq $2,$19,$3 # $3 = prod_low
|
||||||
stq $3,0($16)
|
stq $3,0($16)
|
||||||
addq $16,8,$16 # res_ptr++
|
addq $16,8,$16 # res_ptr++
|
||||||
addq $5,$0,$0 # combine carries
|
addq $5,$0,$0 # combine carries
|
||||||
bne $18,Loop
|
bne $18,.Loop
|
||||||
|
|
||||||
Lend2: mulq $2,$19,$3 # $3 = prod_low
|
.Lend2: mulq $2,$19,$3 # $3 = prod_low
|
||||||
ldq $5,0($16) # $5 = *res_ptr
|
ldq $5,0($16) # $5 = *res_ptr
|
||||||
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
|
||||||
umulh $2,$19,$4 # $4 = cy_limb
|
umulh $2,$19,$4 # $4 = cy_limb
|
||||||
|
@ -91,7 +82,7 @@ Lend2: mulq $2,$19,$3 # $3 = prod_low
|
||||||
addq $5,$0,$0 # combine carries
|
addq $5,$0,$0 # combine carries
|
||||||
addq $4,$0,$0 # cy_limb = prod_high + cy
|
addq $4,$0,$0 # cy_limb = prod_high + cy
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
Lend1: subq $5,$3,$3
|
.Lend1: subq $5,$3,$3
|
||||||
cmpult $5,$3,$5
|
cmpult $5,$3,$5
|
||||||
stq $3,0($16)
|
stq $3,0($16)
|
||||||
addq $0,$5,$0
|
addq $0,$5,$0
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# Alpha 21064 __udiv_qrnnd
|
# Alpha 21064 __udiv_qrnnd
|
||||||
|
|
||||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This file is part of the GNU MP Library.
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -21,13 +21,11 @@
|
||||||
|
|
||||||
.set noreorder
|
.set noreorder
|
||||||
.set noat
|
.set noat
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.align 3
|
.align 3
|
||||||
.globl __udiv_qrnnd
|
.globl __udiv_qrnnd
|
||||||
.ent __udiv_qrnnd 0
|
.ent __udiv_qrnnd
|
||||||
__udiv_qrnnd:
|
__udiv_qrnnd:
|
||||||
__udiv_qrnnd..ng:
|
|
||||||
.frame $30,0,$26,0
|
.frame $30,0,$26,0
|
||||||
.prologue 0
|
.prologue 0
|
||||||
#define cnt $2
|
#define cnt $2
|
||||||
|
@ -39,9 +37,9 @@ __udiv_qrnnd..ng:
|
||||||
#define qb $20
|
#define qb $20
|
||||||
|
|
||||||
ldiq cnt,16
|
ldiq cnt,16
|
||||||
blt d,Largedivisor
|
blt d,.Largedivisor
|
||||||
|
|
||||||
Loop1: cmplt n0,0,tmp
|
.Loop1: cmplt n0,0,tmp
|
||||||
addq n1,n1,n1
|
addq n1,n1,n1
|
||||||
bis n1,tmp,n1
|
bis n1,tmp,n1
|
||||||
addq n0,n0,n0
|
addq n0,n0,n0
|
||||||
|
@ -74,12 +72,12 @@ Loop1: cmplt n0,0,tmp
|
||||||
cmovne qb,tmp,n1
|
cmovne qb,tmp,n1
|
||||||
bis n0,qb,n0
|
bis n0,qb,n0
|
||||||
subq cnt,1,cnt
|
subq cnt,1,cnt
|
||||||
bgt cnt,Loop1
|
bgt cnt,.Loop1
|
||||||
stq n1,0(rem_ptr)
|
stq n1,0(rem_ptr)
|
||||||
bis $31,n0,$0
|
bis $31,n0,$0
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
|
|
||||||
Largedivisor:
|
.Largedivisor:
|
||||||
and n0,1,$4
|
and n0,1,$4
|
||||||
|
|
||||||
srl n0,1,n0
|
srl n0,1,n0
|
||||||
|
@ -91,7 +89,7 @@ Largedivisor:
|
||||||
srl d,1,$5
|
srl d,1,$5
|
||||||
addq $5,$6,$5
|
addq $5,$6,$5
|
||||||
|
|
||||||
Loop2: cmplt n0,0,tmp
|
.Loop2: cmplt n0,0,tmp
|
||||||
addq n1,n1,n1
|
addq n1,n1,n1
|
||||||
bis n1,tmp,n1
|
bis n1,tmp,n1
|
||||||
addq n0,n0,n0
|
addq n0,n0,n0
|
||||||
|
@ -124,27 +122,27 @@ Loop2: cmplt n0,0,tmp
|
||||||
cmovne qb,tmp,n1
|
cmovne qb,tmp,n1
|
||||||
bis n0,qb,n0
|
bis n0,qb,n0
|
||||||
subq cnt,1,cnt
|
subq cnt,1,cnt
|
||||||
bgt cnt,Loop2
|
bgt cnt,.Loop2
|
||||||
|
|
||||||
addq n1,n1,n1
|
addq n1,n1,n1
|
||||||
addq $4,n1,n1
|
addq $4,n1,n1
|
||||||
bne $6,Odd
|
bne $6,.LOdd
|
||||||
stq n1,0(rem_ptr)
|
stq n1,0(rem_ptr)
|
||||||
bis $31,n0,$0
|
bis $31,n0,$0
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
|
|
||||||
Odd:
|
.LOdd:
|
||||||
/* q' in n0. r' in n1 */
|
/* q' in n0. r' in n1 */
|
||||||
addq n1,n0,n1
|
addq n1,n0,n1
|
||||||
cmpult n1,n0,tmp # tmp := carry from addq
|
cmpult n1,n0,tmp # tmp := carry from addq
|
||||||
beq tmp,LLp6
|
beq tmp,.LLp6
|
||||||
addq n0,1,n0
|
addq n0,1,n0
|
||||||
subq n1,d,n1
|
subq n1,d,n1
|
||||||
LLp6: cmpult n1,d,tmp
|
.LLp6: cmpult n1,d,tmp
|
||||||
bne tmp,LLp7
|
bne tmp,.LLp7
|
||||||
addq n0,1,n0
|
addq n0,1,n0
|
||||||
subq n1,d,n1
|
subq n1,d,n1
|
||||||
LLp7:
|
.LLp7:
|
||||||
stq n1,0(rem_ptr)
|
stq n1,0(rem_ptr)
|
||||||
bis $31,n0,$0
|
bis $31,n0,$0
|
||||||
ret $31,($26),1
|
ret $31,($26),1
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
/* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||||
sum in a third limb vector.
|
sum in a third limb vector.
|
||||||
|
|
||||||
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of the GNU MP Library.
|
This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -27,50 +27,53 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||||
size (sp + 12)
|
size (sp + 12)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "sysdep.h"
|
||||||
#include "asm-syntax.h"
|
#include "asm-syntax.h"
|
||||||
|
|
||||||
TEXT
|
TEXT
|
||||||
ALIGN
|
ALIGN
|
||||||
GLOBL ___mpn_add_n
|
GLOBL C_SYMBOL_NAME(__mpn_add_n)
|
||||||
|
|
||||||
LAB(___mpn_add_n)
|
C_SYMBOL_NAME(__mpn_add_n:)
|
||||||
|
PROLOG(__mpn_add_n)
|
||||||
/* Save used registers on the stack. */
|
/* Save used registers on the stack. */
|
||||||
INSN2(move,l ,MEM_PREDEC(sp),d2)
|
movel R(d2),MEM_PREDEC(sp)
|
||||||
INSN2(move,l ,MEM_PREDEC(sp),a2)
|
movel R(a2),MEM_PREDEC(sp)
|
||||||
|
|
||||||
/* Copy the arguments to registers. Better use movem? */
|
/* Copy the arguments to registers. Better use movem? */
|
||||||
INSN2(move,l ,a2,MEM_DISP(sp,12))
|
movel MEM_DISP(sp,12),R(a2)
|
||||||
INSN2(move,l ,a0,MEM_DISP(sp,16))
|
movel MEM_DISP(sp,16),R(a0)
|
||||||
INSN2(move,l ,a1,MEM_DISP(sp,20))
|
movel MEM_DISP(sp,20),R(a1)
|
||||||
INSN2(move,l ,d2,MEM_DISP(sp,24))
|
movel MEM_DISP(sp,24),R(d2)
|
||||||
|
|
||||||
INSN2(eor,w ,d2,#1)
|
eorw #1,R(d2)
|
||||||
INSN2(lsr,l ,d2,#1)
|
lsrl #1,R(d2)
|
||||||
bcc L1
|
bcc L(L1)
|
||||||
INSN2(subq,l ,d2,#1) /* clears cy as side effect */
|
subql #1,R(d2) /* clears cy as side effect */
|
||||||
|
|
||||||
LAB(Loop)
|
L(Loop:)
|
||||||
INSN2(move,l ,d0,MEM_POSTINC(a0))
|
movel MEM_POSTINC(a0),R(d0)
|
||||||
INSN2(move,l ,d1,MEM_POSTINC(a1))
|
movel MEM_POSTINC(a1),R(d1)
|
||||||
INSN2(addx,l ,d0,d1)
|
addxl R(d1),R(d0)
|
||||||
INSN2(move,l ,MEM_POSTINC(a2),d0)
|
movel R(d0),MEM_POSTINC(a2)
|
||||||
LAB(L1) INSN2(move,l ,d0,MEM_POSTINC(a0))
|
L(L1:) movel MEM_POSTINC(a0),R(d0)
|
||||||
INSN2(move,l ,d1,MEM_POSTINC(a1))
|
movel MEM_POSTINC(a1),R(d1)
|
||||||
INSN2(addx,l ,d0,d1)
|
addxl R(d1),R(d0)
|
||||||
INSN2(move,l ,MEM_POSTINC(a2),d0)
|
movel R(d0),MEM_POSTINC(a2)
|
||||||
|
|
||||||
dbf d2,Loop /* loop until 16 lsb of %4 == -1 */
|
dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
|
||||||
INSN2(subx,l ,d0,d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
|
subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
|
||||||
INSN2(sub,l ,d2,#0x10000)
|
subl #0x10000,R(d2)
|
||||||
bcs L2
|
bcs L(L2)
|
||||||
INSN2(add,l ,d0,d0) /* restore cy */
|
addl R(d0),R(d0) /* restore cy */
|
||||||
bra Loop
|
bra L(Loop)
|
||||||
|
|
||||||
LAB(L2)
|
L(L2:)
|
||||||
INSN1(neg,l ,d0)
|
negl R(d0)
|
||||||
|
|
||||||
/* Restore used registers from stack frame. */
|
/* Restore used registers from stack frame. */
|
||||||
INSN2(move,l ,a2,MEM_POSTINC(sp))
|
movel MEM_POSTINC(sp),R(a2)
|
||||||
INSN2(move,l ,d2,MEM_POSTINC(sp))
|
movel MEM_POSTINC(sp),R(d2)
|
||||||
|
|
||||||
rts
|
rts
|
||||||
|
EPILOG(__mpn_add_n)
|
||||||
|
|
|
@ -0,0 +1,150 @@
|
||||||
|
/* mc68020 __mpn_lshift -- Shift left a low-level natural-number integer.
|
||||||
|
|
||||||
|
Copyright (C) 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Library General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
option) any later version.
|
||||||
|
|
||||||
|
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Library General Public License
|
||||||
|
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||||
|
|
||||||
|
/*
|
||||||
|
INPUT PARAMETERS
|
||||||
|
res_ptr (sp + 4)
|
||||||
|
s_ptr (sp + 8)
|
||||||
|
s_size (sp + 12)
|
||||||
|
cnt (sp + 16)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "sysdep.h"
|
||||||
|
#include "asm-syntax.h"
|
||||||
|
|
||||||
|
#define res_ptr a1
|
||||||
|
#define s_ptr a0
|
||||||
|
#define s_size d6
|
||||||
|
#define cnt d4
|
||||||
|
|
||||||
|
TEXT
|
||||||
|
ALIGN
|
||||||
|
GLOBL C_SYMBOL_NAME(__mpn_lshift)
|
||||||
|
|
||||||
|
C_SYMBOL_NAME(__mpn_lshift:)
|
||||||
|
PROLOG(__mpn_lshift)
|
||||||
|
|
||||||
|
/* Save used registers on the stack. */
|
||||||
|
moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
|
||||||
|
|
||||||
|
/* Copy the arguments to registers. */
|
||||||
|
movel MEM_DISP(sp,28),R(res_ptr)
|
||||||
|
movel MEM_DISP(sp,32),R(s_ptr)
|
||||||
|
movel MEM_DISP(sp,36),R(s_size)
|
||||||
|
movel MEM_DISP(sp,40),R(cnt)
|
||||||
|
|
||||||
|
moveql #1,R(d5)
|
||||||
|
cmpl R(d5),R(cnt)
|
||||||
|
bne L(Lnormal)
|
||||||
|
cmpl R(s_ptr),R(res_ptr)
|
||||||
|
bls L(Lspecial) /* jump if s_ptr >= res_ptr */
|
||||||
|
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
|
||||||
|
lea MEM_INDX1(s_ptr,s_size,l,4),R(a2)
|
||||||
|
#else /* not mc68020 */
|
||||||
|
movel R(s_size),R(d0)
|
||||||
|
asll #2,R(d0)
|
||||||
|
lea MEM_INDX(s_ptr,d0,l),R(a2)
|
||||||
|
#endif
|
||||||
|
cmpl R(res_ptr),R(a2)
|
||||||
|
bls L(Lspecial) /* jump if res_ptr >= s_ptr + s_size */
|
||||||
|
|
||||||
|
L(Lnormal:)
|
||||||
|
moveql #32,R(d5)
|
||||||
|
subl R(cnt),R(d5)
|
||||||
|
|
||||||
|
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
|
||||||
|
lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr) /* s_ptr += 4 * s_size (scaled index) */
|
||||||
|
lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr) /* res_ptr += 4 * s_size */
|
||||||
|
#else /* not mc68020 */
|
||||||
|
movel R(s_size),R(d0)
|
||||||
|
asll #2,R(d0) /* d0 = 4 * s_size, the byte length of the operand */
|
||||||
|
addl R(d0),R(s_ptr) /* advance by the byte length in d0, matching the lea above */
|
||||||
|
addl R(d0),R(res_ptr) /* likewise for the destination pointer */
|
||||||
|
#endif
|
||||||
|
movel MEM_PREDEC(s_ptr),R(d2)
|
||||||
|
movel R(d2),R(d0)
|
||||||
|
lsrl R(d5),R(d0) /* compute carry limb */
|
||||||
|
|
||||||
|
lsll R(cnt),R(d2)
|
||||||
|
movel R(d2),R(d1)
|
||||||
|
subql #1,R(s_size)
|
||||||
|
beq L(Lend)
|
||||||
|
lsrl #1,R(s_size)
|
||||||
|
bcs L(L1)
|
||||||
|
subql #1,R(s_size)
|
||||||
|
|
||||||
|
L(Loop:)
|
||||||
|
movel MEM_PREDEC(s_ptr),R(d2)
|
||||||
|
movel R(d2),R(d3)
|
||||||
|
lsrl R(d5),R(d3)
|
||||||
|
orl R(d3),R(d1)
|
||||||
|
movel R(d1),MEM_PREDEC(res_ptr)
|
||||||
|
lsll R(cnt),R(d2)
|
||||||
|
L(L1:)
|
||||||
|
movel MEM_PREDEC(s_ptr),R(d1)
|
||||||
|
movel R(d1),R(d3)
|
||||||
|
lsrl R(d5),R(d3)
|
||||||
|
orl R(d3),R(d2)
|
||||||
|
movel R(d2),MEM_PREDEC(res_ptr)
|
||||||
|
lsll R(cnt),R(d1)
|
||||||
|
|
||||||
|
dbf R(s_size),L(Loop)
|
||||||
|
subl #0x10000,R(s_size)
|
||||||
|
bcc L(Loop)
|
||||||
|
|
||||||
|
L(Lend:)
|
||||||
|
movel R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */
|
||||||
|
|
||||||
|
/* Restore used registers from stack frame. */
|
||||||
|
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
|
||||||
|
rts
|
||||||
|
|
||||||
|
/* We loop from least significant end of the arrays, which is only
|
||||||
|
permissible if the source and destination don't overlap, since the
|
||||||
|
function is documented to work for overlapping source and destination. */
|
||||||
|
|
||||||
|
L(Lspecial:)
|
||||||
|
clrl R(d0) /* initialize carry */
|
||||||
|
eorw #1,R(s_size)
|
||||||
|
lsrl #1,R(s_size)
|
||||||
|
bcc L(LL1)
|
||||||
|
subql #1,R(s_size)
|
||||||
|
|
||||||
|
L(LLoop:)
|
||||||
|
movel MEM_POSTINC(s_ptr),R(d2)
|
||||||
|
addxl R(d2),R(d2)
|
||||||
|
movel R(d2),MEM_POSTINC(res_ptr)
|
||||||
|
L(LL1:)
|
||||||
|
movel MEM_POSTINC(s_ptr),R(d2)
|
||||||
|
addxl R(d2),R(d2)
|
||||||
|
movel R(d2),MEM_POSTINC(res_ptr)
|
||||||
|
|
||||||
|
dbf R(s_size),L(LLoop)
|
||||||
|
addxl R(d0),R(d0) /* save cy in lsb */
|
||||||
|
subl #0x10000,R(s_size)
|
||||||
|
bcs L(LLend)
|
||||||
|
lsrl #1,R(d0) /* restore cy */
|
||||||
|
bra L(LLoop)
|
||||||
|
|
||||||
|
L(LLend:)
|
||||||
|
/* Restore used registers from stack frame. */
|
||||||
|
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
|
||||||
|
rts
|
||||||
|
EPILOG(__mpn_lshift)
|
|
@ -1,7 +1,7 @@
|
||||||
/* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
/* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||||
the result to a second limb vector.
|
the result to a second limb vector.
|
||||||
|
|
||||||
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of the GNU MP Library.
|
This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -23,58 +23,61 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||||
INPUT PARAMETERS
|
INPUT PARAMETERS
|
||||||
res_ptr (sp + 4)
|
res_ptr (sp + 4)
|
||||||
s1_ptr (sp + 8)
|
s1_ptr (sp + 8)
|
||||||
size (sp + 12)
|
s1_size (sp + 12)
|
||||||
s2_limb (sp + 16)
|
s2_limb (sp + 16)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "sysdep.h"
|
||||||
#include "asm-syntax.h"
|
#include "asm-syntax.h"
|
||||||
|
|
||||||
TEXT
|
TEXT
|
||||||
ALIGN
|
ALIGN
|
||||||
GLOBL ___mpn_addmul_1
|
GLOBL C_SYMBOL_NAME(__mpn_addmul_1)
|
||||||
|
|
||||||
LAB(___mpn_addmul_1)
|
C_SYMBOL_NAME(__mpn_addmul_1:)
|
||||||
|
PROLOG(__mpn_addmul_1)
|
||||||
|
|
||||||
#define res_ptr a0
|
#define res_ptr a0
|
||||||
#define s1_ptr a1
|
#define s1_ptr a1
|
||||||
#define size d2
|
#define s1_size d2
|
||||||
#define s2_limb d4
|
#define s2_limb d4
|
||||||
|
|
||||||
/* Save used registers on the stack. */
|
/* Save used registers on the stack. */
|
||||||
INSN2(movem,l ,MEM_PREDEC(sp),d2-d5)
|
moveml R(d2)-R(d5),MEM_PREDEC(sp)
|
||||||
|
|
||||||
/* Copy the arguments to registers. Better use movem? */
|
/* Copy the arguments to registers. Better use movem? */
|
||||||
INSN2(move,l ,res_ptr,MEM_DISP(sp,20))
|
movel MEM_DISP(sp,20),R(res_ptr)
|
||||||
INSN2(move,l ,s1_ptr,MEM_DISP(sp,24))
|
movel MEM_DISP(sp,24),R(s1_ptr)
|
||||||
INSN2(move,l ,size,MEM_DISP(sp,28))
|
movel MEM_DISP(sp,28),R(s1_size)
|
||||||
INSN2(move,l ,s2_limb,MEM_DISP(sp,32))
|
movel MEM_DISP(sp,32),R(s2_limb)
|
||||||
|
|
||||||
INSN2(eor,w ,size,#1)
|
eorw #1,R(s1_size)
|
||||||
INSN1(clr,l ,d1)
|
clrl R(d1)
|
||||||
INSN1(clr,l ,d5)
|
clrl R(d5)
|
||||||
INSN2(lsr,l ,size,#1)
|
lsrl #1,R(s1_size)
|
||||||
bcc L1
|
bcc L(L1)
|
||||||
INSN2(subq,l ,size,#1)
|
subql #1,R(s1_size)
|
||||||
INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */
|
subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
|
||||||
|
|
||||||
LAB(Loop)
|
L(Loop:)
|
||||||
INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
|
movel MEM_POSTINC(s1_ptr),R(d3)
|
||||||
INSN2(mulu,l ,d1:d3,s2_limb)
|
mulul R(s2_limb),R(d1):R(d3)
|
||||||
INSN2(addx,l ,d3,d0)
|
addxl R(d0),R(d3)
|
||||||
INSN2(addx,l ,d1,d5)
|
addxl R(d5),R(d1)
|
||||||
INSN2(add,l ,MEM_POSTINC(res_ptr),d3)
|
addl R(d3),MEM_POSTINC(res_ptr)
|
||||||
LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
|
L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
|
||||||
INSN2(mulu,l ,d0:d3,s2_limb)
|
mulul R(s2_limb),R(d0):R(d3)
|
||||||
INSN2(addx,l ,d3,d1)
|
addxl R(d1),R(d3)
|
||||||
INSN2(addx,l ,d0,d5)
|
addxl R(d5),R(d0)
|
||||||
INSN2(add,l ,MEM_POSTINC(res_ptr),d3)
|
addl R(d3),MEM_POSTINC(res_ptr)
|
||||||
|
|
||||||
dbf size,Loop
|
dbf R(s1_size),L(Loop)
|
||||||
INSN2(addx,l ,d0,d5)
|
addxl R(d5),R(d0)
|
||||||
INSN2(sub,l ,size,#0x10000)
|
subl #0x10000,R(s1_size)
|
||||||
bcc Loop
|
bcc L(Loop)
|
||||||
|
|
||||||
/* Restore used registers from stack frame. */
|
/* Restore used registers from stack frame. */
|
||||||
INSN2(movem,l ,d2-d5,MEM_POSTINC(sp))
|
moveml MEM_POSTINC(sp),R(d2)-R(d5)
|
||||||
|
|
||||||
rts
|
rts
|
||||||
|
EPILOG(__mpn_addmul_1)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
/* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||||
the result in a second limb vector.
|
the result in a second limb vector.
|
||||||
|
|
||||||
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of the GNU MP Library.
|
This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -23,65 +23,68 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||||
INPUT PARAMETERS
|
INPUT PARAMETERS
|
||||||
res_ptr (sp + 4)
|
res_ptr (sp + 4)
|
||||||
s1_ptr (sp + 8)
|
s1_ptr (sp + 8)
|
||||||
size (sp + 12)
|
s1_size (sp + 12)
|
||||||
s2_limb (sp + 16)
|
s2_limb (sp + 16)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "sysdep.h"
|
||||||
#include "asm-syntax.h"
|
#include "asm-syntax.h"
|
||||||
|
|
||||||
TEXT
|
TEXT
|
||||||
ALIGN
|
ALIGN
|
||||||
GLOBL ___mpn_mul_1
|
GLOBL C_SYMBOL_NAME(__mpn_mul_1)
|
||||||
|
|
||||||
LAB(___mpn_mul_1)
|
C_SYMBOL_NAME(__mpn_mul_1:)
|
||||||
|
PROLOG(__mpn_mul_1)
|
||||||
|
|
||||||
#define res_ptr a0
|
#define res_ptr a0
|
||||||
#define s1_ptr a1
|
#define s1_ptr a1
|
||||||
#define size d2
|
#define s1_size d2
|
||||||
#define s2_limb d4
|
#define s2_limb d4
|
||||||
|
|
||||||
/* Save used registers on the stack. */
|
/* Save used registers on the stack. */
|
||||||
INSN2(movem,l ,MEM_PREDEC(sp),d2-d4)
|
moveml R(d2)-R(d4),MEM_PREDEC(sp)
|
||||||
#if 0
|
#if 0
|
||||||
INSN2(move,l ,MEM_PREDEC(sp),d2)
|
movel R(d2),MEM_PREDEC(sp)
|
||||||
INSN2(move,l ,MEM_PREDEC(sp),d3)
|
movel R(d3),MEM_PREDEC(sp)
|
||||||
INSN2(move,l ,MEM_PREDEC(sp),d4)
|
movel R(d4),MEM_PREDEC(sp)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Copy the arguments to registers. Better use movem? */
|
/* Copy the arguments to registers. Better use movem? */
|
||||||
INSN2(move,l ,res_ptr,MEM_DISP(sp,16))
|
movel MEM_DISP(sp,16),R(res_ptr)
|
||||||
INSN2(move,l ,s1_ptr,MEM_DISP(sp,20))
|
movel MEM_DISP(sp,20),R(s1_ptr)
|
||||||
INSN2(move,l ,size,MEM_DISP(sp,24))
|
movel MEM_DISP(sp,24),R(s1_size)
|
||||||
INSN2(move,l ,s2_limb,MEM_DISP(sp,28))
|
movel MEM_DISP(sp,28),R(s2_limb)
|
||||||
|
|
||||||
INSN2(eor,w ,size,#1)
|
eorw #1,R(s1_size)
|
||||||
INSN1(clr,l ,d1)
|
clrl R(d1)
|
||||||
INSN2(lsr,l ,size,#1)
|
lsrl #1,R(s1_size)
|
||||||
bcc L1
|
bcc L(L1)
|
||||||
INSN2(subq,l ,size,#1)
|
subql #1,R(s1_size)
|
||||||
INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */
|
subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
|
||||||
|
|
||||||
LAB(Loop)
|
L(Loop:)
|
||||||
INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
|
movel MEM_POSTINC(s1_ptr),R(d3)
|
||||||
INSN2(mulu,l ,d1:d3,s2_limb)
|
mulul R(s2_limb),R(d1):R(d3)
|
||||||
INSN2(addx,l ,d3,d0)
|
addxl R(d0),R(d3)
|
||||||
INSN2(move,l ,MEM_POSTINC(res_ptr),d3)
|
movel R(d3),MEM_POSTINC(res_ptr)
|
||||||
LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
|
L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
|
||||||
INSN2(mulu,l ,d0:d3,s2_limb)
|
mulul R(s2_limb),R(d0):R(d3)
|
||||||
INSN2(addx,l ,d3,d1)
|
addxl R(d1),R(d3)
|
||||||
INSN2(move,l ,MEM_POSTINC(res_ptr),d3)
|
movel R(d3),MEM_POSTINC(res_ptr)
|
||||||
|
|
||||||
dbf size,Loop
|
dbf R(s1_size),L(Loop)
|
||||||
INSN1(clr,l ,d3)
|
clrl R(d3)
|
||||||
INSN2(addx,l ,d0,d3)
|
addxl R(d3),R(d0)
|
||||||
INSN2(sub,l ,size,#0x10000)
|
subl #0x10000,R(s1_size)
|
||||||
bcc Loop
|
bcc L(Loop)
|
||||||
|
|
||||||
/* Restore used registers from stack frame. */
|
/* Restore used registers from stack frame. */
|
||||||
INSN2(movem,l ,d2-d4,MEM_POSTINC(sp))
|
moveml MEM_POSTINC(sp),R(d2)-R(d4)
|
||||||
#if 0
|
#if 0
|
||||||
INSN2(move,l ,d4,MEM_POSTINC(sp))
|
movel MEM_POSTINC(sp),R(d4)
|
||||||
INSN2(move,l ,d3,MEM_POSTINC(sp))
|
movel MEM_POSTINC(sp),R(d3)
|
||||||
INSN2(move,l ,d2,MEM_POSTINC(sp))
|
movel MEM_POSTINC(sp),R(d2)
|
||||||
#endif
|
#endif
|
||||||
rts
|
rts
|
||||||
|
EPILOG(__mpn_mul_1)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
|
/* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
|
||||||
the result from a second limb vector.
|
the result from a second limb vector.
|
||||||
|
|
||||||
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of the GNU MP Library.
|
This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -23,58 +23,61 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||||
INPUT PARAMETERS
|
INPUT PARAMETERS
|
||||||
res_ptr (sp + 4)
|
res_ptr (sp + 4)
|
||||||
s1_ptr (sp + 8)
|
s1_ptr (sp + 8)
|
||||||
size (sp + 12)
|
s1_size (sp + 12)
|
||||||
s2_limb (sp + 16)
|
s2_limb (sp + 16)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "sysdep.h"
|
||||||
#include "asm-syntax.h"
|
#include "asm-syntax.h"
|
||||||
|
|
||||||
TEXT
|
TEXT
|
||||||
ALIGN
|
ALIGN
|
||||||
GLOBL ___mpn_submul_1
|
GLOBL C_SYMBOL_NAME(__mpn_submul_1)
|
||||||
|
|
||||||
LAB(___mpn_submul_1)
|
C_SYMBOL_NAME(__mpn_submul_1:)
|
||||||
|
PROLOG(__mpn_submul_1)
|
||||||
|
|
||||||
#define res_ptr a0
|
#define res_ptr a0
|
||||||
#define s1_ptr a1
|
#define s1_ptr a1
|
||||||
#define size d2
|
#define s1_size d2
|
||||||
#define s2_limb d4
|
#define s2_limb d4
|
||||||
|
|
||||||
/* Save used registers on the stack. */
|
/* Save used registers on the stack. */
|
||||||
INSN2(movem,l ,MEM_PREDEC(sp),d2-d5)
|
moveml R(d2)-R(d5),MEM_PREDEC(sp)
|
||||||
|
|
||||||
/* Copy the arguments to registers. Better use movem? */
|
/* Copy the arguments to registers. Better use movem? */
|
||||||
INSN2(move,l ,res_ptr,MEM_DISP(sp,20))
|
movel MEM_DISP(sp,20),R(res_ptr)
|
||||||
INSN2(move,l ,s1_ptr,MEM_DISP(sp,24))
|
movel MEM_DISP(sp,24),R(s1_ptr)
|
||||||
INSN2(move,l ,size,MEM_DISP(sp,28))
|
movel MEM_DISP(sp,28),R(s1_size)
|
||||||
INSN2(move,l ,s2_limb,MEM_DISP(sp,32))
|
movel MEM_DISP(sp,32),R(s2_limb)
|
||||||
|
|
||||||
INSN2(eor,w ,size,#1)
|
eorw #1,R(s1_size)
|
||||||
INSN1(clr,l ,d1)
|
clrl R(d1)
|
||||||
INSN1(clr,l ,d5)
|
clrl R(d5)
|
||||||
INSN2(lsr,l ,size,#1)
|
lsrl #1,R(s1_size)
|
||||||
bcc L1
|
bcc L(L1)
|
||||||
INSN2(subq,l ,size,#1)
|
subql #1,R(s1_size)
|
||||||
INSN2(sub,l ,d0,d0) /* (d0,cy) <= (0,0) */
|
subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
|
||||||
|
|
||||||
LAB(Loop)
|
L(Loop:)
|
||||||
INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
|
movel MEM_POSTINC(s1_ptr),R(d3)
|
||||||
INSN2(mulu,l ,d1:d3,s2_limb)
|
mulul R(s2_limb),R(d1):R(d3)
|
||||||
INSN2(addx,l ,d3,d0)
|
addxl R(d0),R(d3)
|
||||||
INSN2(addx,l ,d1,d5)
|
addxl R(d5),R(d1)
|
||||||
INSN2(sub,l ,MEM_POSTINC(res_ptr),d3)
|
subl R(d3),MEM_POSTINC(res_ptr)
|
||||||
LAB(L1) INSN2(move,l ,d3,MEM_POSTINC(s1_ptr))
|
L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
|
||||||
INSN2(mulu,l ,d0:d3,s2_limb)
|
mulul R(s2_limb),R(d0):R(d3)
|
||||||
INSN2(addx,l ,d3,d1)
|
addxl R(d1),R(d3)
|
||||||
INSN2(addx,l ,d0,d5)
|
addxl R(d5),R(d0)
|
||||||
INSN2(sub,l ,MEM_POSTINC(res_ptr),d3)
|
subl R(d3),MEM_POSTINC(res_ptr)
|
||||||
|
|
||||||
dbf size,Loop
|
dbf R(s1_size),L(Loop)
|
||||||
INSN2(addx,l ,d0,d5)
|
addxl R(d5),R(d0)
|
||||||
INSN2(sub,l ,size,#0x10000)
|
subl #0x10000,R(s1_size)
|
||||||
bcc Loop
|
bcc L(Loop)
|
||||||
|
|
||||||
/* Restore used registers from stack frame. */
|
/* Restore used registers from stack frame. */
|
||||||
INSN2(movem,l ,d2-d5,MEM_POSTINC(sp))
|
moveml MEM_POSTINC(sp),R(d2)-R(d5)
|
||||||
|
|
||||||
rts
|
rts
|
||||||
|
EPILOG(__mpn_submul_1)
|
||||||
|
|
|
@ -0,0 +1,149 @@
|
||||||
|
/* mc68020 __mpn_rshift -- Shift right a low-level natural-number integer.
|
||||||
|
|
||||||
|
Copyright (C) 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Library General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
option) any later version.
|
||||||
|
|
||||||
|
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Library General Public License
|
||||||
|
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||||
|
|
||||||
|
/*
|
||||||
|
INPUT PARAMETERS
|
||||||
|
res_ptr (sp + 4)
|
||||||
|
s_ptr (sp + 8)
|
||||||
|
s_size (sp + 12)
|
||||||
|
cnt (sp + 16)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "sysdep.h"
|
||||||
|
#include "asm-syntax.h"
|
||||||
|
|
||||||
|
#define res_ptr a1
|
||||||
|
#define s_ptr a0
|
||||||
|
#define s_size d6
|
||||||
|
#define cnt d4
|
||||||
|
|
||||||
|
TEXT
|
||||||
|
ALIGN
|
||||||
|
GLOBL C_SYMBOL_NAME(__mpn_rshift)
|
||||||
|
|
||||||
|
C_SYMBOL_NAME(__mpn_rshift:)
|
||||||
|
PROLOG(__mpn_rshift)
|
||||||
|
/* Save used registers on the stack. */
|
||||||
|
moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
|
||||||
|
|
||||||
|
/* Copy the arguments to registers. */
|
||||||
|
movel MEM_DISP(sp,28),R(res_ptr)
|
||||||
|
movel MEM_DISP(sp,32),R(s_ptr)
|
||||||
|
movel MEM_DISP(sp,36),R(s_size)
|
||||||
|
movel MEM_DISP(sp,40),R(cnt)
|
||||||
|
|
||||||
|
moveql #1,R(d5)
|
||||||
|
cmpl R(d5),R(cnt)
|
||||||
|
bne L(Lnormal)
|
||||||
|
cmpl R(res_ptr),R(s_ptr)
|
||||||
|
bls L(Lspecial) /* jump if res_ptr >= s_ptr */
|
||||||
|
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
|
||||||
|
lea MEM_INDX1(res_ptr,s_size,l,4),R(a2)
|
||||||
|
#else /* not mc68020 */
|
||||||
|
movel R(s_size),R(d0)
|
||||||
|
asll #2,R(d0)
|
||||||
|
lea MEM_INDX(res_ptr,d0,l),R(a2)
|
||||||
|
#endif
|
||||||
|
cmpl R(s_ptr),R(a2)
|
||||||
|
bls L(Lspecial) /* jump if s_ptr >= res_ptr + s_size */
|
||||||
|
|
||||||
|
L(Lnormal:)
|
||||||
|
moveql #32,R(d5)
|
||||||
|
subl R(cnt),R(d5)
|
||||||
|
movel MEM_POSTINC(s_ptr),R(d2)
|
||||||
|
movel R(d2),R(d0)
|
||||||
|
lsll R(d5),R(d0) /* compute carry limb */
|
||||||
|
|
||||||
|
lsrl R(cnt),R(d2)
|
||||||
|
movel R(d2),R(d1)
|
||||||
|
subql #1,R(s_size)
|
||||||
|
beq L(Lend)
|
||||||
|
lsrl #1,R(s_size)
|
||||||
|
bcs L(L1)
|
||||||
|
subql #1,R(s_size)
|
||||||
|
|
||||||
|
L(Loop:)
|
||||||
|
movel MEM_POSTINC(s_ptr),R(d2)
|
||||||
|
movel R(d2),R(d3)
|
||||||
|
lsll R(d5),R(d3)
|
||||||
|
orl R(d3),R(d1)
|
||||||
|
movel R(d1),MEM_POSTINC(res_ptr)
|
||||||
|
lsrl R(cnt),R(d2)
|
||||||
|
L(L1:)
|
||||||
|
movel MEM_POSTINC(s_ptr),R(d1)
|
||||||
|
movel R(d1),R(d3)
|
||||||
|
lsll R(d5),R(d3)
|
||||||
|
orl R(d3),R(d2)
|
||||||
|
movel R(d2),MEM_POSTINC(res_ptr)
|
||||||
|
lsrl R(cnt),R(d1)
|
||||||
|
|
||||||
|
dbf R(s_size),L(Loop)
|
||||||
|
subl #0x10000,R(s_size)
|
||||||
|
bcc L(Loop)
|
||||||
|
|
||||||
|
L(Lend:)
|
||||||
|
movel R(d1),MEM(res_ptr) /* store most significant limb */
|
||||||
|
|
||||||
|
/* Restore used registers from stack frame. */
|
||||||
|
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
|
||||||
|
rts
|
||||||
|
|
||||||
|
/* We loop from most significant end of the arrays, which is only
|
||||||
|
permissible if the source and destination don't overlap, since the
|
||||||
|
function is documented to work for overlapping source and destination. */
|
||||||
|
|
||||||
|
L(Lspecial:)
|
||||||
|
#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
|
||||||
|
lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr) /* s_ptr += 4 * s_size (scaled index) */
|
||||||
|
lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr) /* res_ptr += 4 * s_size */
|
||||||
|
#else /* not mc68020 */
|
||||||
|
movel R(s_size),R(d0)
|
||||||
|
asll #2,R(d0) /* d0 = 4 * s_size, the byte length of the operand */
|
||||||
|
addl R(d0),R(s_ptr) /* advance by the byte length in d0, matching the lea above */
|
||||||
|
addl R(d0),R(res_ptr) /* likewise for the destination pointer */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
clrl R(d0) /* initialize carry */
|
||||||
|
eorw #1,R(s_size)
|
||||||
|
lsrl #1,R(s_size)
|
||||||
|
bcc L(LL1)
|
||||||
|
subql #1,R(s_size)
|
||||||
|
|
||||||
|
L(LLoop:)
|
||||||
|
movel MEM_PREDEC(s_ptr),R(d2)
|
||||||
|
roxrl #1,R(d2)
|
||||||
|
movel R(d2),MEM_PREDEC(res_ptr)
|
||||||
|
L(LL1:)
|
||||||
|
movel MEM_PREDEC(s_ptr),R(d2)
|
||||||
|
roxrl #1,R(d2)
|
||||||
|
movel R(d2),MEM_PREDEC(res_ptr)
|
||||||
|
|
||||||
|
dbf R(s_size),L(LLoop)
|
||||||
|
roxrl #1,R(d0) /* save cy in msb */
|
||||||
|
subl #0x10000,R(s_size)
|
||||||
|
bcs L(LLend)
|
||||||
|
addl R(d0),R(d0) /* restore cy */
|
||||||
|
bra L(LLoop)
|
||||||
|
|
||||||
|
L(LLend:)
|
||||||
|
/* Restore used registers from stack frame. */
|
||||||
|
moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
|
||||||
|
rts
|
||||||
|
EPILOG(__mpn_rshift)
|
|
@ -1,7 +1,7 @@
|
||||||
/* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
/* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
store difference in a third limb vector.
|
store difference in a third limb vector.
|
||||||
|
|
||||||
Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of the GNU MP Library.
|
This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -27,50 +27,53 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||||
size (sp + 12)
|
size (sp + 12)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "sysdep.h"
|
||||||
#include "asm-syntax.h"
|
#include "asm-syntax.h"
|
||||||
|
|
||||||
TEXT
|
TEXT
|
||||||
ALIGN
|
ALIGN
|
||||||
GLOBL ___mpn_sub_n
|
GLOBL C_SYMBOL_NAME(__mpn_sub_n)
|
||||||
|
|
||||||
LAB(___mpn_sub_n)
|
C_SYMBOL_NAME(__mpn_sub_n:)
|
||||||
|
PROLOG(__mpn_sub_n)
|
||||||
/* Save used registers on the stack. */
|
/* Save used registers on the stack. */
|
||||||
INSN2(move,l ,MEM_PREDEC(sp),d2)
|
movel R(d2),MEM_PREDEC(sp)
|
||||||
INSN2(move,l ,MEM_PREDEC(sp),a2)
|
movel R(a2),MEM_PREDEC(sp)
|
||||||
|
|
||||||
/* Copy the arguments to registers. Better use movem? */
|
/* Copy the arguments to registers. Better use movem? */
|
||||||
INSN2(move,l ,a2,MEM_DISP(sp,12))
|
movel MEM_DISP(sp,12),R(a2)
|
||||||
INSN2(move,l ,a0,MEM_DISP(sp,16))
|
movel MEM_DISP(sp,16),R(a0)
|
||||||
INSN2(move,l ,a1,MEM_DISP(sp,20))
|
movel MEM_DISP(sp,20),R(a1)
|
||||||
INSN2(move,l ,d2,MEM_DISP(sp,24))
|
movel MEM_DISP(sp,24),R(d2)
|
||||||
|
|
||||||
INSN2(eor,w ,d2,#1)
|
eorw #1,R(d2)
|
||||||
INSN2(lsr,l ,d2,#1)
|
lsrl #1,R(d2)
|
||||||
bcc L1
|
bcc L(L1)
|
||||||
INSN2(subq,l ,d2,#1) /* clears cy as side effect */
|
subql #1,R(d2) /* clears cy as side effect */
|
||||||
|
|
||||||
LAB(Loop)
|
L(Loop:)
|
||||||
INSN2(move,l ,d0,MEM_POSTINC(a0))
|
movel MEM_POSTINC(a0),R(d0)
|
||||||
INSN2(move,l ,d1,MEM_POSTINC(a1))
|
movel MEM_POSTINC(a1),R(d1)
|
||||||
INSN2(subx,l ,d0,d1)
|
subxl R(d1),R(d0)
|
||||||
INSN2(move,l ,MEM_POSTINC(a2),d0)
|
movel R(d0),MEM_POSTINC(a2)
|
||||||
LAB(L1) INSN2(move,l ,d0,MEM_POSTINC(a0))
|
L(L1:) movel MEM_POSTINC(a0),R(d0)
|
||||||
INSN2(move,l ,d1,MEM_POSTINC(a1))
|
movel MEM_POSTINC(a1),R(d1)
|
||||||
INSN2(subx,l ,d0,d1)
|
subxl R(d1),R(d0)
|
||||||
INSN2(move,l ,MEM_POSTINC(a2),d0)
|
movel R(d0),MEM_POSTINC(a2)
|
||||||
|
|
||||||
dbf d2,Loop /* loop until 16 lsb of %4 == -1 */
|
dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
|
||||||
INSN2(subx,l ,d0,d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
|
subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
|
||||||
INSN2(sub,l ,d2,#0x10000)
|
subl #0x10000,R(d2)
|
||||||
bcs L2
|
bcs L(L2)
|
||||||
INSN2(add,l ,d0,d0) /* restore cy */
|
addl R(d0),R(d0) /* restore cy */
|
||||||
bra Loop
|
bra L(Loop)
|
||||||
|
|
||||||
LAB(L2)
|
L(L2:)
|
||||||
INSN1(neg,l ,d0)
|
negl R(d0)
|
||||||
|
|
||||||
/* Restore used registers from stack frame. */
|
/* Restore used registers from stack frame. */
|
||||||
INSN2(move,l ,a2,MEM_POSTINC(sp))
|
movel MEM_POSTINC(sp),R(a2)
|
||||||
INSN2(move,l ,d2,MEM_POSTINC(sp))
|
movel MEM_POSTINC(sp),R(d2)
|
||||||
|
|
||||||
rts
|
rts
|
||||||
|
EPILOG(__mpn_sub_n)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
; mc88100 __mpn_add -- Add two limb vectors of the same length > 0 and store
|
; mc88100 __mpn_add -- Add two limb vectors of the same length > 0 and store
|
||||||
; sum in a third limb vector.
|
; sum in a third limb vector.
|
||||||
|
|
||||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
; This file is part of the GNU MP Library.
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,199 @@
|
||||||
|
; mc88110 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||||
|
; sum in a third limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
#define res_ptr r2
|
||||||
|
#define s1_ptr r3
|
||||||
|
#define s2_ptr r4
|
||||||
|
#define size r5
|
||||||
|
|
||||||
|
#include "sysdep.h"
|
||||||
|
|
||||||
|
text
|
||||||
|
align 16
|
||||||
|
global C_SYMBOL_NAME(__mpn_add_n)
|
||||||
|
C_SYMBOL_NAME(__mpn_add_n):
|
||||||
|
addu.co r0,r0,r0 ; clear cy flag
|
||||||
|
xor r12,s2_ptr,res_ptr
|
||||||
|
bb1 2,r12,L1
|
||||||
|
; ** V1a **
|
||||||
|
L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned?
|
||||||
|
/* Add least significant limb separately to align res_ptr and s2_ptr */
|
||||||
|
ld r10,s1_ptr,0
|
||||||
|
addu s1_ptr,s1_ptr,4
|
||||||
|
ld r8,s2_ptr,0
|
||||||
|
addu s2_ptr,s2_ptr,4
|
||||||
|
subu size,size,1
|
||||||
|
addu.co r6,r10,r8
|
||||||
|
st r6,res_ptr,0
|
||||||
|
addu res_ptr,res_ptr,4
|
||||||
|
L_v1: cmp r12,size,2
|
||||||
|
bb1 lt,r12,Lend2
|
||||||
|
|
||||||
|
ld r10,s1_ptr,0
|
||||||
|
ld r12,s1_ptr,4
|
||||||
|
ld.d r8,s2_ptr,0
|
||||||
|
subu size,size,10
|
||||||
|
bcnd lt0,size,Lfin1
|
||||||
|
/* Add blocks of 8 limbs until less than 8 limbs remain */
|
||||||
|
align 8
|
||||||
|
Loop1: subu size,size,8
|
||||||
|
addu.cio r6,r10,r8
|
||||||
|
ld r10,s1_ptr,8
|
||||||
|
addu.cio r7,r12,r9
|
||||||
|
ld r12,s1_ptr,12
|
||||||
|
ld.d r8,s2_ptr,8
|
||||||
|
st.d r6,res_ptr,0
|
||||||
|
addu.cio r6,r10,r8
|
||||||
|
ld r10,s1_ptr,16
|
||||||
|
addu.cio r7,r12,r9
|
||||||
|
ld r12,s1_ptr,20
|
||||||
|
ld.d r8,s2_ptr,16
|
||||||
|
st.d r6,res_ptr,8
|
||||||
|
addu.cio r6,r10,r8
|
||||||
|
ld r10,s1_ptr,24
|
||||||
|
addu.cio r7,r12,r9
|
||||||
|
ld r12,s1_ptr,28
|
||||||
|
ld.d r8,s2_ptr,24
|
||||||
|
st.d r6,res_ptr,16
|
||||||
|
addu.cio r6,r10,r8
|
||||||
|
ld r10,s1_ptr,32
|
||||||
|
addu.cio r7,r12,r9
|
||||||
|
ld r12,s1_ptr,36
|
||||||
|
addu s1_ptr,s1_ptr,32
|
||||||
|
ld.d r8,s2_ptr,32
|
||||||
|
addu s2_ptr,s2_ptr,32
|
||||||
|
st.d r6,res_ptr,24
|
||||||
|
addu res_ptr,res_ptr,32
|
||||||
|
bcnd ge0,size,Loop1
|
||||||
|
|
||||||
|
Lfin1: addu size,size,8-2
|
||||||
|
bcnd lt0,size,Lend1
|
||||||
|
/* Add blocks of 2 limbs until less than 2 limbs remain */
|
||||||
|
Loope1: addu.cio r6,r10,r8
|
||||||
|
ld r10,s1_ptr,8
|
||||||
|
addu.cio r7,r12,r9
|
||||||
|
ld r12,s1_ptr,12
|
||||||
|
ld.d r8,s2_ptr,8
|
||||||
|
st.d r6,res_ptr,0
|
||||||
|
subu size,size,2
|
||||||
|
addu s1_ptr,s1_ptr,8
|
||||||
|
addu s2_ptr,s2_ptr,8
|
||||||
|
addu res_ptr,res_ptr,8
|
||||||
|
bcnd ge0,size,Loope1
|
||||||
|
Lend1: addu.cio r6,r10,r8
|
||||||
|
addu.cio r7,r12,r9
|
||||||
|
st.d r6,res_ptr,0
|
||||||
|
|
||||||
|
bb0 0,size,Lret1
|
||||||
|
/* Add last limb */
|
||||||
|
ld r10,s1_ptr,8
|
||||||
|
ld r8,s2_ptr,8
|
||||||
|
addu.cio r6,r10,r8
|
||||||
|
st r6,res_ptr,8
|
||||||
|
|
||||||
|
Lret1: jmp.n r1
|
||||||
|
addu.ci r2,r0,r0 ; return carry-out from most sign. limb
|
||||||
|
|
||||||
|
L1: xor r12,s1_ptr,res_ptr
|
||||||
|
bb1 2,r12,L2
|
||||||
|
; ** V1b **
|
||||||
|
or r12,r0,s2_ptr
|
||||||
|
or s2_ptr,r0,s1_ptr
|
||||||
|
or s1_ptr,r0,r12
|
||||||
|
br L0
|
||||||
|
|
||||||
|
; ** V2 **
|
||||||
|
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
|
||||||
|
alignment of s2_ptr and res_ptr differ. Since there are only two ways
|
||||||
|
things can be aligned (that we care about) we now know that the alignment
|
||||||
|
of s1_ptr and s2_ptr are the same. */
|
||||||
|
|
||||||
|
L2: cmp r12,size,1
|
||||||
|
bb1 eq,r12,Ljone
|
||||||
|
bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
|
||||||
|
/* Add least significant limb separately to align s1_ptr and s2_ptr */
|
||||||
|
ld r10,s1_ptr,0
|
||||||
|
addu s1_ptr,s1_ptr,4
|
||||||
|
ld r8,s2_ptr,0
|
||||||
|
addu s2_ptr,s2_ptr,4
|
||||||
|
subu size,size,1
|
||||||
|
addu.co r6,r10,r8
|
||||||
|
st r6,res_ptr,0
|
||||||
|
addu res_ptr,res_ptr,4
|
||||||
|
|
||||||
|
L_v2: subu size,size,8
|
||||||
|
bcnd lt0,size,Lfin2
|
||||||
|
/* Add blocks of 8 limbs until less than 8 limbs remain */
|
||||||
|
align 8
|
||||||
|
Loop2: subu size,size,8
|
||||||
|
ld.d r8,s1_ptr,0
|
||||||
|
ld.d r6,s2_ptr,0
|
||||||
|
addu.cio r8,r8,r6
|
||||||
|
st r8,res_ptr,0
|
||||||
|
addu.cio r9,r9,r7
|
||||||
|
st r9,res_ptr,4
|
||||||
|
ld.d r8,s1_ptr,8
|
||||||
|
ld.d r6,s2_ptr,8
|
||||||
|
addu.cio r8,r8,r6
|
||||||
|
st r8,res_ptr,8
|
||||||
|
addu.cio r9,r9,r7
|
||||||
|
st r9,res_ptr,12
|
||||||
|
ld.d r8,s1_ptr,16
|
||||||
|
ld.d r6,s2_ptr,16
|
||||||
|
addu.cio r8,r8,r6
|
||||||
|
st r8,res_ptr,16
|
||||||
|
addu.cio r9,r9,r7
|
||||||
|
st r9,res_ptr,20
|
||||||
|
ld.d r8,s1_ptr,24
|
||||||
|
ld.d r6,s2_ptr,24
|
||||||
|
addu.cio r8,r8,r6
|
||||||
|
st r8,res_ptr,24
|
||||||
|
addu.cio r9,r9,r7
|
||||||
|
st r9,res_ptr,28
|
||||||
|
addu s1_ptr,s1_ptr,32
|
||||||
|
addu s2_ptr,s2_ptr,32
|
||||||
|
addu res_ptr,res_ptr,32
|
||||||
|
bcnd ge0,size,Loop2
|
||||||
|
|
||||||
|
Lfin2: addu size,size,8-2
|
||||||
|
bcnd lt0,size,Lend2
|
||||||
|
Loope2: ld.d r8,s1_ptr,0
|
||||||
|
ld.d r6,s2_ptr,0
|
||||||
|
addu.cio r8,r8,r6
|
||||||
|
st r8,res_ptr,0
|
||||||
|
addu.cio r9,r9,r7
|
||||||
|
st r9,res_ptr,4
|
||||||
|
subu size,size,2
|
||||||
|
addu s1_ptr,s1_ptr,8
|
||||||
|
addu s2_ptr,s2_ptr,8
|
||||||
|
addu res_ptr,res_ptr,8
|
||||||
|
bcnd ge0,size,Loope2
|
||||||
|
Lend2: bb0 0,size,Lret2
|
||||||
|
/* Add last limb */
|
||||||
|
Ljone: ld r10,s1_ptr,0
|
||||||
|
ld r8,s2_ptr,0
|
||||||
|
addu.cio r6,r10,r8
|
||||||
|
st r6,res_ptr,0
|
||||||
|
|
||||||
|
Lret2: jmp.n r1
|
||||||
|
addu.ci r2,r0,r0 ; return carry-out from most sign. limb
|
|
@ -0,0 +1,60 @@
|
||||||
|
; mc88110 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
|
||||||
|
; add the product to a second limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
; res_ptr r2
|
||||||
|
; s1_ptr r3
|
||||||
|
; size r4
|
||||||
|
; s2_limb r5
|
||||||
|
|
||||||
|
text
|
||||||
|
align 16
|
||||||
|
global ___mpn_addmul_1
|
||||||
|
___mpn_addmul_1:
|
||||||
|
lda r3,r3[r4]
|
||||||
|
lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval
|
||||||
|
subu r4,r0,r4
|
||||||
|
addu.co r2,r0,r0 ; r2 = cy = 0
|
||||||
|
|
||||||
|
ld r6,r3[r4]
|
||||||
|
addu r4,r4,1
|
||||||
|
subu r8,r8,4
|
||||||
|
bcnd.n eq0,r4,Lend
|
||||||
|
mulu.d r10,r6,r5
|
||||||
|
|
||||||
|
Loop: ld r7,r8[r4]
|
||||||
|
ld r6,r3[r4]
|
||||||
|
addu.cio r9,r11,r2
|
||||||
|
addu.ci r2,r10,r0
|
||||||
|
addu.co r9,r9,r7
|
||||||
|
st r9,r8[r4]
|
||||||
|
addu r4,r4,1
|
||||||
|
mulu.d r10,r6,r5
|
||||||
|
bcnd ne0,r4,Loop
|
||||||
|
|
||||||
|
Lend: ld r7,r8,0
|
||||||
|
addu.cio r9,r11,r2
|
||||||
|
addu.ci r2,r10,r0
|
||||||
|
addu.co r9,r9,r7
|
||||||
|
st r9,r8,0
|
||||||
|
jmp.n r1
|
||||||
|
addu.ci r2,r2,r0
|
|
@ -1,7 +1,7 @@
|
||||||
; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||||
; store the product in a second limb vector.
|
; store the product in a second limb vector.
|
||||||
|
|
||||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
; This file is part of the GNU MP Library.
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -56,29 +56,3 @@ Lend: addu.cio r9,r11,r2
|
||||||
st r9,r8,4
|
st r9,r8,4
|
||||||
jmp.n r1
|
jmp.n r1
|
||||||
addu.ci r2,r10,r0
|
addu.ci r2,r10,r0
|
||||||
|
|
||||||
; This is the Right Way to do this on '110. 4 cycles / 64-bit limb.
|
|
||||||
; ld.d r10,
|
|
||||||
; mulu.d
|
|
||||||
; addu.cio
|
|
||||||
; addu.cio
|
|
||||||
; st.d
|
|
||||||
; mulu.d ,r11,r5
|
|
||||||
; ld.d r12,
|
|
||||||
; mulu.d ,r10,r5
|
|
||||||
; addu.cio
|
|
||||||
; addu.cio
|
|
||||||
; st.d
|
|
||||||
; mulu.d
|
|
||||||
; ld.d r10,
|
|
||||||
; mulu.d
|
|
||||||
; addu.cio
|
|
||||||
; addu.cio
|
|
||||||
; st.d
|
|
||||||
; mulu.d
|
|
||||||
; ld.d r10,
|
|
||||||
; mulu.d
|
|
||||||
; addu.cio
|
|
||||||
; addu.cio
|
|
||||||
; st.d
|
|
||||||
; mulu.d
|
|
||||||
|
|
|
@ -0,0 +1,275 @@
|
||||||
|
; mc88110 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
|
; store difference in a third limb vector.
|
||||||
|
|
||||||
|
; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
; The GNU MP Library is free software; you can redistribute it and/or modify
|
||||||
|
; it under the terms of the GNU Library General Public License as published by
|
||||||
|
; the Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
; option) any later version.
|
||||||
|
|
||||||
|
; The GNU MP Library is distributed in the hope that it will be useful, but
|
||||||
|
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
||||||
|
; License for more details.
|
||||||
|
|
||||||
|
; You should have received a copy of the GNU Library General Public License
|
||||||
|
; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
||||||
|
; the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
|
||||||
|
|
||||||
|
; INPUT PARAMETERS
|
||||||
|
#define res_ptr r2
|
||||||
|
#define s1_ptr r3
|
||||||
|
#define s2_ptr r4
|
||||||
|
#define size r5
|
||||||
|
|
||||||
|
#include "sysdep.h"
|
||||||
|
|
||||||
|
text
|
||||||
|
align 16
|
||||||
|
global C_SYMBOL_NAME(__mpn_sub_n)
|
||||||
|
C_SYMBOL_NAME(__mpn_sub_n):
|
||||||
|
subu.co r0,r0,r0 ; set cy flag
|
||||||
|
xor r12,s2_ptr,res_ptr
|
||||||
|
bb1 2,r12,L1
|
||||||
|
; ** V1a **
|
||||||
|
L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned
|
||||||
|
/* Subtract least significant limb separately to align res_ptr and s2_ptr */
|
||||||
|
ld r10,s1_ptr,0
|
||||||
|
addu s1_ptr,s1_ptr,4
|
||||||
|
ld r8,s2_ptr,0
|
||||||
|
addu s2_ptr,s2_ptr,4
|
||||||
|
subu size,size,1
|
||||||
|
subu.co r6,r10,r8
|
||||||
|
st r6,res_ptr,0
|
||||||
|
addu res_ptr,res_ptr,4
|
||||||
|
L_v1: cmp r12,size,2
|
||||||
|
bb1 lt,r12,Lend2
|
||||||
|
|
||||||
|
ld r10,s1_ptr,0
|
||||||
|
ld r12,s1_ptr,4
|
||||||
|
ld.d r8,s2_ptr,0
|
||||||
|
subu size,size,10
|
||||||
|
bcnd lt0,size,Lfin1
|
||||||
|
/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
|
||||||
|
align 8
|
||||||
|
Loop1: subu size,size,8
|
||||||
|
subu.cio r6,r10,r8
|
||||||
|
ld r10,s1_ptr,8
|
||||||
|
subu.cio r7,r12,r9
|
||||||
|
ld r12,s1_ptr,12
|
||||||
|
ld.d r8,s2_ptr,8
|
||||||
|
st.d r6,res_ptr,0
|
||||||
|
subu.cio r6,r10,r8
|
||||||
|
ld r10,s1_ptr,16
|
||||||
|
subu.cio r7,r12,r9
|
||||||
|
ld r12,s1_ptr,20
|
||||||
|
ld.d r8,s2_ptr,16
|
||||||
|
st.d r6,res_ptr,8
|
||||||
|
subu.cio r6,r10,r8
|
||||||
|
ld r10,s1_ptr,24
|
||||||
|
subu.cio r7,r12,r9
|
||||||
|
ld r12,s1_ptr,28
|
||||||
|
ld.d r8,s2_ptr,24
|
||||||
|
st.d r6,res_ptr,16
|
||||||
|
subu.cio r6,r10,r8
|
||||||
|
ld r10,s1_ptr,32
|
||||||
|
subu.cio r7,r12,r9
|
||||||
|
ld r12,s1_ptr,36
|
||||||
|
addu s1_ptr,s1_ptr,32
|
||||||
|
ld.d r8,s2_ptr,32
|
||||||
|
addu s2_ptr,s2_ptr,32
|
||||||
|
st.d r6,res_ptr,24
|
||||||
|
addu res_ptr,res_ptr,32
|
||||||
|
bcnd ge0,size,Loop1
|
||||||
|
|
||||||
|
Lfin1: addu size,size,8-2
|
||||||
|
bcnd lt0,size,Lend1
|
||||||
|
/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
|
||||||
|
Loope1: subu.cio r6,r10,r8
|
||||||
|
ld r10,s1_ptr,8
|
||||||
|
subu.cio r7,r12,r9
|
||||||
|
ld r12,s1_ptr,12
|
||||||
|
ld.d r8,s2_ptr,8
|
||||||
|
st.d r6,res_ptr,0
|
||||||
|
subu size,size,2
|
||||||
|
addu s1_ptr,s1_ptr,8
|
||||||
|
addu s2_ptr,s2_ptr,8
|
||||||
|
addu res_ptr,res_ptr,8
|
||||||
|
bcnd ge0,size,Loope1
|
||||||
|
Lend1: subu.cio r6,r10,r8
|
||||||
|
subu.cio r7,r12,r9
|
||||||
|
st.d r6,res_ptr,0
|
||||||
|
|
||||||
|
bb0 0,size,Lret1
|
||||||
|
/* Subtract last limb */
|
||||||
|
ld r10,s1_ptr,8
|
||||||
|
ld r8,s2_ptr,8
|
||||||
|
subu.cio r6,r10,r8
|
||||||
|
st r6,res_ptr,8
|
||||||
|
|
||||||
|
Lret1: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
|
||||||
|
jmp.n r1
|
||||||
|
xor r2,r2,1
|
||||||
|
|
||||||
|
L1: xor r12,s1_ptr,res_ptr
|
||||||
|
bb1 2,r12,L2
|
||||||
|
; ** V1b **
|
||||||
|
bb0 2,res_ptr,L_v1b ; branch if res_ptr is aligned
|
||||||
|
/* Subtract least significant limb separately to align res_ptr and s1_ptr */
|
||||||
|
ld r10,s2_ptr,0
|
||||||
|
addu s2_ptr,s2_ptr,4
|
||||||
|
ld r8,s1_ptr,0
|
||||||
|
addu s1_ptr,s1_ptr,4
|
||||||
|
subu size,size,1
|
||||||
|
subu.co r6,r8,r10
|
||||||
|
st r6,res_ptr,0
|
||||||
|
addu res_ptr,res_ptr,4
|
||||||
|
L_v1b: cmp r12,size,2
|
||||||
|
bb1 lt,r12,Lend2
|
||||||
|
|
||||||
|
ld r10,s2_ptr,0
|
||||||
|
ld r12,s2_ptr,4
|
||||||
|
ld.d r8,s1_ptr,0
|
||||||
|
subu size,size,10
|
||||||
|
bcnd lt0,size,Lfin1b
|
||||||
|
/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
|
||||||
|
align 8
|
||||||
|
Loop1b: subu size,size,8
|
||||||
|
subu.cio r6,r8,r10
|
||||||
|
ld r10,s2_ptr,8
|
||||||
|
subu.cio r7,r9,r12
|
||||||
|
ld r12,s2_ptr,12
|
||||||
|
ld.d r8,s1_ptr,8
|
||||||
|
st.d r6,res_ptr,0
|
||||||
|
subu.cio r6,r8,r10
|
||||||
|
ld r10,s2_ptr,16
|
||||||
|
subu.cio r7,r9,r12
|
||||||
|
ld r12,s2_ptr,20
|
||||||
|
ld.d r8,s1_ptr,16
|
||||||
|
st.d r6,res_ptr,8
|
||||||
|
subu.cio r6,r8,r10
|
||||||
|
ld r10,s2_ptr,24
|
||||||
|
subu.cio r7,r9,r12
|
||||||
|
ld r12,s2_ptr,28
|
||||||
|
ld.d r8,s1_ptr,24
|
||||||
|
st.d r6,res_ptr,16
|
||||||
|
subu.cio r6,r8,r10
|
||||||
|
ld r10,s2_ptr,32
|
||||||
|
subu.cio r7,r9,r12
|
||||||
|
ld r12,s2_ptr,36
|
||||||
|
addu s2_ptr,s2_ptr,32
|
||||||
|
ld.d r8,s1_ptr,32
|
||||||
|
addu s1_ptr,s1_ptr,32
|
||||||
|
st.d r6,res_ptr,24
|
||||||
|
addu res_ptr,res_ptr,32
|
||||||
|
bcnd ge0,size,Loop1b
|
||||||
|
|
||||||
|
Lfin1b: addu size,size,8-2
|
||||||
|
bcnd lt0,size,Lend1b
|
||||||
|
/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
|
||||||
|
Loope1b:subu.cio r6,r8,r10
|
||||||
|
ld r10,s2_ptr,8
|
||||||
|
subu.cio r7,r9,r12
|
||||||
|
ld r12,s2_ptr,12
|
||||||
|
ld.d r8,s1_ptr,8
|
||||||
|
st.d r6,res_ptr,0
|
||||||
|
subu size,size,2
|
||||||
|
addu s1_ptr,s1_ptr,8
|
||||||
|
addu s2_ptr,s2_ptr,8
|
||||||
|
addu res_ptr,res_ptr,8
|
||||||
|
bcnd ge0,size,Loope1b
|
||||||
|
Lend1b: subu.cio r6,r8,r10
|
||||||
|
subu.cio r7,r9,r12
|
||||||
|
st.d r6,res_ptr,0
|
||||||
|
|
||||||
|
bb0 0,size,Lret1b
|
||||||
|
/* Subtract last limb */
|
||||||
|
ld r10,s2_ptr,8
|
||||||
|
ld r8,s1_ptr,8
|
||||||
|
subu.cio r6,r8,r10
|
||||||
|
st r6,res_ptr,8
|
||||||
|
|
||||||
|
Lret1b: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
|
||||||
|
jmp.n r1
|
||||||
|
xor r2,r2,1
|
||||||
|
|
||||||
|
; ** V2 **
|
||||||
|
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
|
||||||
|
alignment of s2_ptr and res_ptr differ. Since there are only two ways
|
||||||
|
things can be aligned (that we care about) we now know that the alignment
|
||||||
|
of s1_ptr and s2_ptr are the same. */
|
||||||
|
|
||||||
|
L2: cmp r12,size,1
|
||||||
|
bb1 eq,r12,Ljone
|
||||||
|
bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
|
||||||
|
/* Subtract least significant limb separately to align s1_ptr and s2_ptr */
|
||||||
|
ld r10,s1_ptr,0
|
||||||
|
addu s1_ptr,s1_ptr,4
|
||||||
|
ld r8,s2_ptr,0
|
||||||
|
addu s2_ptr,s2_ptr,4
|
||||||
|
subu size,size,1
|
||||||
|
subu.co r6,r10,r8
|
||||||
|
st r6,res_ptr,0
|
||||||
|
addu res_ptr,res_ptr,4
|
||||||
|
|
||||||
|
L_v2: subu size,size,8
|
||||||
|
bcnd lt0,size,Lfin2
|
||||||
|
/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
|
||||||
|
align 8
|
||||||
|
Loop2: subu size,size,8
|
||||||
|
ld.d r8,s1_ptr,0
|
||||||
|
ld.d r6,s2_ptr,0
|
||||||
|
subu.cio r8,r8,r6
|
||||||
|
st r8,res_ptr,0
|
||||||
|
subu.cio r9,r9,r7
|
||||||
|
st r9,res_ptr,4
|
||||||
|
ld.d r8,s1_ptr,8
|
||||||
|
ld.d r6,s2_ptr,8
|
||||||
|
subu.cio r8,r8,r6
|
||||||
|
st r8,res_ptr,8
|
||||||
|
subu.cio r9,r9,r7
|
||||||
|
st r9,res_ptr,12
|
||||||
|
ld.d r8,s1_ptr,16
|
||||||
|
ld.d r6,s2_ptr,16
|
||||||
|
subu.cio r8,r8,r6
|
||||||
|
st r8,res_ptr,16
|
||||||
|
subu.cio r9,r9,r7
|
||||||
|
st r9,res_ptr,20
|
||||||
|
ld.d r8,s1_ptr,24
|
||||||
|
ld.d r6,s2_ptr,24
|
||||||
|
subu.cio r8,r8,r6
|
||||||
|
st r8,res_ptr,24
|
||||||
|
subu.cio r9,r9,r7
|
||||||
|
st r9,res_ptr,28
|
||||||
|
addu s1_ptr,s1_ptr,32
|
||||||
|
addu s2_ptr,s2_ptr,32
|
||||||
|
addu res_ptr,res_ptr,32
|
||||||
|
bcnd ge0,size,Loop2
|
||||||
|
|
||||||
|
Lfin2: addu size,size,8-2
|
||||||
|
bcnd lt0,size,Lend2
|
||||||
|
Loope2: ld.d r8,s1_ptr,0
|
||||||
|
ld.d r6,s2_ptr,0
|
||||||
|
subu.cio r8,r8,r6
|
||||||
|
st r8,res_ptr,0
|
||||||
|
subu.cio r9,r9,r7
|
||||||
|
st r9,res_ptr,4
|
||||||
|
subu size,size,2
|
||||||
|
addu s1_ptr,s1_ptr,8
|
||||||
|
addu s2_ptr,s2_ptr,8
|
||||||
|
addu res_ptr,res_ptr,8
|
||||||
|
bcnd ge0,size,Loope2
|
||||||
|
Lend2: bb0 0,size,Lret2
|
||||||
|
/* Subtract last limb */
|
||||||
|
Ljone: ld r10,s1_ptr,0
|
||||||
|
ld r8,s2_ptr,0
|
||||||
|
subu.cio r6,r10,r8
|
||||||
|
st r6,res_ptr,0
|
||||||
|
|
||||||
|
Lret2: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
|
||||||
|
jmp.n r1
|
||||||
|
xor r2,r2,1
|
|
@ -1,7 +1,7 @@
|
||||||
; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||||
; store the product in a second limb vector.
|
; store the product in a second limb vector.
|
||||||
|
|
||||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
; This file is part of the GNU MP Library.
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -55,14 +55,14 @@ ___mpn_mul_1:
|
||||||
; Make S1_PTR and RES_PTR point at the end of their blocks
|
; Make S1_PTR and RES_PTR point at the end of their blocks
|
||||||
; and negate SIZE.
|
; and negate SIZE.
|
||||||
lda r3,r3[r4]
|
lda r3,r3[r4]
|
||||||
lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
|
lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
|
||||||
subu r4,r0,r4
|
subu r4,r0,r4
|
||||||
|
|
||||||
addu.co r2,r0,r0 ; r2 = cy = 0
|
addu.co r2,r0,r0 ; r2 = cy = 0
|
||||||
ld r9,r3[r4]
|
ld r9,r3[r4]
|
||||||
mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
|
mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
|
||||||
extu r8,r5,16 ; r8 = hi(S2_LIMB)
|
extu r8,r5,16 ; r8 = hi(S2_LIMB)
|
||||||
bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
|
bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
|
||||||
subu r6,r6,4
|
subu r6,r6,4
|
||||||
|
|
||||||
; General code for any value of S2_LIMB.
|
; General code for any value of S2_LIMB.
|
||||||
|
@ -75,28 +75,27 @@ ___mpn_mul_1:
|
||||||
br.n L1
|
br.n L1
|
||||||
addu r4,r4,1
|
addu r4,r4,1
|
||||||
|
|
||||||
Loop:
|
Loop: ld r9,r3[r4]
|
||||||
ld r9,r3[r4]
|
|
||||||
st r26,r6[r4]
|
st r26,r6[r4]
|
||||||
; bcnd ne0,r0,0 ; bubble
|
; bcnd ne0,r0,0 ; bubble
|
||||||
addu r4,r4,1
|
addu r4,r4,1
|
||||||
L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
|
L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
|
||||||
mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
|
mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
|
||||||
mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
|
mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
|
||||||
mul r10,r12,r8 ; r10 = prod_1a mul_3
|
mul r10,r12,r8 ; r10 = prod_1a mul_3
|
||||||
extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
|
extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
|
||||||
mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
|
mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
|
||||||
mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
|
mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
|
||||||
extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
|
extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
|
||||||
addu r10,r10,r11 ; addu_1 WB extu_2
|
addu r10,r10,r11 ; addu_1 WB extu_2
|
||||||
; bcnd ne0,r0,0 ; bubble WB addu_1
|
; bcnd ne0,r0,0 ; bubble WB addu_1
|
||||||
addu.co r10,r10,r12 ; WB mul_4
|
addu.co r10,r10,r12 ; WB mul_4
|
||||||
mask.u r10,r10,0xffff ; move the 16 most significant bits...
|
mask.u r10,r10,0xffff ; move the 16 most significant bits...
|
||||||
addu.ci r10,r10,r0 ; ...to the low half of the word...
|
addu.ci r10,r10,r0 ; ...to the low half of the word...
|
||||||
rot r10,r10,16 ; ...and put carry in pos 16.
|
rot r10,r10,16 ; ...and put carry in pos 16.
|
||||||
addu.co r26,r26,r2 ; add old carry limb
|
addu.co r26,r26,r2 ; add old carry limb
|
||||||
bcnd.n ne0,r4,Loop
|
bcnd.n ne0,r4,Loop
|
||||||
addu.ci r2,r25,r10 ; compute new carry limb
|
addu.ci r2,r25,r10 ; compute new carry limb
|
||||||
|
|
||||||
st r26,r6[r4]
|
st r26,r6[r4]
|
||||||
ld.d r25,r31,8
|
ld.d r25,r31,8
|
||||||
|
@ -109,20 +108,19 @@ Lsmall:
|
||||||
br.n SL1
|
br.n SL1
|
||||||
addu r4,r4,1
|
addu r4,r4,1
|
||||||
|
|
||||||
SLoop:
|
SLoop: ld r9,r3[r4] ;
|
||||||
ld r9,r3[r4] ;
|
st r8,r6[r4] ;
|
||||||
st r8,r6[r4] ;
|
addu r4,r4,1 ;
|
||||||
addu r4,r4,1 ;
|
SL1: mul r8,r9,r5 ; low word of product
|
||||||
SL1: mul r8,r9,r5 ; low word of product
|
mask r12,r9,0xffff ; r12 = lo(s1_limb)
|
||||||
mask r12,r9,0xffff ; r12 = lo(s1_limb)
|
extu r13,r9,16 ; r13 = hi(s1_limb)
|
||||||
extu r13,r9,16 ; r13 = hi(s1_limb)
|
mul r11,r12,r7 ; r11 = prod_0
|
||||||
mul r11,r12,r7 ; r11 = prod_0
|
mul r12,r13,r7 ; r12 = prod_1b
|
||||||
mul r12,r13,r7 ; r12 = prod_1b
|
addu.cio r8,r8,r2 ; add old carry limb
|
||||||
addu.cio r8,r8,r2 ; add old carry limb
|
extu r10,r11,16 ; r11 = hi(prod_0)
|
||||||
extu r10,r11,16 ; r11 = hi(prod_0)
|
addu r10,r10,r12 ;
|
||||||
addu r10,r10,r12 ;
|
|
||||||
bcnd.n ne0,r4,SLoop
|
bcnd.n ne0,r4,SLoop
|
||||||
extu r2,r10,16 ; r2 = new carry limb
|
extu r2,r10,16 ; r2 = new carry limb
|
||||||
|
|
||||||
jmp.n r1
|
jmp.n r1
|
||||||
st r8,r6[r4]
|
st r8,r6[r4]
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
; mc88100 __mpn_sub -- Subtract two limb vectors of the same length > 0 and
|
; mc88100 __mpn_sub -- Subtract two limb vectors of the same length > 0 and
|
||||||
; store difference in a third limb vector.
|
; store difference in a third limb vector.
|
||||||
|
|
||||||
; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
; Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
; This file is part of the GNU MP Library.
|
; This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -41,9 +41,10 @@ ___mpn_sub_n:
|
||||||
extu r10,r5,3
|
extu r10,r5,3
|
||||||
ld r7,r4,0 ; read first limb from s2_ptr
|
ld r7,r4,0 ; read first limb from s2_ptr
|
||||||
|
|
||||||
subu.co r5,r0,r5 ; (clear carry as side effect)
|
subu r5,r0,r5
|
||||||
mak r5,r5,3<4>
|
mak r5,r5,3<4>
|
||||||
bcnd eq0,r5,Lzero
|
bcnd.n eq0,r5,Lzero
|
||||||
|
subu.co r0,r0,r0 ; initialize carry
|
||||||
|
|
||||||
or r12,r0,lo16(Lbase)
|
or r12,r0,lo16(Lbase)
|
||||||
or.u r12,r12,hi16(Lbase)
|
or.u r12,r12,hi16(Lbase)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and
|
# MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and
|
||||||
# add the product to a second limb vector.
|
# add the product to a second limb vector.
|
||||||
|
|
||||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
# Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This file is part of the GNU MP Library.
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ Loop: lw $10,0($4)
|
||||||
addu $2,$2,$10
|
addu $2,$2,$10
|
||||||
sw $3,0($4)
|
sw $3,0($4)
|
||||||
addiu $4,$4,4
|
addiu $4,$4,4
|
||||||
bne $6,$0,Loop # should be "bnel"
|
bne $6,$0,Loop
|
||||||
addu $2,$9,$2 # add high product limb and carry from addition
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
# cool down phase 1
|
# cool down phase 1
|
||||||
|
|
|
@ -63,7 +63,7 @@ Loop: ld $10,0($4)
|
||||||
daddu $2,$2,$10
|
daddu $2,$2,$10
|
||||||
sd $3,0($4)
|
sd $3,0($4)
|
||||||
daddiu $4,$4,8
|
daddiu $4,$4,8
|
||||||
bne $6,$0,Loop # should be "bnel"
|
bne $6,$0,Loop
|
||||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
# cool down phase 1
|
# cool down phase 1
|
||||||
|
|
|
@ -59,7 +59,7 @@ Loop: mflo $10
|
||||||
sltu $2,$10,$2 # carry from previous addition -> $2
|
sltu $2,$10,$2 # carry from previous addition -> $2
|
||||||
sd $10,0($4)
|
sd $10,0($4)
|
||||||
daddiu $4,$4,8
|
daddiu $4,$4,8
|
||||||
bne $6,$0,Loop # should be "bnel"
|
bne $6,$0,Loop
|
||||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
# cool down phase 1
|
# cool down phase 1
|
||||||
|
|
|
@ -63,7 +63,7 @@ Loop: ld $10,0($4)
|
||||||
daddu $2,$2,$10
|
daddu $2,$2,$10
|
||||||
sd $3,0($4)
|
sd $3,0($4)
|
||||||
daddiu $4,$4,8
|
daddiu $4,$4,8
|
||||||
bne $6,$0,Loop # should be "bnel"
|
bne $6,$0,Loop
|
||||||
daddu $2,$9,$2 # add high product limb and carry from addition
|
daddu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
# cool down phase 1
|
# cool down phase 1
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
# MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and
|
||||||
# store the product in a second limb vector.
|
# store the product in a second limb vector.
|
||||||
|
|
||||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
# Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This file is part of the GNU MP Library.
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ Loop: mflo $10
|
||||||
sltu $2,$10,$2 # carry from previous addition -> $2
|
sltu $2,$10,$2 # carry from previous addition -> $2
|
||||||
sw $10,0($4)
|
sw $10,0($4)
|
||||||
addiu $4,$4,4
|
addiu $4,$4,4
|
||||||
bne $6,$0,Loop # should be "bnel"
|
bne $6,$0,Loop
|
||||||
addu $2,$9,$2 # add high product limb and carry from addition
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
# cool down phase 1
|
# cool down phase 1
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and
|
# MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and
|
||||||
# subtract the product from a second limb vector.
|
# subtract the product from a second limb vector.
|
||||||
|
|
||||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
# Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This file is part of the GNU MP Library.
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ Loop: lw $10,0($4)
|
||||||
addu $2,$2,$10
|
addu $2,$2,$10
|
||||||
sw $3,0($4)
|
sw $3,0($4)
|
||||||
addiu $4,$4,4
|
addiu $4,$4,4
|
||||||
bne $6,$0,Loop # should be "bnel"
|
bne $6,$0,Loop
|
||||||
addu $2,$9,$2 # add high product limb and carry from addition
|
addu $2,$9,$2 # add high product limb and carry from addition
|
||||||
|
|
||||||
# cool down phase 1
|
# cool down phase 1
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
|
# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
|
||||||
|
|
||||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This file is part of the GNU MP Library.
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# IBM POWER __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
# IBM POWER __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||||
# store difference in a third limb vector.
|
# store difference in a third limb vector.
|
||||||
|
|
||||||
# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
|
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
# This file is part of the GNU MP Library.
|
# This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/* gmp-mparam.h -- Compiler/machine parameter header file.
|
/* gmp-mparam.h -- Compiler/machine parameter header file.
|
||||||
|
|
||||||
Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
|
Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of the GNU MP Library.
|
This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Z8000 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
! Z8000 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||||
! the result in a second limb vector.
|
! the result in a second limb vector.
|
||||||
|
|
||||||
! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
|
! Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
|
||||||
|
|
||||||
! This file is part of the GNU MP Library.
|
! This file is part of the GNU MP Library.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue