revert between 56095 -> 55830 in arch
[AROS.git] / arch / m68k-all / libgcc1 / _muldf3.s
blobd93ac9aed0cb7d96027901f94a5b682af19558d2
1 | double floating point multiplication routine
3 | written by Kai-Uwe Bloem (I5110401@dbstu1.bitnet).
4 | Based on a 80x86 floating point packet from comp.os.minix, written by P.Housel
7 | Revision 1.2, kub 01-90 :
8 | added support for denormalized numbers
10 | Revision 1.1, kub 12-89 :
11 | Ported over to 68k assembler
13 | Revision 1.0:
14 | original 8088 code from P.S.Housel
16 BIAS8 = 0x3FF-1
18 .text
19 .even
20 .globl __muldf3
22 __muldf3:
23 lea %sp@(4),%a0
24 moveml %d2-%d7,%sp@-
25 moveml %a0@,%d4-%d5/%d6-%d7 | %d4-%d5 = v, %d6-%d7 = u
26 subw #16,%sp | multiplication accumulator
28 movel %d6,%d0 | %d0 = u.exp
29 swap %d0
30 movew %d0,%d2 | %d2 = u.sign
31 lsrw #4,%d0
32 andw #0x07ff,%d0 | kill sign bit
34 movel %d4,%d1 | %d1 = v.exp
35 swap %d1
36 eorw %d1,%d2 | %d2 = u.sign ^ v.sign (in bit 31)
37 lsrw #4,%d1
38 andw #0x07ff,%d1 | kill sign bit
40 andl #0x0fffff,%d6 | remove exponent from u.mantissa
41 tstw %d0 | check for zero exponent - no leading "1"
42 beq L_00
43 orl #0x100000,%d6 | restore implied leading "1"
44 bra L_10
45 L_00: addw #1,%d0 | "normalize" exponent
46 L_10: movel %d6,%d3
47 orl %d7,%d3
48 beq retz | multiplying by zero
50 andl #0x0fffff,%d4 | remove exponent from v.mantissa
51 tstw %d1 | check for zero exponent - no leading "1"
52 beq L_01
53 orl #0x100000,%d4 | restore implied leading "1"
54 bra L_11
55 L_01: addw #1,%d1 | "normalize" exponent
56 L_11: movel %d4,%d3
57 orl %d5,%d3
58 beq retz | multiplying by zero
60 addw %d1,%d0 | add exponents,
61 subw #BIAS8+16-11,%d0 | remove excess bias, acnt for repositioning
63 clrl %sp@ | initialize 128-bit product to zero
64 clrl %sp@(4)
65 clrl %sp@(8)
66 clrl %sp@(12)
67 lea %sp@(8),%a1 | accumulator pointer
69 | see Knuth, Seminumerical Algorithms, section 4.3. algorithm M
71 swap %d2
72 movew #4-1,%d2
73 L_12:
74 movew %d5,%d3
75 mulu %d7,%d3 | mulitply with bigit from multiplier
76 addl %d3,%a1@(4) | store into result
77 movew %d4,%d3
78 mulu %d7,%d3
79 movel %a1@,%d1 | add to result
80 addxl %d3,%d1
81 movel %d1,%a1@
82 roxlw %a1@- | rotate carry in
84 movel %d5,%d3
85 swap %d3
86 mulu %d7,%d3
87 addl %d3,%a1@(4) | add to result
88 movel %d4,%d3
89 swap %d3
90 mulu %d7,%d3
91 movel %a1@,%d1 | add to result
92 addxl %d3,%d1
93 movel %d1,%a1@
95 movew %d6,%d7
96 swap %d6
97 swap %d7
98 dbra %d2,L_12
100 swap %d2 | [TOP 16 BITS SHOULD BE ZERO !]
102 moveml %sp@(2),%d4-%d7 | get the 112 valid bits
103 clrw %d7 | (pad to 128)
104 L_2:
105 cmpl #0x0000ffff,%d4 | multiply (shift) until
106 bhi L_3 | 1 in upper 16 result bits
107 cmpw #9,%d0 | give up for denormalized numbers
108 ble L_3
109 swap %d4 | (we're getting here only when multiplying
110 swap %d5 | with a denormalized number; there's an
111 swap %d6 | eventual loss of 4 bits in the rounding
112 swap %d7 | byte -- what a pity 8-)
113 movew %d5,%d4
114 movew %d6,%d5
115 movew %d7,%d6
116 clrw %d7
117 subw #16,%d0 | decrement exponent
118 bra L_2
119 L_3:
120 movel %d6,%d1 | get rounding bits
121 roll #8,%d1
122 movel %d1,%d3 | see if sticky bit should be set
123 orl %d7,%d3 | (lower 16 bits of %d7 are guaranteed to be 0)
124 andl #0xffffff00,%d3
125 beq L_4
126 orb #1,%d1 | set "sticky bit" if any low-order set
127 L_4: addw #16,%sp | remove accumulator from stack
128 jmp norm_df | (result in %d4/%d5)
130 retz: clrl %d0 | save zero as result
131 clrl %d1
132 addw #16,%sp
133 moveml %sp@+,%d2-%d7
134 rts | no normalizing neccessary