arch/m68k-all/libgcc1/_mulsf3.s

   1 | single floating point multiplication routine
   2 |
   3 | written by Kai-Uwe Bloem (I5110401@dbstu1.bitnet).
   4 | Based on a 80x86 floating point packet from comp.os.minix, written by P.Housel
   5 |
   6 |
   7 | Revision 1.2, kub 01-90 :
   8 | added support for denormalized numbers
   9 |
  10 | Revision 1.1, kub 12-89 :
  11 | Created single float version for 68000. Code could be speed up by having
  12 | the accumulator in the 68000 register set ...
  13 |
  14 | Revision 1.0:
  15 | original 8088 code from P.S.Housel for double floats
  16
  17 BIAS4   =       0x7F-1
  18
  19         .text
  20         .even
  21         .globl  __mulsf3
  22
  23 __mulsf3:
  24         lea     %sp@(4),%a0
  25         moveml  %d2-%d5,%sp@-
  26         moveml  %a0@,%d4/%d5    | %d4 = v, %d5 = u
  27         subw    #8,%sp          | multiplication accumulator
  28
  29         movel   %d5,%d0         | %d0 = u.exp
  30         swap    %d0
  31         movew   %d0,%d2         | %d2 = u.sign
  32         lsrw    #7,%d0
  33         andw    #0xff,%d0       | kill sign bit
  34
  35         movel   %d4,%d1         | %d1 = v.exp
  36         swap    %d1
  37         eorw    %d1,%d2         | %d2 = u.sign ^ v.sign (in bit 31)
  38         lsrw    #7,%d1
  39         andw    #0xff,%d1       | kill sign bit
  40
  41         andl    #0x7fffff,%d5   | remove exponent from u.mantissa
  42         tstw    %d0             | check for zero exponent - no leading "1"
  43         beq     L_00
  44         orl     #0x800000,%d5   | restore implied leading "1"
  45         bra     L_10
  46 L_00:   addw    #1,%d0          | "normalize" exponent
  47 L_10:   tstl    %d5
  48         beq     retz            | multiplying zero
  49
  50         andl    #0x7fffff,%d4   | remove exponent from v.mantissa
  51         tstw    %d1             | check for zero exponent - no leading "1"
  52         beq     L_01
  53         orl     #0x800000,%d4   | restore implied leading "1"
  54         bra     L_11
  55 L_01:   addw    #1,%d1          | "normalize" exponent
  56 L_11:   tstl    %d4
  57         beq     retz            | multiply by zero
  58
  59         addw    %d1,%d0         | add exponents,
  60         subw    #BIAS4+16-8,%d0 | remove excess bias, acnt for repositioning
  61
  62         clrl    %sp@            | initialize 64-bit product to zero
  63         clrl    %sp@(4)
  64
  65 | see Knuth, Seminumerical Algorithms, section 4.3. algorithm M
  66
  67         movew   %d4,%d3
  68         mulu    %d5,%d3         | mulitply with bigit from multiplier
  69         movel   %d3,%sp@(4)     | store into result
  70
  71         movel   %d4,%d3
  72         swap    %d3
  73         mulu    %d5,%d3
  74         addl    %d3,%sp@(2)     | add to result
  75
  76         swap    %d5             | [TOP 8 BITS SHOULD BE ZERO !]
  77
  78         movew   %d4,%d3
  79         mulu    %d5,%d3         | mulitply with bigit from multiplier
  80         addl    %d3,%sp@(2)     | store into result (no carry can occur here)
  81
  82         movel   %d4,%d3
  83         swap    %d3
  84         mulu    %d5,%d3
  85         addl    %d3,%sp@                | add to result
  86                                 | [TOP 16 BITS SHOULD BE ZERO !]
  87         moveml  %sp@(2),%d4-%d5 | get the 48 valid mantissa bits
  88         clrw    %d5             | (pad to 64)
  89 L_2:
  90         cmpl    #0x0000ffff,%d4 | multiply (shift) until
  91         bhi     L_3             |  1 in upper 16 result bits
  92         cmpw    #9,%d0          | give up for denormalized numbers
  93         ble     L_3
  94         swap    %d4             | (we're getting here only when multiplying
  95         swap    %d5             |  with a denormalized number; there's an
  96         movew   %d5,%d4         |  eventual loss of 4 bits in the rounding
  97         clrw    %d5             |  byte -- what a pity 8-)
  98         subw    #16,%d0         | decrement exponent
  99         bra     L_2
 100 L_3:
 101         movel   %d5,%d1         | get rounding bits
 102         roll    #8,%d1
 103         movel   %d1,%d3         | see if sticky bit should be set
 104         andl    #0xffffff00,%d3
 105         beq     L_4
 106         orb     #1,%d1          | set "sticky bit" if any low-order set
 107 L_4:    addw    #8,%sp          | remove accumulator from stack
 108         jmp     norm_sf         | (result in %d4)
 109
 110 retz:   clrl    %d0             | save zero as result
 111         addw    #8,%sp
 112         moveml  %sp@+,%d2-%d5
 113         rts                     | no normalizing neccessary