old-autovect-branch/gcc/config/cris/mulsi3.asm

   1 ;; This code used to be expanded through interesting expansions in
   2 ;; the machine description, compiled from this code:
   3 ;;
   4 ;; #ifdef L_mulsi3
   5 ;; long __Mul (unsigned long a, unsigned long b) __attribute__ ((__const__));
   6 ;;
   7 ;; /* This must be compiled with the -mexpand-mul flag, to synthesize the
   8 ;;    multiplication from the mstep instructions.  The check for
   9 ;;    smaller-size multiplication pays off in the order of .5-10%;
  10 ;;    estimated median 1%, depending on application.
  11 ;;     FIXME: It can be further optimized if we go to assembler code, as
  12 ;;    gcc 2.7.2 adds a few unnecessary instructions and does not put the
  13 ;;    basic blocks in optimal order.  */
  14 ;; long
  15 ;; __Mul (unsigned long a, unsigned long b)
  16 ;; {
  17 ;; #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
  18 ;;   /* In case other code is compiled without -march=v10, they will
  19 ;;      contain calls to __Mul, regardless of flags at link-time.  The
  20 ;;      "else"-code below will work, but is unnecessarily slow.  This
  21 ;;      sometimes cuts a few minutes off from simulation time by just
  22 ;;      returning a "mulu.d".  */
  23 ;;   return a * b;
  24 ;; #else
  25 ;;   unsigned long min;
  26 ;;
  27 ;;   /* Get minimum via the bound insn.  */
  28 ;;   min = a < b ? a : b;
  29 ;;
  30 ;;   /* Can we omit computation of the high part?       */
  31 ;;   if (min > 65535)
  32 ;;     /* No.  Perform full multiplication.  */
  33 ;;     return a * b;
  34 ;;   else
  35 ;;     {
  36 ;;       /* Check if both operands are within 16 bits.  */
  37 ;;       unsigned long max;
  38 ;;
  39 ;;       /* Get maximum, by knowing the minimum.
  40 ;;          This will partition a and b into max and min.
  41 ;;          This is not currently something GCC understands,
  42 ;;          so do this trick by asm.  */
  43 ;;       __asm__ ("xor %1,%0\n\txor %2,%0"
  44 ;;                : "=r" (max)
  45 ;;                :  "r" (b), "r" (a), "0" (min));
  46 ;;
  47 ;;     if (max > 65535)
  48 ;;       /* Make GCC understand that only the low part of "min" will be
  49 ;;          used.  */
  50 ;;       return max * (unsigned short) min;
  51 ;;     else
  52 ;;       /* Only the low parts of both operands are necessary.  */
  53 ;;       return ((unsigned short) max) * (unsigned short) min;
  54 ;;     }
  55 ;; #endif /* not __CRIS_arch_version >= 10 */
  56 ;; }
  57 ;; #endif /* L_mulsi3 */
  58 ;;
  59 ;; That approach was abandoned since the caveats outweighed the
  60 ;; benefits.  The expand-multiplication machinery is also removed, so you
  61 ;; can't do this anymore.
  62 ;;
  63 ;; For doubters of there being any benefits, some were: insensitivity to:
  64 ;; - ABI changes (mostly for experimentation)
  65 ;; - assembler syntax differences (mostly debug format).
  66 ;; - insn scheduling issues.
  67 ;; Most ABI experiments will presumably happen with arches with mul insns,
  68 ;; so that argument doesn't really hold anymore, and it's unlikely that
  69 ;; there will be new arch variants needing insn scheduling and not having
  70 ;; mul insns.
  71
  72 ;; ELF and a.out have different syntax for local labels: the "wrong"
  73 ;; one may not be omitted from the object.
  74 #undef L
     ;; L(x) spells the local label x in the syntax of the object format:
     ;; plain x when __AOUT__ is defined, .x otherwise.
  75 #ifdef __AOUT__
  76 # define L(x) x
  77 #else
  78 # define L(x) .x
  79 #endif
  80
  81         .global ___Mul
  82         .type   ___Mul,@function
     ;; ___Mul: 32-bit multiplication helper; assembly form of the __Mul
     ;; C outline quoted at the top of this file.
     ;; In:   the two operands in $r10 and $r11.
     ;; Out:  the product in $r10.
     ;; The path without a multiply insn also overwrites $r9, $r12, $r13
     ;; and the condition flags.
  83 ___Mul:
  84 #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
  85 ;; Can't have the mulu.d last on a cache-line (in the delay-slot of the
  86 ;; "ret"), due to hardware bug.  See documentation for -mmul-bug-workaround.
  87 ;; Not worthwhile to conditionalize here.
     ;; Align to 2**2 bytes, padding with the 16-bit fill pattern 0x050f
     ;; (presumably the nop encoding -- confirm against the CRIS manual).
  88         .p2alignw 2,0x050f
  89         mulu.d $r11,$r10        ; $r10 = $r10 * $r11 using the hardware multiplier
  90         ret
  91         nop                     ; fills the delay slot of the ret
  92 #else
     ;; No multiply insn: build the product from runs of 16 mstep insns.
     ;; Each run multiplies the 16-bit value held in the upper half of the
     ;; destination register by the source register (per the CRIS mstep
     ;; definition; the first mstep of a run uses the flags left by the
     ;; insn just before it).
     ;; Start by computing min of the operands to choose the cheapest variant.
  93         move.d $r10,$r12        ; $r12 = copy of first operand (a)
  94         move.d $r11,$r9         ; $r9 = copy of second operand (b)
  95         bound.d $r12,$r9        ; $r9 = unsigned min (a, b), see C outline
  96         cmpu.w 65535,$r9
  97         bls L(L3)               ; min <= 65535: the high-part work can be reduced
  98         move.d $r12,$r13        ; (branch delay slot) $r13 = a
  99
     ;; Full case, min > 65535.  With lo/hi denoting 16-bit halves:
     ;;   result = lo(a)*lo(b) + ((hi(a)*lo(b) + lo(a)*hi(b)) << 16)
 100         movu.w $r11,$r9         ; $r9 = lo(b)
 101         lslq 16,$r13            ; $r13 = lo(a) << 16
 102         mstep $r9,$r13          ; 16 steps: $r13 = lo(a) * lo(b)
 103         mstep $r9,$r13
 104         mstep $r9,$r13
 105         mstep $r9,$r13
 106         mstep $r9,$r13
 107         mstep $r9,$r13
 108         mstep $r9,$r13
 109         mstep $r9,$r13
 110         mstep $r9,$r13
 111         mstep $r9,$r13
 112         mstep $r9,$r13
 113         mstep $r9,$r13
 114         mstep $r9,$r13
 115         mstep $r9,$r13
 116         mstep $r9,$r13
 117         mstep $r9,$r13
 118         clear.w $r10            ; $r10 = hi(a) << 16 (low word zeroed)
 119         test.d $r10             ; set flags from $r10 for the first mstep; presumably needed because clear.w leaves flags alone
 120         mstep $r9,$r10          ; 16 steps: $r10 = hi(a) * lo(b)
 121         mstep $r9,$r10
 122         mstep $r9,$r10
 123         mstep $r9,$r10
 124         mstep $r9,$r10
 125         mstep $r9,$r10
 126         mstep $r9,$r10
 127         mstep $r9,$r10
 128         mstep $r9,$r10
 129         mstep $r9,$r10
 130         mstep $r9,$r10
 131         mstep $r9,$r10
 132         mstep $r9,$r10
 133         mstep $r9,$r10
 134         mstep $r9,$r10
 135         mstep $r9,$r10
 136         movu.w $r12,$r12        ; $r12 = lo(a)
 137         move.d $r11,$r9         ; $r9 = b
 138         clear.w $r9             ; $r9 = hi(b) << 16
 139         test.d $r9              ; set flags from $r9 for the first mstep
 140         mstep $r12,$r9          ; 16 steps: $r9 = hi(b) * lo(a)
 141         mstep $r12,$r9
 142         mstep $r12,$r9
 143         mstep $r12,$r9
 144         mstep $r12,$r9
 145         mstep $r12,$r9
 146         mstep $r12,$r9
 147         mstep $r12,$r9
 148         mstep $r12,$r9
 149         mstep $r12,$r9
 150         mstep $r12,$r9
 151         mstep $r12,$r9
 152         mstep $r12,$r9
 153         mstep $r12,$r9
 154         mstep $r12,$r9
 155         mstep $r12,$r9
 156         add.w $r9,$r10          ; sum the two cross products; only the low word matters after the shift below
 157         lslq 16,$r10            ; move the cross-product sum into the high half
 158         ret
 159         add.d $r13,$r10         ; (ret delay slot) add lo(a)*lo(b) to form the result
 160
 161 L(L3):
     ;; Here $r9 = min (a, b) <= 65535.  Recover max as min ^ b ^ a,
     ;; the xor trick shown in the C outline.
 162         move.d $r9,$r10
 163         xor $r11,$r10
 164         xor $r12,$r10           ; $r10 = max (a, b)
 165         cmpu.w 65535,$r10
 166         bls L(L5)               ; max <= 65535 as well: one 16x16 multiply suffices
 167         movu.w $r9,$r13         ; (branch delay slot) $r13 = min, zero-extended from 16 bits
 168
     ;; max > 65535, min <= 65535:
     ;;   result = lo(max)*min + ((hi(max)*min) << 16)
 169         movu.w $r13,$r13        ; $r13 was already zero-extended in the delay slot above
 170         move.d $r10,$r9         ; $r9 = max
 171         lslq 16,$r9             ; $r9 = lo(max) << 16
 172         mstep $r13,$r9          ; 16 steps: $r9 = lo(max) * min
 173         mstep $r13,$r9
 174         mstep $r13,$r9
 175         mstep $r13,$r9
 176         mstep $r13,$r9
 177         mstep $r13,$r9
 178         mstep $r13,$r9
 179         mstep $r13,$r9
 180         mstep $r13,$r9
 181         mstep $r13,$r9
 182         mstep $r13,$r9
 183         mstep $r13,$r9
 184         mstep $r13,$r9
 185         mstep $r13,$r9
 186         mstep $r13,$r9
 187         mstep $r13,$r9
 188         clear.w $r10            ; $r10 = hi(max) << 16
 189         test.d $r10             ; set flags from $r10 for the first mstep
 190         mstep $r13,$r10         ; 16 steps: $r10 = hi(max) * min
 191         mstep $r13,$r10
 192         mstep $r13,$r10
 193         mstep $r13,$r10
 194         mstep $r13,$r10
 195         mstep $r13,$r10
 196         mstep $r13,$r10
 197         mstep $r13,$r10
 198         mstep $r13,$r10
 199         mstep $r13,$r10
 200         mstep $r13,$r10
 201         mstep $r13,$r10
 202         mstep $r13,$r10
 203         mstep $r13,$r10
 204         mstep $r13,$r10
 205         mstep $r13,$r10
 206         lslq 16,$r10            ; move the high partial product into the high half
 207         ret
 208         add.d $r9,$r10          ; (ret delay slot) add the low partial product
 209
 210 L(L5):
     ;; Both operands fit in 16 bits: $r9 = min, $r10 = max.
     ;; A single run of 16 msteps gives the whole product.
 211         movu.w $r9,$r9          ; keep only the low 16 bits of min
 212         lslq 16,$r10            ; $r10 = max << 16
 213         mstep $r9,$r10          ; steps 1-15 of 16
 214         mstep $r9,$r10
 215         mstep $r9,$r10
 216         mstep $r9,$r10
 217         mstep $r9,$r10
 218         mstep $r9,$r10
 219         mstep $r9,$r10
 220         mstep $r9,$r10
 221         mstep $r9,$r10
 222         mstep $r9,$r10
 223         mstep $r9,$r10
 224         mstep $r9,$r10
 225         mstep $r9,$r10
 226         mstep $r9,$r10
 227         mstep $r9,$r10
 228         ret
 229         mstep $r9,$r10          ; (ret delay slot) 16th step: $r10 = max * min
 230 #endif
     ;; End marker; used below to compute the size of ___Mul for the symbol table.
 231 L(Lfe1):
 232         .size   ___Mul,L(Lfe1)-___Mul