sysdeps/sparc/mul_1.S

   1 ! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
   2 ! the result in a second limb vector.
   3
   4 ! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
   5
   6 ! This file is part of the GNU MP Library.
   7
   8 ! The GNU MP Library is free software; you can redistribute it and/or modify
   9 ! it under the terms of the GNU Library General Public License as published by
  10 ! the Free Software Foundation; either version 2 of the License, or (at your
  11 ! option) any later version.
  12
  13 ! The GNU MP Library is distributed in the hope that it will be useful, but
  14 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
  16 ! License for more details.
  17
  18 ! You should have received a copy of the GNU Library General Public License
  19 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  20 ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  21 ! MA 02111-1307, USA.
  22
  23
  24 ! INPUT PARAMETERS
  25 ! res_ptr       o0
  26 ! s1_ptr        o1
  27 ! size          o2
  28 ! s2_limb       o3
  29
  30 ! ADD CODE FOR SMALL MULTIPLIERS!
  31 !1:     ld
  32 !       st
  33 !
  34 !2:     ld      ,a
  35 !       addxcc  a,a,x
  36 !       st      x,
  37 !
  38 !3_unrolled:
  39 !       ld      ,a
  40 !       addxcc  a,a,x1          ! 2a + cy
  41 !       addx    %g0,%g0,x2
  42 !       addcc   a,x1,x          ! 3a + c
  43 !       st      x,
  44 !
  45 !       ld      ,a
  46 !       addxcc  a,a,y1
  47 !       addx    %g0,%g0,y2
  48 !       addcc   a,y1,x
  49 !       st      x,
  50 !
  51 !4_unrolled:
  52 !       ld      ,a
  53 !       srl     a,2,x1          ! 4a
  54 !       addxcc  y2,x1,x
  55 !       sll     a,30,x2
  56 !       st      x,
  57 !
  58 !       ld      ,a
  59 !       srl     a,2,y1
  60 !       addxcc  x2,y1,y
  61 !       sll     a,30,y2
  62 !       st      x,
  63 !
  64 !5_unrolled:
  65 !       ld      ,a
  66 !       srl     a,2,x1          ! 4a
  67 !       addxcc  a,x1,x          ! 5a + c
  68 !       sll     a,30,x2
  69 !       addx    %g0,x2,x2
  70 !       st      x,
  71 !
  72 !       ld      ,a
  73 !       srl     a,2,y1
  74 !       addxcc  a,y1,x
  75 !       sll     a,30,y2
  76 !       addx    %g0,y2,y2
  77 !       st      x,
  78 !
  79 !8_unrolled:
  80 !       ld      ,a
  81 !       srl     a,3,x1          ! 8a
  82 !       addxcc  y2,x1,x
  83 !       sll     a,29,x2
  84 !       st      x,
  85 !
  86 !       ld      ,a
  87 !       srl     a,3,y1
  88 !       addxcc  x2,y1,y
  89 !       sll     a,29,y2
  90 !       st      x,
  91
  92 #include "sysdep.h"
  93
  94 .text
  95         .align 4
  96         .global C_SYMBOL_NAME(__mpn_mul_1)
  97 C_SYMBOL_NAME(__mpn_mul_1):        ! multiply SIZE limbs at S1_PTR by S2_LIMB, store the low limbs at RES_PTR, leave the final carry limb in o0
  98         ! Make S1_PTR and RES_PTR point at the end of their blocks
  99         ! and put (- 4 x SIZE) in index/loop counter.
 100         sll     %o2,2,%o2       ! o2 = 4 x SIZE (limbs are loaded and stored as 4-byte words)
 101         add     %o0,%o2,%o4     ! RES_PTR in o4 since o0 is retval
 102         add     %o1,%o2,%o1     ! o1 = end of the S1 block
 103         sub     %g0,%o2,%o2     ! o2 = -(4 x SIZE); stepped by 4 until it reaches 0
 104
 105         cmp     %o3,0xfff
 106         bgu     Large           ! unsigned S2_LIMB > 0xfff: use the 32-step path
 107         nop                     ! branch delay slot
 108
 109         ld      [%o1+%o2],%o5   ! o5 = first S1 limb; loaded before any counter test, so SIZE is presumably expected to be >= 1 (confirm against callers)
 110         mov     0,%o0           ! o0 = carry limb, starts at 0
 111         b       L0
 112          add    %o4,-4,%o4      ! (delay slot) bias o4 by -4 because every store happens after o2 has been advanced
 113 Loop0:
 114         st      %g1,[%o4+%o2]   ! store the low limb produced by the previous iteration
 115 L0:     wr      %g0,%o3,%y      ! Y = S2_LIMB (<= 0xfff on this path); three instructions follow before Y is used
 116         sra     %o5,31,%g2      ! g2 = all ones iff the top bit of the S1 limb is set
 117         and     %o3,%g2,%g2     ! g2 = S2_LIMB iff the top bit of the S1 limb is set, else 0 (sign compensation)
 118         andcc   %g1,0,%g1       ! g1 = 0 and condition codes reset before the multiply steps
 119         mulscc  %g1,%o5,%g1     ! 12 multiply steps with the S1 limb as addend, one per low bit of Y
 120         mulscc  %g1,%o5,%g1
 121         mulscc  %g1,%o5,%g1
 122         mulscc  %g1,%o5,%g1
 123         mulscc  %g1,%o5,%g1
 124         mulscc  %g1,%o5,%g1
 125         mulscc  %g1,%o5,%g1
 126         mulscc  %g1,%o5,%g1
 127         mulscc  %g1,%o5,%g1
 128         mulscc  %g1,%o5,%g1
 129         mulscc  %g1,%o5,%g1
 130         mulscc  %g1,%o5,%g1
 131         mulscc  %g1,0,%g1       ! 13th step with a zero addend
 132         sra     %g1,20,%g4      ! g4 = high limb of the product (sign-extended top of g1)
 133         sll     %g1,12,%g1      ! low 20 bits of g1 become the upper bits of the low limb
 134         rd      %y,%g3
 135         srl     %g3,20,%g3      ! top 12 bits of Y supply the low 12 bits of the low limb
 136         or      %g1,%g3,%g1     ! g1 = low limb of the product
 137
 138         addcc   %g1,%o0,%g1     ! add the carry limb from the previous iteration; carry flag feeds the addx below
 139         addx    %g2,%g4,%o0     ! add sign-compensation and cy to hi limb
 140         addcc   %o2,4,%o2       ! loop counter
 141         bne,a   Loop0
 142          ld     [%o1+%o2],%o5   ! (annulled delay slot) next S1 limb, executed only when the branch is taken
 143
 144         retl
 145         st      %g1,[%o4+%o2]   ! (delay slot) store the last low limb; o0 holds the carry limb
 146
 147
 148 Large:  ld      [%o1+%o2],%o5   ! o5 = first S1 limb
 149         mov     0,%o0           ! o0 = carry limb, starts at 0
 150         sra     %o3,31,%g4      ! g4 = mask of ones iff S2_LIMB < 0
 151         b       L1
 152          add    %o4,-4,%o4      ! (delay slot) bias o4 by -4 because every store happens after o2 has been advanced
 153 Loop:
 154         st      %g3,[%o4+%o2]   ! store the low limb produced by the previous iteration
 155 L1:     wr      %g0,%o5,%y      ! Y = S1 limb; on this path S2_LIMB is the mulscc addend
 156         and     %o5,%g4,%g2     ! g2 = S1_LIMB iff S2_LIMB < 0, else 0
 157         andcc   %g0,%g0,%g1     ! g1 = 0 and condition codes reset before the multiply steps
 158         mulscc  %g1,%o3,%g1     ! 32 multiply steps. NOTE(review): only two instructions separate this from the wr to Y (the small path has three); SPARC V8 describes WRY as a delayed write of up to three instructions -- confirm
 159         mulscc  %g1,%o3,%g1
 160         mulscc  %g1,%o3,%g1
 161         mulscc  %g1,%o3,%g1
 162         mulscc  %g1,%o3,%g1
 163         mulscc  %g1,%o3,%g1
 164         mulscc  %g1,%o3,%g1
 165         mulscc  %g1,%o3,%g1
 166         mulscc  %g1,%o3,%g1
 167         mulscc  %g1,%o3,%g1
 168         mulscc  %g1,%o3,%g1
 169         mulscc  %g1,%o3,%g1
 170         mulscc  %g1,%o3,%g1
 171         mulscc  %g1,%o3,%g1
 172         mulscc  %g1,%o3,%g1
 173         mulscc  %g1,%o3,%g1
 174         mulscc  %g1,%o3,%g1
 175         mulscc  %g1,%o3,%g1
 176         mulscc  %g1,%o3,%g1
 177         mulscc  %g1,%o3,%g1
 178         mulscc  %g1,%o3,%g1
 179         mulscc  %g1,%o3,%g1
 180         mulscc  %g1,%o3,%g1
 181         mulscc  %g1,%o3,%g1
 182         mulscc  %g1,%o3,%g1
 183         mulscc  %g1,%o3,%g1
 184         mulscc  %g1,%o3,%g1
 185         mulscc  %g1,%o3,%g1
 186         mulscc  %g1,%o3,%g1
 187         mulscc  %g1,%o3,%g1
 188         mulscc  %g1,%o3,%g1
 189         mulscc  %g1,%o3,%g1
 190         mulscc  %g1,%g0,%g1     ! 33rd step with a zero addend; g1 is used as the high limb below
 191         rd      %y,%g3          ! g3 = low limb of the product
 192         addcc   %g3,%o0,%g3     ! add the carry limb from the previous iteration; carry flag feeds the addx below
 193         addx    %g2,%g1,%o0     ! add sign-compensation and cy to hi limb
 194         addcc   %o2,4,%o2       ! loop counter
 195         bne,a   Loop
 196          ld     [%o1+%o2],%o5   ! (annulled delay slot) next S1 limb, executed only when the branch is taken
 197
 198         retl
 199         st      %g3,[%o4+%o2]   ! (delay slot) store the last low limb; o0 holds the carry limb