sysdeps/sparc/sparc32/mul_1.S

   1 ! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
   2 ! the result in a second limb vector.
   3 !
   4 ! Copyright (C) 1992-2014 Free Software Foundation, Inc.
   5 !
   6 ! This file is part of the GNU MP Library.
   7 !
   8 ! The GNU MP Library is free software; you can redistribute it and/or modify
   9 ! it under the terms of the GNU Lesser General Public License as published by
  10 ! the Free Software Foundation; either version 2.1 of the License, or (at your
  11 ! option) any later version.
  12 !
  13 ! The GNU MP Library is distributed in the hope that it will be useful, but
  14 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 ! License for more details.
  17 !
  18 ! You should have received a copy of the GNU Lesser General Public License
  19 ! along with the GNU MP Library; see the file COPYING.LIB.  If not,
  20 ! see <http://www.gnu.org/licenses/>.
  21
  22
  23 ! INPUT PARAMETERS
  24 ! RES_PTR       o0
  25 ! S1_PTR        o1
  26 ! SIZE          o2
  27 ! S2_LIMB       o3
  28
  29 ! ADD CODE FOR SMALL MULTIPLIERS!
  30 !1:     ld
  31 !       st
  32 !
  33 !2:     ld      ,a
  34 !       addxcc  a,a,x
  35 !       st      x,
  36 !
  37 !3_unrolled:
  38 !       ld      ,a
  39 !       addxcc  a,a,x1          ! 2a + cy
  40 !       addx    %g0,%g0,x2
  41 !       addcc   a,x1,x          ! 3a + c
  42 !       st      x,
  43 !
  44 !       ld      ,a
  45 !       addxcc  a,a,y1
  46 !       addx    %g0,%g0,y2
  47 !       addcc   a,y1,x
  48 !       st      x,
  49 !
  50 !4_unrolled:
  51 !       ld      ,a
  52 !       srl     a,2,x1          ! 4a
  53 !       addxcc  y2,x1,x
  54 !       sll     a,30,x2
  55 !       st      x,
  56 !
  57 !       ld      ,a
  58 !       srl     a,2,y1
  59 !       addxcc  x2,y1,y
  60 !       sll     a,30,y2
  61 !       st      y,
  62 !
  63 !5_unrolled:
  64 !       ld      ,a
  65 !       srl     a,2,x1          ! 4a
  66 !       addxcc  a,x1,x          ! 5a + c
  67 !       sll     a,30,x2
  68 !       addx    %g0,x2,x2
  69 !       st      x,
  70 !
  71 !       ld      ,a
  72 !       srl     a,2,y1
  73 !       addxcc  a,y1,x
  74 !       sll     a,30,y2
  75 !       addx    %g0,y2,y2
  76 !       st      x,
  77 !
  78 !8_unrolled:
  79 !       ld      ,a
  80 !       srl     a,3,x1          ! 8a
  81 !       addxcc  y2,x1,x
  82 !       sll     a,29,x2
  83 !       st      x,
  84 !
  85 !       ld      ,a
  86 !       srl     a,3,y1
  87 !       addxcc  x2,y1,y
  88 !       sll     a,29,y2
  89 !       st      y,
  90
  91 #include <sysdep.h>
  92
     ! __mpn_mul_1 -- multiply the SIZE-limb vector at S1_PTR by S2_LIMB,
     ! store the low 32 bits of each step at RES_PTR and return the final
     ! carry (most significant) limb.
     !
     ! Registers on entry:
     !   %o0  RES_PTR   destination vector
     !   %o1  S1_PTR    source vector
     !   %o2  SIZE      number of limbs
     !   %o3  S2_LIMB   multiplier limb (only read, never written)
     ! Result: %o0 = carry-out limb.
     ! Written: %o0, %o1, %o2, %o4, %o5, %g1-%g4, %y, integer condition codes.
     ! Leaf routine: returns with retl and executes no save/restore.
     !
     ! SIZE must be at least 1: the index is tested only after it has been
     ! advanced by 4, so a zero SIZE is not handled.
     !
     ! Two paths are used.  If S2_LIMB <= 0xfff (unsigned) it is put in %y
     ! and only 12 multiply steps are issued per limb.  Otherwise the S1 limb
     ! is put in %y and the full 32 multiply steps are issued.
  93 ENTRY(__mpn_mul_1)
  94         ! Make S1_PTR and RES_PTR point at the end of their blocks
  95         ! and put (- 4 x SIZE) in index/loop counter.
  96         sll     %o2,2,%o2       ! o2 = SIZE in bytes
  97         add     %o0,%o2,%o4     ! RES_PTR in o4 since o0 is retval
  98         add     %o1,%o2,%o1     ! o1 = end of S1 block
  99         sub     %g0,%o2,%o2     ! o2 = negative byte index, counts up to 0
 100
 101         cmp     %o3,0xfff
 102         bgu     LOC(large)      ! unsigned: S2_LIMB needs more than 12 bits
 103         nop                     ! branch delay slot
 104
             ! Small multiplier path: %y holds S2_LIMB, the S1 limb is the
             ! register operand of each multiply step.
 105         ld      [%o1+%o2],%o5   ! o5 = first S1 limb
 106         mov     0,%o0           ! o0 = carry limb, initially zero
 107         b       LOC(0)
 108          add    %o4,-4,%o4      ! delay slot: bias o4, o2 is advanced before each store
 109 LOC(loop0):
 110         st      %g1,[%o4+%o2]   ! store previous low limb
 111 LOC(0): wr      %g0,%o3,%y      ! y = S2_LIMB
 112         sra     %o5,31,%g2      ! g2 = mask of ones iff S1 limb has bit 31 set
 113         and     %o3,%g2,%g2     ! g2 = S2_LIMB iff that bit is set, else 0
 114         andcc   %g1,0,%g1       ! g1 = 0, N and V cleared before the first step
 115         mulscc  %g1,%o5,%g1     ! 12 multiply steps, one per low bit of y
 116         mulscc  %g1,%o5,%g1
 117         mulscc  %g1,%o5,%g1
 118         mulscc  %g1,%o5,%g1
 119         mulscc  %g1,%o5,%g1
 120         mulscc  %g1,%o5,%g1
 121         mulscc  %g1,%o5,%g1
 122         mulscc  %g1,%o5,%g1
 123         mulscc  %g1,%o5,%g1
 124         mulscc  %g1,%o5,%g1
 125         mulscc  %g1,%o5,%g1
 126         mulscc  %g1,%o5,%g1
 127         mulscc  %g1,0,%g1       ! final step with zero operand: shift only
             ! Now g1 = product bits 43..12 and the top 12 bits of y hold
             ! product bits 11..0.  Reassemble a 32-bit low limb and a high limb.
 128         sra     %g1,20,%g4      ! g4 = high limb (product bits above 31, sign-extended)
 129         sll     %g1,12,%g1      ! g1 = low limb bits 31..12
 130         rd      %y,%g3
 131         srl     %g3,20,%g3      ! g3 = low limb bits 11..0
 132         or      %g1,%g3,%g1     ! g1 = low limb of the product
 133
 134         addcc   %g1,%o0,%g1     ! add carry limb from the previous iteration
 135         addx    %g2,%g4,%o0     ! add sign-compensation and cy to hi limb
 136         addcc   %o2,4,%o2       ! loop counter
 137         bne,a   LOC(loop0)      ! annulled: the load below runs only if taken
 138          ld     [%o1+%o2],%o5   ! delay slot: fetch next S1 limb
 139
 140         retl                    ! o0 = carry limb is the return value
 141         st      %g1,[%o4+%o2]   ! delay slot: store the last low limb
 142
 143
             ! Large multiplier path: %y holds the S1 limb, S2_LIMB is the
             ! register operand of each multiply step.
 144 LOC(large):
 145         ld      [%o1+%o2],%o5   ! o5 = first S1 limb
 146         mov     0,%o0           ! o0 = carry limb, initially zero
 147         sra     %o3,31,%g4      ! g4 = mask of ones iff S2_LIMB < 0
 148         b       LOC(1)
 149          add    %o4,-4,%o4      ! delay slot: bias o4, o2 is advanced before each store
 150 LOC(loop):
 151         st      %g3,[%o4+%o2]   ! store previous low limb
 152 LOC(1): wr      %g0,%o5,%y      ! y = current S1 limb
 153         and     %o5,%g4,%g2     ! g2 = S1_LIMB iff S2_LIMB < 0, else 0
 154         andcc   %g0,%g0,%g1     ! g1 = 0, N and V cleared before the first step
 155         mulscc  %g1,%o3,%g1     ! 32 multiply steps, one per bit of y
 156         mulscc  %g1,%o3,%g1
 157         mulscc  %g1,%o3,%g1
 158         mulscc  %g1,%o3,%g1
 159         mulscc  %g1,%o3,%g1
 160         mulscc  %g1,%o3,%g1
 161         mulscc  %g1,%o3,%g1
 162         mulscc  %g1,%o3,%g1
 163         mulscc  %g1,%o3,%g1
 164         mulscc  %g1,%o3,%g1
 165         mulscc  %g1,%o3,%g1
 166         mulscc  %g1,%o3,%g1
 167         mulscc  %g1,%o3,%g1
 168         mulscc  %g1,%o3,%g1
 169         mulscc  %g1,%o3,%g1
 170         mulscc  %g1,%o3,%g1
 171         mulscc  %g1,%o3,%g1
 172         mulscc  %g1,%o3,%g1
 173         mulscc  %g1,%o3,%g1
 174         mulscc  %g1,%o3,%g1
 175         mulscc  %g1,%o3,%g1
 176         mulscc  %g1,%o3,%g1
 177         mulscc  %g1,%o3,%g1
 178         mulscc  %g1,%o3,%g1
 179         mulscc  %g1,%o3,%g1
 180         mulscc  %g1,%o3,%g1
 181         mulscc  %g1,%o3,%g1
 182         mulscc  %g1,%o3,%g1
 183         mulscc  %g1,%o3,%g1
 184         mulscc  %g1,%o3,%g1
 185         mulscc  %g1,%o3,%g1
 186         mulscc  %g1,%o3,%g1
 187         mulscc  %g1,%g0,%g1     ! final step with zero operand: shift only
 188         rd      %y,%g3          ! g3 = low limb of the product, g1 = high limb
 189         addcc   %g3,%o0,%g3     ! add carry limb from the previous iteration
 190         addx    %g2,%g1,%o0     ! add sign-compensation and cy to hi limb
 191         addcc   %o2,4,%o2       ! loop counter
 192         bne,a   LOC(loop)       ! annulled: the load below runs only if taken
 193          ld     [%o1+%o2],%o5   ! delay slot: fetch next S1 limb
 194
 195         retl                    ! o0 = carry limb is the return value
 196         st      %g3,[%o4+%o2]   ! delay slot: store the last low limb
 197
 198 END(__mpn_mul_1)