sysdeps/sparc/sparc32/sparcv8/mul_1.S

   1 ! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
   2 ! store the product in a second limb vector.
   3
   4 ! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
   5
   6 ! This file is part of the GNU MP Library.
   7
   8 ! The GNU MP Library is free software; you can redistribute it and/or modify
   9 ! it under the terms of the GNU Lesser General Public License as published by
  10 ! the Free Software Foundation; either version 2.1 of the License, or (at your
  11 ! option) any later version.
  12
  13 ! The GNU MP Library is distributed in the hope that it will be useful, but
  14 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 ! License for more details.
  17
  18 ! You should have received a copy of the GNU Lesser General Public License
  19 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  20 ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  21 ! MA 02111-1307, USA.
  22
  23
  24 ! INPUT PARAMETERS
  25 ! res_ptr       o0
  26 ! s1_ptr        o1
  27 ! size          o2
  28 ! s2_limb       o3
  29
  30 #include <sysdep.h>
  31
  32 ENTRY(__mpn_mul_1)
     /* Multiply the size-limb vector at s1_ptr by s2_limb.  For every limb
        the low product word, plus the high word and carry left over from
        the previous limb, is stored to the res_ptr vector; the final
        high word plus carry is returned in %o0.

        Register use, as established by the code below:
          %o0  res_ptr (biased by the entry stub, +16 bytes per loop pass)
          %o1  s1_ptr  (biased by the entry stub, +16 bytes per loop pass)
          %o2  size, decremented by 4 each time LOC(loop01) is reached
          %o3  s2_limb
          %o4  most recently loaded source limb
          %g1  dispatch offset, (size & 3) * 16
          %g2  high product word carried into the next limb
          %g3  address of label 3 for the dispatch, then low product word
          %g4  saved copy of %o7

        The loop is unrolled four times and is entered through a computed
        jump into a table of stubs selected by size mod 4.
        NOTE(review): size == 0 selects the LOC(00) stub and would process
        four limbs; presumably callers always pass size >= 1 -- confirm.  */
     /* %g1 = (size << 4) & 0x30, i.e. (size & 3) * 16.  */
  33         sll     %o2,4,%g1
  34         mov     %o7,%g4                 ! Save return address register
  35         and     %g1,(4-1)<<4,%g1
     /* call writes its own address (label 1) into %o7; adding the constant
        3f-1b in the delay slot yields the run-time address of label 3.  */
  36 1:      call    2f
  37          add    %o7,3f-1b,%g3
  38 2:      mov     %g4,%o7                 ! Restore return address register
     /* Jump to label 3 + %g1; the delay slot preloads the limb at
        s1_ptr[0] into %o4.  */
  39         jmp     %g3+%g1
  40          ld     [%o1+0],%o4     ! 1
  41
  42         .align  4
     /* Dispatch stubs.  Each one biases %o0/%o1 so that the fixed offsets
        used at its loop entry point address res_ptr[0] for the first store
        and s1_ptr[1] for the next load (LOC(01) needs no bias), and uses
        orcc %g0,%g0,%g2 in the branch delay slot to zero %g2 and clear the
        carry flag before the first limb.  */
  43 3:
  44 LOC(00):
  45         add     %o0,-4,%o0
  46         add     %o1,-4,%o1
  47         b       LOC(loop00)             /* 4, 8, 12, ... */
  48          orcc   %g0,%g0,%g2
  49 LOC(01):
  50         b       LOC(loop01)             /* 1, 5, 9, ... */
  51          orcc   %g0,%g0,%g2
     /* Padding so that the next stub starts 16 bytes after this one.  */
  52         nop
  53         nop
  54 LOC(10):
  55         add     %o0,-12,%o0     /* 2, 6, 10, ... */
  56         add     %o1,4,%o1
  57         b       LOC(loop10)
  58          orcc   %g0,%g0,%g2
     /* NOTE(review): this stub is five instructions long, so dispatch
        offset 48 (size mod 4 == 3) appears to land on the nop below and
        fall through into LOC(11).  Assumes LOC() emits only a label and
        that every instruction occupies 4 bytes -- confirm.  */
  59         nop
  60 LOC(11):
  61         add     %o0,-8,%o0      /* 3, 7, 11, ... */
  62         add     %o1,-8,%o1
  63         b       LOC(loop11)
  64          orcc   %g0,%g0,%g2
  65
     /* Main loop, four limbs per pass.  For each limb: umul leaves the low
        product word in %g3 (the high word is read back from %y), the low
        word is added to the previous high word in %g2 (with carry) and
        stored, and rd %y,%g2 fetches the high word for the next limb.  The
        following source limb is loaded between the add and the store.
        The trailing "! n" tags appear to mark which of the four unrolled
        limbs each instruction belongs to.  */
  66 LOC(loop):
  67         addcc   %g3,%g2,%g3     ! 1
  68         ld      [%o1+4],%o4     ! 2
  69         st      %g3,[%o0+0]     ! 1
  70         rd      %y,%g2          ! 1
  71 LOC(loop00):
  72         umul    %o4,%o3,%g3     ! 2
  73         addxcc  %g3,%g2,%g3     ! 2
  74         ld      [%o1+8],%o4     ! 3
  75         st      %g3,[%o0+4]     ! 2
  76         rd      %y,%g2          ! 2
  77 LOC(loop11):
  78         umul    %o4,%o3,%g3     ! 3
  79         addxcc  %g3,%g2,%g3     ! 3
  80         ld      [%o1+12],%o4    ! 4
  81         add     %o1,16,%o1
  82         st      %g3,[%o0+8]     ! 3
  83         rd      %y,%g2          ! 3
  84 LOC(loop10):
  85         umul    %o4,%o3,%g3     ! 4
  86         addxcc  %g3,%g2,%g3     ! 4
  87         ld      [%o1+0],%o4     ! 1
  88         st      %g3,[%o0+12]    ! 4
  89         add     %o0,16,%o0
  90         rd      %y,%g2          ! 4
     /* Fold the pending carry into %g2 here: the addcc on %o2 that follows
        overwrites the condition codes.  */
  91         addx    %g0,%g2,%g2
     /* Loop control: branch back while size - 4 > 0 (signed compare).  The
        umul sits in the delay slot of a non-annulled branch, so it runs
        whether or not the branch is taken.  */
  92 LOC(loop01):
  93         addcc   %o2,-4,%o2
  94         bg      LOC(loop)
  95          umul   %o4,%o3,%g3     ! 1
  96
     /* Tail: finish the limb multiplied in the delay slot above, store its
        low word, and return its high word plus carry in %o0 (computed in
        the retl delay slot).  */
  97         addcc   %g3,%g2,%g3     ! 4
  98         st      %g3,[%o0+0]     ! 4
  99         rd      %y,%g2          ! 4
 100         retl
 101          addx   %g0,%g2,%o0
 102
 103 END(__mpn_mul_1)