sysdeps/sparc/sparc32/sparcv8/addmul_1.S

   1 ! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
   2 ! add the result to a second limb vector.
   3
   4 ! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
   5
   6 ! This file is part of the GNU MP Library.
   7
   8 ! The GNU MP Library is free software; you can redistribute it and/or modify
   9 ! it under the terms of the GNU Lesser General Public License as published by
  10 ! the Free Software Foundation; either version 2.1 of the License, or (at your
  11 ! option) any later version.
  12
  13 ! The GNU MP Library is distributed in the hope that it will be useful, but
  14 ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 ! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 ! License for more details.
  17
  18 ! You should have received a copy of the GNU Lesser General Public License
  19 ! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  20 ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  21 ! MA 02111-1307, USA.
  22
  23
  24 ! INPUT PARAMETERS
  25 ! res_ptr       o0
  26 ! s1_ptr        o1
  27 ! size          o2
  28 ! s2_limb       o3
  29
  30 #include <sysdep.h>
  31
  32 ENTRY(__mpn_addmul_1)
     ! Multiply the size-limb vector at s1_ptr by s2_limb and add the product
     ! into the size-limb vector at res_ptr, i.e. for each 32-bit limb
     !     res_ptr[i] += s1_ptr[i] * s2_limb      (carry moves to higher i)
     ! The carry-out limb is returned in %o0.
     !
     ! Leaf routine: no save/restore, returns with retl.
     ! Modifies %o0-%o2, %o4, %g1-%g4, %y and the integer condition codes.
     !
     ! The loop body is unrolled to handle four limbs per pass.  size mod 4
     ! picks one of four entry stubs, which bias the pointers and jump into
     ! the matching place in the unrolled body.  A size of 0 picks LOC(00)
     ! and is handled like a multiple of 4, so callers presumably must pass
     ! size >= 1 -- TODO confirm against the mpn calling convention.
  33         ld      [%o1+0],%o4     ! 1: %o4 = s1_ptr[0], first limb to multiply
  34         sll     %o2,4,%g1               ! %g1 = size * 16 (one stub is 16 bytes)
  35         orcc    %g0,%g0,%g2             ! carry limb %g2 = 0, carry flag cleared
  36         mov     %o7,%g4                 ! Save return address register
  37         and     %g1,(4-1)<<4,%g1        ! %g1 = (size mod 4) * 16 = stub offset
  38 1:      call    2f                      ! only used to get the address of 1b in %o7
  39          add    %o7,3f-1b,%g3           ! (delay slot) %g3 = run-time address of 3:
  40 2:      jmp     %g3+%g1                 ! dispatch to LOC(00), (01), (10) or (11)
  41          mov    %g4,%o7                 ! Restore return address register
  42
     ! Entry stubs.  Each one is exactly four instructions (16 bytes), padded
     ! with nop, so that the computed jump above lands on its first
     ! instruction.  The pointer biases make the offsets used at the chosen
     ! loop entry point refer to result limb 0 and source limb 1 (source
     ! limb 0 is already in %o4).
  43         .align  4
  44 3:
  45 LOC(00):
  46         add     %o0,-4,%o0              ! so that [%o0+4] is result limb 0
  47         b       LOC(loop00)             /* size = 4, 8, 12, ... */
  48          add    %o1,-4,%o1              ! (delay slot) so that [%o1+8] is source limb 1
  49         nop                             ! pad stub to 16 bytes
  50 LOC(01):
  51         b       LOC(loop01)             /* size = 1, 5, 9, ... ; no bias needed */
  52          nop                            ! (delay slot)
  53         nop                             ! pad stub to 16 bytes
  54         nop                             ! pad stub to 16 bytes
  55 LOC(10):
  56         add     %o0,-12,%o0     /* size = 2, 6, 10, ... ; [%o0+12] is result limb 0 */
  57         b       LOC(loop10)
  58          add    %o1,4,%o1               ! (delay slot) so that [%o1+0] is source limb 1
  59         nop                             ! pad stub to 16 bytes
  60 LOC(11):
  61         add     %o0,-8,%o0      /* size = 3, 7, 11, ... ; [%o0+8] is result limb 0 */
  62         b       LOC(loop11)
  63          add    %o1,-8,%o1              ! (delay slot) so that [%o1+12] is source limb 1
  64         nop                             ! pad stub to 16 bytes
  65
     ! Main loop, four limbs per pass.  LOC(loop00), LOC(loop11) and
     ! LOC(loop10) each start by multiplying the source limb held in %o4 and
     ! fetch the source limb for the following stage.  %g2 carries the high
     ! product word (plus carry) from one stage to the next.  The carry flag
     ! produced by the add into the result limb is left pending and consumed
     ! by the addxcc of the next stage, so nothing in between may change the
     ! condition codes.  Only at LOC(loop01) is the flag folded into %g2
     ! first, because the loop counter update there overwrites it.
     ! LOC(loop) is entered with the product already in %y:%g3 (it is formed
     ! in the delay slot of the loop branch).
  66 LOC(loop):
  67         addcc   %g3,%g2,%g3     ! 1: low product word + carry limb
  68         ld      [%o1+4],%o4     ! 2: source limb for LOC(loop00)
  69         rd      %y,%g2          ! 1: high product word
  70         addx    %g0,%g2,%g2     ! carry limb = high word + carry flag
  71         ld      [%o0+0],%g1     ! 2  (result limb to add into)
  72         addcc   %g1,%g3,%g3     ! add to result limb; carry flag stays pending
  73         st      %g3,[%o0+0]     ! 1
  74 LOC(loop00):
  75         umul    %o4,%o3,%g3     ! 2: %y:%g3 = %o4 * s2_limb
  76         ld      [%o0+4],%g1     ! 2  (result limb to add into)
  77         addxcc  %g3,%g2,%g3     ! 2: low word + carry limb + pending carry flag
  78         ld      [%o1+8],%o4     ! 3: source limb for LOC(loop11)
  79         rd      %y,%g2          ! 2: high product word
  80         addx    %g0,%g2,%g2     ! carry limb = high word + carry flag
  81         nop                     ! NOTE(review): purpose not evident here, presumably scheduling
  82         addcc   %g1,%g3,%g3     ! add to result limb; carry flag stays pending
  83         st      %g3,[%o0+4]     ! 2
  84 LOC(loop11):
  85         umul    %o4,%o3,%g3     ! 3: %y:%g3 = %o4 * s2_limb
  86         addxcc  %g3,%g2,%g3     ! 3: low word + carry limb + pending carry flag
  87         ld      [%o1+12],%o4    ! 4: source limb for LOC(loop10)
  88         rd      %y,%g2          ! 3: high product word
  89         add     %o1,16,%o1      ! advance s1_ptr by four limbs (flags untouched)
  90         addx    %g0,%g2,%g2     ! carry limb = high word + carry flag
  91         ld      [%o0+8],%g1     ! 2  (result limb to add into)
  92         addcc   %g1,%g3,%g3     ! add to result limb; carry flag stays pending
  93         st      %g3,[%o0+8]     ! 3
  94 LOC(loop10):
  95         umul    %o4,%o3,%g3     ! 4: %y:%g3 = %o4 * s2_limb
  96         addxcc  %g3,%g2,%g3     ! 4: low word + carry limb + pending carry flag
  97         ld      [%o1+0],%o4     ! 1: source limb for LOC(loop01) (s1_ptr already advanced)
  98         rd      %y,%g2          ! 4: high product word
  99         addx    %g0,%g2,%g2     ! carry limb = high word + carry flag
 100         ld      [%o0+12],%g1    ! 2  (result limb to add into)
 101         addcc   %g1,%g3,%g3     ! add to result limb
 102         st      %g3,[%o0+12]    ! 4
 103         add     %o0,16,%o0      ! advance res_ptr by four limbs (flags untouched)
 104         addx    %g0,%g2,%g2     ! fold pending carry flag into carry limb now
 105 LOC(loop01):
 106         addcc   %o2,-4,%o2      ! count down by four; overwrites the carry flag
 107         bg      LOC(loop)       ! loop while limbs remain beyond the one in %o4
 108          umul   %o4,%o3,%g3     ! 1: (delay slot, always executed) %y:%g3 = %o4 * s2_limb
 109
     ! Last limb: its product was formed in the delay slot above.
 110         addcc   %g3,%g2,%g3     ! 4: low product word + carry limb
 111         rd      %y,%g2          ! 4: high product word
 112         addx    %g0,%g2,%g2     ! carry limb = high word + carry flag
 113         ld      [%o0+0],%g1     ! 2  (last result limb)
 114         addcc   %g1,%g3,%g3     ! add to result limb
 115         st      %g3,[%o0+0]     ! 4
 116         retl
 117          addx   %g0,%g2,%o0     ! (delay slot) return carry limb + final carry flag
 118
 119 END(__mpn_addmul_1)