sysdeps/rs6000/submul_1.s

   1 # IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
   2 # the result from a second limb vector.
   3
   4 # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
   5
   6 # This file is part of the GNU MP Library.
   7
   8 # The GNU MP Library is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU Lesser General Public License as published by
  10 # the Free Software Foundation; either version 2.1 of the License, or (at your
  11 # option) any later version.
  12
  13 # The GNU MP Library is distributed in the hope that it will be useful, but
  14 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 # License for more details.
  17
  18 # You should have received a copy of the GNU Lesser General Public License
  19 # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  20 # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  21 # MA 02111-1307, USA.
  22
  23
  24 # INPUT PARAMETERS
  25 # res_ptr       r3
  26 # s1_ptr        r4
  27 # size          r5
  28 # s2_limb       r6
  29
  30 # The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
  31 # obtain that operation, we have to use the 32x32->64 signed multiplication
  32 # instruction, and add the appropriate compensation to the high limb of the
  33 # result.  We add the multiplicand if the multiplier has its most significant
  34 # bit set, and we add the multiplier if the multiplicand has its most
  35 # significant bit set.  We need to preserve the carry flag between each
  36 # iteration, so we have to compute the compensation carefully (the natural
  37 # srai+and sequence alters the carry).  Since the POWER architecture has a branch unit
  38 # we can branch in zero cycles, so that's how we perform the additions.
  39
  40         .toc                            # open the TOC section; no TOC entries are defined in this file
     # Linkage set-up: two symbols are exported below.  `__mpn_submul_1' labels
     # a three-word descriptor placed in a [DS] csect; `.__mpn_submul_1' labels
     # the first instruction of the routine in the [PR] csect.
  41         .csect .__mpn_submul_1[PR]      # declare the code csect first so .align applies to it
  42         .align 2                        # NOTE(review): presumably a power-of-two alignment (4 bytes) -- confirm with the assembler manual
  43         .globl __mpn_submul_1           # export the descriptor symbol
  44         .globl .__mpn_submul_1          # export the code entry symbol
  45         .csect __mpn_submul_1[DS]       # switch to the descriptor csect
  46 __mpn_submul_1:
  47         .long .__mpn_submul_1[PR], TOC[tc0], 0  # descriptor words: code csect address, TOC[tc0], and a zero word
  48         .csect .__mpn_submul_1[PR]      # back to the code csect
  49 .__mpn_submul_1:
     # Multiply the `size' limbs at s1_ptr by s2_limb and subtract the products,
     # limb by limb, from the limbs at res_ptr, storing the differences back
     # through res_ptr.
     #
     # Register use in this routine:
     #   r3      res_ptr, biased by -4 so that 4(r3) is the current result limb;
     #           overwritten at the very end with the final carry limb
     #   r4      s1_ptr, advanced by each `lu'
     #   r5      size, copied to CTR (the limb counter)
     #   r6      s2_limb
     #   r0      current s1 limb (reused for the low product in the Lp loop)
     #   r7      current res limb (briefly the low product in the Ln loop)
     #   r8      computed result limb, stored by the following `stu'/`st'
     #   r9/r10  carry limb (high product word); the two alternate between the
     #           halves of the two-way unrolled loops
     #   r11     scratch
     #   cr0     sign test of s2_limb (entry) or of the current s1 limb (loops)
     #   carry   borrow pending from the previous limb's subtraction
     #
     # The first limb is processed before CTR is ever tested, so the code as
     # written expects size >= 1.
     # NOTE(review): r3 on exit is presumably the function's return value
     # (the limb borrowed out of the most significant position) -- confirm
     # against the C prototype.
  50
  51         cal     3,-4(3)                 # r3 = res_ptr - 4 (bias for the 4(r3) / stu addressing below)
  52         l       0,0(4)                  # r0 = first s1 limb
  53         cmpi    0,6,0                   # cr0 = sign of s2_limb; consumed by `blt Lneg' below
  54         mtctr   5                       # CTR = size
  55         mul     9,0,6                   # signed multiply: r9 = high word, MQ = low word
  56         srai    7,0,31                  # r7 = all ones if the s1 limb is negative, else 0 (carry not yet live here)
  57         and     7,7,6                   # r7 = s2_limb if the s1 limb is negative, else 0
  58         mfmq    11                      # r11 = low word of the product
  59         cax     9,9,7                   # compensate high word for a negative s1 limb (carry untouched)
  60         l       7,4(3)                  # r7 = first res limb
  61         sf      8,11,7          # r8 = res_limb - low product limb
  62         a       11,8,11         # invert cy: carry is now set iff the subtract borrowed (r11 is junk)
  63         blt     Lneg                    # s2_limb negative: use the loop that also adds the s1 limb
  64 Lpos:   bdz     Lend                    # s2_limb non-negative; finished if that was the only limb
  65
     # Loop for non-negative s2_limb, unrolled twice.  First half: carry limb
     # comes in via r9 and goes out via r10.
  66 Lploop: lu      0,4(4)                  # r0 = next s1 limb, r4 += 4
  67         stu     8,4(3)                  # store previous result limb, r3 += 4
  68         cmpi    0,0,0                   # cr0 = sign of this s1 limb; consumed by `bge Lp0'
  69         mul     10,0,6                  # r10 = high word, MQ = low word
  70         mfmq    0                       # r0 = low word (the s1 limb itself is no longer needed)
  71         ae      11,0,9          # low limb + old_cy_limb + old cy
  72         l       7,4(3)                  # r7 = current res limb
  73         aze     10,10           # propagate cy to new cy_limb
  74         sf      8,11,7          # r8 = res_limb - (low limb + old_cy_limb + old cy)
  75         a       11,8,11         # invert cy: carry is now set iff the subtract borrowed (r11 is junk)
  76         bge     Lp0                     # s1 limb non-negative: no compensation needed
  77         cax     10,10,6         # adjust high limb for negative limb from s1
  78 Lp0:    bdz     Lend0                   # last limb done; carry limb is in r10
     # Second half: same steps with the roles of r9 and r10 exchanged.
  79         lu      0,4(4)                  # r0 = next s1 limb, r4 += 4
  80         stu     8,4(3)                  # store previous result limb, r3 += 4
  81         cmpi    0,0,0                   # cr0 = sign of this s1 limb; consumed by `bge Lp1'
  82         mul     9,0,6                   # r9 = high word, MQ = low word
  83         mfmq    0                       # r0 = low word
  84         ae      11,0,10                 # low limb + old_cy_limb (r10) + old cy
  85         l       7,4(3)                  # r7 = current res limb
  86         aze     9,9                     # propagate cy to new cy_limb
  87         sf      8,11,7                  # r8 = res_limb - (low limb + old_cy_limb + old cy)
  88         a       11,8,11         # invert cy: carry is now set iff the subtract borrowed (r11 is junk)
  89         bge     Lp1                     # s1 limb non-negative: no compensation needed
  90         cax     9,9,6           # adjust high limb for negative limb from s1
  91 Lp1:    bdn     Lploop                  # decrement CTR and loop while limbs remain
  92
  93         b       Lend                    # done; carry limb is in r9
  94
     # Path for negative s2_limb: each high word additionally gets the s1 limb
     # added to it.
  95 Lneg:   cax     9,9,0                   # compensate the first high word: r9 += first s1 limb (still in r0)
  96         bdz     Lend                    # finished if that was the only limb
     # First half: carry limb comes in via r9 and goes out via r10.
  97 Lnloop: lu      0,4(4)                  # r0 = next s1 limb, r4 += 4
  98         stu     8,4(3)                  # store previous result limb, r3 += 4
  99         cmpi    0,0,0                   # cr0 = sign of this s1 limb; consumed by `bge Ln0'
 100         mul     10,0,6                  # r10 = high word, MQ = low word
 101         mfmq    7                       # r7 = low word; r0 must keep the s1 limb for the `ae' below
 102         ae      11,7,9                  # low limb + old_cy_limb + old cy
 103         l       7,4(3)                  # r7 = current res limb
 104         ae      10,10,0         # propagate cy to new cy_limb and add the s1 limb (negative s2_limb)
 105         sf      8,11,7          # r8 = res_limb - (low limb + old_cy_limb + old cy)
 106         a       11,8,11         # invert cy: carry is now set iff the subtract borrowed (r11 is junk)
 107         bge     Ln0                     # s1 limb non-negative: no further compensation
 108         cax     10,10,6         # adjust high limb for negative limb from s1
 109 Ln0:    bdz     Lend0                   # last limb done; carry limb is in r10
     # Second half: same steps with the roles of r9 and r10 exchanged.
 110         lu      0,4(4)                  # r0 = next s1 limb, r4 += 4
 111         stu     8,4(3)                  # store previous result limb, r3 += 4
 112         cmpi    0,0,0                   # cr0 = sign of this s1 limb; consumed by `bge Ln1'
 113         mul     9,0,6                   # r9 = high word, MQ = low word
 114         mfmq    7                       # r7 = low word; r0 keeps the s1 limb
 115         ae      11,7,10                 # low limb + old_cy_limb (r10) + old cy
 116         l       7,4(3)                  # r7 = current res limb
 117         ae      9,9,0           # propagate cy to new cy_limb and add the s1 limb (negative s2_limb)
 118         sf      8,11,7          # r8 = res_limb - (low limb + old_cy_limb + old cy)
 119         a       11,8,11         # invert cy: carry is now set iff the subtract borrowed (r11 is junk)
 120         bge     Ln1                     # s1 limb non-negative: no further compensation
 121         cax     9,9,6           # adjust high limb for negative limb from s1
 122 Ln1:    bdn     Lnloop                  # decrement CTR and loop while limbs remain
 123         b       Lend                    # done; carry limb is in r9
 124
     # Common exit.  Lend0 is entered when the carry limb is in r10, Lend when
     # it is already in r9.
 125 Lend0:  cal     9,0(10)                 # r9 = r10
 126 Lend:   st      8,4(3)                  # store the last result limb
 127         aze     3,9                     # r3 = final carry limb + pending carry
 128         br                              # return to the caller