beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / power / submul_1.asm
blob1788e0d4f4f9b7358a59e0b47ef85e3aa9c11074
1 dnl IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
2 dnl the result from a second limb vector.
4 dnl Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
33 dnl INPUT PARAMETERS
34 dnl res_ptr r3
35 dnl s1_ptr r4
36 dnl size r5
37 dnl s2_limb r6
39 dnl The POWER architecture has no unsigned 32x32->64 bit multiplication
40 dnl instruction. To obtain that operation, we have to use the 32x32->64
41 dnl signed multiplication instruction, and add the appropriate compensation to
42 dnl the high limb of the result. We add the multiplicand if the multiplier
43 dnl has its most significant bit set, and we add the multiplier if the
44 dnl multiplicand has its most significant bit set. We need to preserve the
45 dnl carry flag between each iteration, so we have to compute the compensation
46 dnl carefully (the natural, srai+and doesn't work). Since all POWER can
47 dnl branch in zero cycles, we use conditional branches for the compensation.
49 include(`../config.m4')
51 ASM_START()
PROLOGUE(mpn_submul_1)
C mpn_submul_1 -- {res_ptr,size} -= {s1_ptr,size} * s2_limb, leaving the
C most significant (borrow-out) limb in r3 on return.
C
C Register roles inside this routine:
C   r3      res_ptr, biased by -4 so that 4(r3) is always the current limb
C   r4      s1_ptr, advanced by the update-form loads
C   r5      size, moved to CTR and used only as the loop count
C   r6      s2_limb (multiplier)
C   r0      current s1 limb (positive path reuses it for the low product)
C   r7      scratch: compensation mask, low product, then current res limb
C   r8      result limb waiting to be stored
C   r9/r10  cy_limb; the loops are unrolled twice and alternate between them
C   r11     low product + cy_limb, then junk after the carry inversion
C
C cr0 first holds the sign of s2_limb (selects Lpos/Lneg once), and inside
C the loops it holds the sign of the current s1 limb.  The carry bit is live
C from one limb to the next, so the high-limb compensation inside the loops
C is done with cax, which leaves the carry alone.

	cal	3,-4(3)		C bias res_ptr; 4(3) now addresses res[0]
	l	0,0(4)		C r0 = first s1 limb
	cmpi	0,6,0		C cr0 = sign of s2_limb
	mtctr	5		C CTR = size
	mul	9,0,6		C r9 = high half of signed product, MQ = low half
	srai	7,0,31		C r7 = all ones if s1 limb has its msb set, else 0
	and	7,7,6		C r7 = s2_limb if s1 limb is negative, else 0
	mfmq	11		C r11 = low half of product
	cax	9,9,7		C compensate high limb for negative s1 limb
	l	7,4(3)		C r7 = res limb
	sf	8,11,7		C r8 = res_limb - low limb
	a	11,8,11		C invert cy so it means borrow (r11 is junk)
	blt	Lneg		C s2_limb negative: use the loop adding s1 limbs
Lpos:	bdz	Lend		C single limb: finish up

C Loop for s2_limb >= 0.  Only a negative s1 limb needs compensation.
Lploop:	lu	0,4(4)		C r0 = next s1 limb, advance s1_ptr
	stu	8,4(3)		C store previous result limb, advance res_ptr
	cmpi	0,0,0		C cr0 = sign of s1 limb
	mul	10,0,6
	mfmq	0		C r0 = low half of product
	ae	11,0,9		C low limb + old_cy_limb + old cy
	l	7,4(3)		C r7 = res limb
	aze	10,10		C propagate cy to new cy_limb
	sf	8,11,7		C r8 = res_limb - (low limb + cy_limb)
	a	11,8,11		C invert cy (r11 is junk)
	bge	Lp0
	cax	10,10,6		C adjust high limb for negative limb from s1
Lp0:	bdz	Lend0		C done, with cy_limb in r10
	lu	0,4(4)		C second unrolled copy: r9 and r10 swap roles
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	0
	ae	11,0,10		C low limb + old_cy_limb + old cy
	l	7,4(3)
	aze	9,9		C propagate cy to new cy_limb
	sf	8,11,7		C r8 = res_limb - (low limb + cy_limb)
	a	11,8,11		C invert cy (r11 is junk)
	bge	Lp1
	cax	9,9,6		C adjust high limb for negative limb from s1
Lp1:	bdn	Lploop

	b	Lend		C done, with cy_limb in r9

C Loop for s2_limb < 0.  The s1 limb is always added to the high limb, and
C s2_limb is added as well when the s1 limb is negative.
Lneg:	cax	9,9,0		C compensate first high limb with the s1 limb
	bdz	Lend
Lnloop:	lu	0,4(4)		C r0 = next s1 limb, advance s1_ptr
	stu	8,4(3)		C store previous result limb, advance res_ptr
	cmpi	0,0,0		C cr0 = sign of s1 limb
	mul	10,0,6
	mfmq	7		C low half goes to r7; r0 must keep the s1 limb
	ae	11,7,9		C low limb + old_cy_limb + old cy
	l	7,4(3)		C r7 = res limb
	ae	10,10,0		C propagate cy to new cy_limb, plus s1 limb
	sf	8,11,7		C r8 = res_limb - (low limb + cy_limb)
	a	11,8,11		C invert cy (r11 is junk)
	bge	Ln0
	cax	10,10,6		C adjust high limb for negative limb from s1
Ln0:	bdz	Lend0		C done, with cy_limb in r10
	lu	0,4(4)		C second unrolled copy: r9 and r10 swap roles
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	7
	ae	11,7,10		C low limb + old_cy_limb + old cy
	l	7,4(3)
	ae	9,9,0		C propagate cy to new cy_limb, plus s1 limb
	sf	8,11,7		C r8 = res_limb - (low limb + cy_limb)
	a	11,8,11		C invert cy (r11 is junk)
	bge	Ln1
	cax	9,9,6		C adjust high limb for negative limb from s1
Ln1:	bdn	Lnloop
	b	Lend		C done, with cy_limb in r9

Lend0:	cal	9,0(10)		C move cy_limb from r10 to r9
Lend:	st	8,4(3)		C store the last result limb
	aze	3,9		C r3 = cy_limb + final cy
	br			C return to caller; without this we run off the end
EPILOGUE(mpn_submul_1)