beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc64 / mode64 / aorsmul_1.asm
blob0c12f9b6608c6d2b596d8bbae2bd8856c2daa206
1 dnl PowerPC-64 mpn_addmul_1 and mpn_submul_1.
3 dnl Copyright 1999-2001, 2003-2006, 2010-2012 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C mpn_addmul_1 mpn_submul_1
34 C cycles/limb cycles/limb
35 C POWER3/PPC630 6-18 6-18
36 C POWER4/PPC970 8 8.3
37 C POWER5 8 8.25
38 C POWER6 16.25 16.75
39 C POWER7 3.77 4.9
41 C TODO
42 C * Try to reduce the number of needed live registers
43 C * Add support for _1c entry points
C INPUT PARAMETERS
C m4 aliases for the four argument registers.
define(`rp', `r3')	C limb array that is loaded, updated and stored back
define(`up', `r4')	C limb array that is only loaded
define(`n',  `r5')	C limb count; r5 becomes scratch once ctr is set
define(`vl', `r6')	C multiplier limb (the code refers to it as r6)

C Operation selection.  Presumably the build defines exactly one of
C OPERATION_addmul_1 and OPERATION_submul_1 -- confirm against the build
C rules.  Each branch provides:
C   ADDSUB    combine a product limb with an rp limb, starting a carry chain
C   ADDSUBC   the same, continuing the carry chain
C   func      entry point name handed to PROLOGUE
C   func_nc   carry-in entry point name; nothing visible in this file uses it
C   SM(x)     expands to x for submul_1 and to nothing for addmul_1
C
C Every quoted ifdef body must end with its own closing quote and paren,
C otherwise m4 treats the remainder of the file as part of the argument.

ifdef(`OPERATION_addmul_1',`
  define(ADDSUBC,	adde)
  define(ADDSUB,	addc)
  define(func,		mpn_addmul_1)
  define(func_nc,	mpn_addmul_1c)	C FIXME: not really supported
  define(SM,		`')
')
ifdef(`OPERATION_submul_1',`
  define(ADDSUBC,	subfe)
  define(ADDSUB,	subfc)
  define(func,		mpn_submul_1)
  define(func_nc,	mpn_submul_1c)	C FIXME: not really supported
  define(SM,		`$1')
')
MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

C ---------------------------------------------------------------------------
C func (rp, up, n, vl)		func is mpn_addmul_1 or mpn_submul_1
C
C For each of the n limbs: multiply up[i] by vl (r6), add the carry limb of
C the previous product, then combine the result with rp[i] using ADDSUB or
C ADDSUBC and store it back to rp[i].  The last carry limb plus the final
C carry (addmul) or borrow (submul) is returned in r3.
C
C n >= 1 is assumed: the dispatch below has no case for n = 0.
C
C Structure:
C   ctr = (n + 3) / 4.  L(top) handles 4 limbs per pass and L(end) always
C   handles the final 2 limbs.  The entry blocks absorb the remainder:
C     n & 3 == 3   L(b11)  1 limb,  then joins at L(bot)
C     n & 3 == 0   L(b00)  2 limbs, then joins at L(bot)
C     n & 3 == 1   L(b01)  n == 1: 1 limb and return
C                          else L(gt1): 3 limbs, then joins at L(bot)
C     n & 3 == 2   L(b10)  0 limbs, clears CA and the carry limb
C
C Register use:
C   r9, r27	up limbs loaded ahead of the multiplies that consume them
C   r12		carry limb passed from one group of limbs to the next
C   r28-r31	rp limbs
C   r0, r5, r7, r8, r10, r11	products, sums and scratch
C   r27-r31 are stored at negative offsets from r1 on entry, without moving
C   r1, and are reloaded before the return at L(end).
C   NOTE(review): this relies on the ABI leaving the area below the stack
C   pointer untouched -- confirm for every ABI this file is built for.
C
C Borrow handling (submul only, the SM lines):
C   After subfc/subfe CA = 1 means no borrow.  subfe r11,r11,r11 leaves
C   r11 = CA - 1 (0 or -1) and addic r11,r11,1 then sets CA exactly when r11
C   was -1.  Together they invert CA so that the following adde/addze adds
C   the borrow into the next product.
C ---------------------------------------------------------------------------

ASM_START()
PROLOGUE(func)
	std	r31, -8(r1)		C save r27..r31 below r1
	rldicl.	r0, n, 0,62		C r0 = n & 3, set cr0
	std	r30, -16(r1)
	cmpdi	cr6, r0, 2		C cr6 = compare n & 3 with 2
	std	r29, -24(r1)
	addi	n, n, 3			C compute count...
	std	r28, -32(r1)
	srdi	n, n, 2			C ...for ctr
	std	r27, -40(r1)
	mtctr	n			C copy count into ctr
	beq	cr0, L(b00)		C n & 3 == 0
	blt	cr6, L(b01)		C n & 3 == 1
	beq	cr6, L(b10)		C n & 3 == 2
					C n & 3 == 3 falls through

C One limb, then enter the loop at L(bot).
L(b11):	ld	r9, 0(up)
	ld	r28, 0(rp)
	mulld	r0, r9, r6
	mulhdu	r12, r9, r6		C carry limb for the next group
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	addi	rp, rp, 8
	ld	r9, 8(up)		C preload the next two up limbs
	ld	r27, 16(up)
	addi	up, up, 24
SM(`	subfe	r11, r11, r11		')
	b	L(bot)

C Two limbs, then enter the loop at L(bot).
	ALIGN(16)
L(b00):	ld	r9, 0(up)
	ld	r27, 8(up)
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	addc	r7, r7, r5		C low(1) + high(0)
	addze	r12, r8			C carry limb for the next group
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
	addi	rp, rp, 16
	ld	r9, 16(up)		C preload the next two up limbs
	ld	r27, 24(up)
	addi	up, up, 32
SM(`	subfe	r11, r11, r11		')
	b	L(bot)

C n & 3 == 1.  bdnz decrements ctr; it reaches zero only when n == 1.
	ALIGN(16)
L(b01):	bdnz	L(gt1)
C n == 1: single limb.  Only r0, r8, r9, r11 and r3 are written on this
C path, so r27..r31 do not need to be reloaded before returning.
	ld	r9, 0(up)
	ld	r11, 0(rp)
	mulld	r0, r9, r6
	mulhdu	r8, r9, r6
	ADDSUB	r0, r0, r11
	std	r0, 0(rp)
SM(`	subfe	r11, r11, r11		')
SM(`	addic	r11, r11, 1		')
	addze	r3, r8			C return high limb + carry/borrow
	blr				C must return here, L(gt1) follows

C n & 3 == 1 and n > 1: three limbs, then enter the loop at L(bot).
L(gt1):	ld	r9, 0(up)
	ld	r27, 8(up)
	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	ld	r9, 16(up)
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	ld	r30, 16(rp)
	mulld	r11, r9, r6
	mulhdu	r10, r9, r6
	addc	r7, r7, r5		C low(1) + high(0)
	adde	r11, r11, r8		C low(2) + high(1)
	addze	r12, r10		C carry limb for the next group
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
	ADDSUBC	r11, r11, r30
	std	r11, 16(rp)
	addi	rp, rp, 24
	ld	r9, 24(up)		C preload the next two up limbs
	ld	r27, 32(up)
	addi	up, up, 40
SM(`	subfe	r11, r11, r11		')
	b	L(bot)

C n & 3 == 2: nothing to do up front except set up an empty carry state.
L(b10):	addic	r0, r0, 0		C clear CA
	li	r12, 0			C cy_limb = 0
	ld	r9, 0(up)
	ld	r27, 8(up)
	bdz	L(end)			C n == 2: only the tail remains
	addi	up, up, 16

C Main loop, four limbs per pass.  On entry r9 and r27 hold the first two
C up limbs of the group, r12 the incoming carry limb and CA the incoming
C carry bit.
	ALIGN(16)
L(top):	mulld	r0, r9, r6
	mulhdu	r5, r9, r6		C 9
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6		C 27
	ld	r9, 0(up)
	ld	r28, 0(rp)
	ld	r27, 8(up)
	ld	r29, 8(rp)
	adde	r0, r0, r12		C 0 12
	adde	r7, r7, r5		C 5 7
	mulld	r5, r9, r6
	mulhdu	r10, r9, r6		C 9
	mulld	r11, r27, r6
	mulhdu	r12, r27, r6		C 27
	ld	r9, 16(up)
	ld	r30, 16(rp)
	ld	r27, 24(up)
	ld	r31, 24(rp)
	adde	r5, r5, r8		C 8 5
	adde	r11, r11, r10		C 10 11
	addze	r12, r12		C 12
	ADDSUB	r0, r0, r28		C 0 28
	std	r0, 0(rp)		C 0
	ADDSUBC	r7, r7, r29		C 7 29
	std	r7, 8(rp)		C 7
	ADDSUBC	r5, r5, r30		C 5 30
	std	r5, 16(rp)		C 5
	ADDSUBC	r11, r11, r31		C 11 31
	std	r11, 24(rp)		C 11
	addi	up, up, 32
SM(`	subfe	r11, r11, r11		')
	addi	rp, rp, 32
C Common join point for the entry blocks.  Each of them has already
C executed its own SM subfe, so only the addic half is needed here.
L(bot):
SM(`	addic	r11, r11, 1		')
	bdnz	L(top)

C Tail: the final two limbs, whose up values are already in r9 and r27.
L(end):	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	adde	r0, r0, r12
	adde	r7, r7, r5
	addze	r8, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
SM(`	subfe	r11, r11, r11		')
SM(`	addic	r11, r11, 1		')
	addze	r3, r8			C return high limb + carry/borrow
	ld	r31, -8(r1)		C reload the registers saved on entry
	ld	r30, -16(r1)
	ld	r29, -24(r1)
	ld	r28, -32(r1)
	ld	r27, -40(r1)
	blr
EPILOGUE()