beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc64 / mode64 / cnd_aors_n.asm
blob24968c19125c20602ca1136e2d0de289cf90c1bc
1 dnl PowerPC-64 mpn_cnd_add_n/mpn_cnd_sub_n.
3 dnl Copyright 1999-2001, 2003-2005, 2007, 2011, 2012 Free Software Foundation,
4 dnl Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
32 include(`../config.m4')
34 C cycles/limb
35 C POWER3/PPC630 ?
36 C POWER4/PPC970 2.25
37 C POWER5 ?
38 C POWER6 3
39 C POWER7 2
41 C INPUT PARAMETERS
42 define(`cnd', `r3') C condition; turned into a 0 / all-ones mask at function entry
43 define(`rp', `r4') C pointer the result limbs are stored through
44 define(`up', `r5') C pointer to the first source operand (s1 limbs)
45 define(`vp', `r6') C pointer to the second source operand (s2 limbs, ANDed with the mask)
46 define(`n', `r7') C limb count; moved into ctr, after which r7 is reused for data
C Macro set selected when this file is built as mpn_cnd_add_n.
C   ADDSUB   first limb operation of a chain: add, write CA, no carry-in
C   ADDSUBC  later limb operations: add with CA as carry-in, write CA
C   GENRVAL  maps r3 = CA - 1 onto the return value: r3 + 1 = carry-out
C   SETCBR   CA = 1 iff the argument register is nonzero; not referenced by
C            the code visible in this file
C   CLRCB    adds 0 to r0, which cannot carry, so CA = 0 (no carry-in)
C The closing quote and parenthesis line ends the ifdef; without it m4 would
C treat everything up to end of file as part of this one argument.
48 ifdef(`OPERATION_cnd_add_n',`
49 define(ADDSUBC, adde)
50 define(ADDSUB, addc)
51 define(func, mpn_cnd_add_n)
52 define(GENRVAL, `addi r3, r3, 1')
53 define(SETCBR, `addic r0, $1, -1')
54 define(CLRCB, `addic r0, r0, 0')
55 ')
C Macro set selected when this file is built as mpn_cnd_sub_n.
C For the subtract-from forms CA = 1 means no borrow and CA = 0 means borrow.
C   ADDSUB   first limb operation of a chain: rD = rB - rA, write CA
C   ADDSUBC  later limb operations: rD = rB - rA - borrow-in, write CA
C   GENRVAL  maps r3 = CA - 1 onto the return value: -r3 = borrow-out
C   SETCBR   CA = 1 iff the argument register is zero; not referenced by
C            the code visible in this file
C   CLRCB    adds -1 to r1, which carries whenever r1 is nonzero, so CA = 1
C            (no borrow-in); assumes r1 is never zero here -- TODO confirm
C The closing quote and parenthesis line ends the ifdef; without it m4 would
C treat everything up to end of file as part of this one argument.
56 ifdef(`OPERATION_cnd_sub_n',`
57 define(ADDSUBC, subfe)
58 define(ADDSUB, subfc)
59 define(func, mpn_cnd_sub_n)
60 define(GENRVAL, `neg r3, r3')
61 define(SETCBR, `subfic r0, $1, 0')
62 define(CLRCB, `addic r0, r1, -1')
63 ')
C NOTE(review): presumably lists the entry points this one source can be built
C as (one per OPERATION_* symbol); confirm against the macro definition, which
C is not visible in this file.
65 MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
67 ASM_START()
68 PROLOGUE(func)
C func is mpn_cnd_add_n or mpn_cnd_sub_n, as selected by the OPERATION_* ifdefs.
C
C Computes rp[i] = up[i] ADDSUB (vp[i] AND mask) for i = 0 .. n-1 with the
C carry/borrow propagated through CA, where mask is all ones when cnd is
C nonzero and zero otherwise.  The same loads, ANDs, arithmetic and stores
C are executed whichever value cnd has.  r3 returns the final carry-out
C (add) or borrow-out (sub) as 0 or 1.
C
C Structure: the n mod 4 leading limbs are handled by L(b01)/L(b10)/L(b11),
C then a software-pipelined loop handles 4 limbs per iteration; ctr holds
C (n + 3) / 4.  n is assumed to be at least 1: with n = 0 ctr is loaded
C with 0 and the bdz at L(go) would wrap it instead of terminating.
C
C r27..r31 are stored at negative offsets from r1 without moving r1 and are
C reloaded at L(ret).  NOTE(review): this relies on the ABI keeping that
C area below r1 safe for a function that makes no calls -- confirm.
69 std r31, -8(r1) C save the scratch registers r27..r31
70 std r30, -16(r1)
71 std r29, -24(r1)
72 std r28, -32(r1)
73 std r27, -40(r1)
75 subfic cnd, cnd, 0 C CA = 1 iff cnd == 0
76 subfe cnd, cnd, cnd C cnd = CA - 1: 0 if cnd was 0, else all ones
78 rldicl. r0, n, 0,62 C r0 = n & 3, set cr0
79 cmpdi cr6, r0, 2
80 addi n, n, 3 C compute count...
81 srdi n, n, 2 C ...for ctr
82 mtctr n C copy count into ctr
83 beq cr0, L(b00) C n & 3 == 0
84 blt cr6, L(b01) C n & 3 == 1
85 beq cr6, L(b10) C n & 3 == 2
C Fall through when n & 3 == 3: handle three leading limbs.
87 L(b11): ld r8, 0(up) C load s1 limb
88 ld r9, 0(vp) C load s2 limb
89 ld r10, 8(up) C load s1 limb
90 ld r11, 8(vp) C load s2 limb
91 ld r12, 16(up) C load s1 limb
92 addi up, up, 24
93 ld r0, 16(vp) C load s2 limb
94 addi vp, vp, 24
95 and r9, r9, cnd C mask the s2 limbs
96 and r11, r11, cnd
97 and r0, r0, cnd
98 ADDSUB r29, r9, r8 C first limb: starts the carry chain, no carry-in
99 ADDSUBC r30, r11, r10
100 ADDSUBC r31, r0, r12
101 std r29, 0(rp)
102 std r30, 8(rp)
103 std r31, 16(rp)
104 addi rp, rp, 24
105 bdnz L(go) C 4-limb groups remain; CA carries into the loop
106 b L(ret)
C n & 3 == 1: handle one leading limb.
108 L(b01): ld r12, 0(up) C load s1 limb
109 addi up, up, 8
110 ld r0, 0(vp) C load s2 limb
111 addi vp, vp, 8
112 and r0, r0, cnd C mask the s2 limb
113 ADDSUB r31, r0, r12 C add/sub, starts the carry chain
114 std r31, 0(rp)
115 addi rp, rp, 8
116 bdnz L(go) C 4-limb groups remain; CA carries into the loop
117 b L(ret)
C n & 3 == 2: handle two leading limbs.
119 L(b10): ld r10, 0(up) C load s1 limb
120 ld r11, 0(vp) C load s2 limb
121 ld r12, 8(up) C load s1 limb
122 addi up, up, 16
123 ld r0, 8(vp) C load s2 limb
124 addi vp, vp, 16
125 and r11, r11, cnd C mask the s2 limbs
126 and r0, r0, cnd
127 ADDSUB r30, r11, r10 C add/sub, starts the carry chain
128 ADDSUBC r31, r0, r12 C add/sub with carry-in
129 std r30, 0(rp)
130 std r31, 8(rp)
131 addi rp, rp, 16
132 bdnz L(go) C 4-limb groups remain; CA carries into the loop
133 b L(ret)
C n & 3 == 0: no leading limbs, so CA is primed to the neutral value because
C the loop below only uses the carry-in form ADDSUBC.
135 L(b00): CLRCB C clear/set cy
C Pipeline fill: load and mask the first group of four limb pairs.
136 L(go): ld r7, 0(up) C load s1 limb (r7 was n; the count now lives in ctr)
137 ld r27, 0(vp) C load s2 limb
138 ld r8, 8(up) C load s1 limb
139 ld r9, 8(vp) C load s2 limb
140 ld r10, 16(up) C load s1 limb
141 ld r11, 16(vp) C load s2 limb
142 ld r12, 24(up) C load s1 limb
143 ld r0, 24(vp) C load s2 limb
144 and r27, r27, cnd C mask the s2 limbs
145 and r9, r9, cnd
146 and r11, r11, cnd
147 and r0, r0, cnd
148 bdz L(end) C that was the last group: just drain it
150 addi up, up, 32
151 addi vp, vp, 32
C Steady state: combine the group loaded previously while loading the next.
C None of the loads, stores, addi or and instructions here modifies CA.
153 L(top): ADDSUBC r28, r27, r7
154 ld r7, 0(up) C load s1 limb
155 ld r27, 0(vp) C load s2 limb
156 ADDSUBC r29, r9, r8
157 ld r8, 8(up) C load s1 limb
158 ld r9, 8(vp) C load s2 limb
159 ADDSUBC r30, r11, r10
160 ld r10, 16(up) C load s1 limb
161 ld r11, 16(vp) C load s2 limb
162 ADDSUBC r31, r0, r12
163 ld r12, 24(up) C load s1 limb
164 ld r0, 24(vp) C load s2 limb
165 std r28, 0(rp)
166 addi up, up, 32
167 std r29, 8(rp)
168 addi vp, vp, 32
169 std r30, 16(rp)
170 std r31, 24(rp)
171 addi rp, rp, 32
172 and r27, r27, cnd C mask the s2 limbs just loaded
173 and r9, r9, cnd
174 and r11, r11, cnd
175 and r0, r0, cnd
176 bdnz L(top) C decrement ctr and loop back
C Pipeline drain: combine and store the final group of four limbs.
178 L(end): ADDSUBC r28, r27, r7
179 ADDSUBC r29, r9, r8
180 ADDSUBC r30, r11, r10
181 ADDSUBC r31, r0, r12
182 std r28, 0(rp)
183 std r29, 8(rp)
184 std r30, 16(rp)
185 std r31, 24(rp)
C Common exit: reload the saved registers; the loads leave CA untouched.
187 L(ret): ld r31, -8(r1)
188 ld r30, -16(r1)
189 ld r29, -24(r1)
190 ld r28, -32(r1)
191 ld r27, -40(r1)
193 subfe r3, r0, r0 C r3 = CA - 1, whatever r0 holds
194 GENRVAL C convert CA - 1 into the 0/1 carry or borrow result
195 blr C return to the caller with the result in r3
196 EPILOGUE()