beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / core2 / aors_err1_n.asm
blob3f875aefa42ecda8f50da5caf91b2531d27c0bb9
1 dnl Core 2 mpn_add_err1_n, mpn_sub_err1_n
3 dnl Contributed by David Harvey.
5 dnl Copyright 2011 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
35 C cycles/limb
36 C AMD K8,K9 ?
37 C AMD K10 ?
38 C Intel P4 ?
39 C Intel core2 4.14
40 C Intel corei ?
41 C Intel atom ?
42 C VIA nano ?
45 C INPUT PARAMETERS
C Argument registers.  The first six arguments arrive in registers; the
C seventh (the incoming carry) is read from the stack at function entry.
define(`rp',		`%rdi')
define(`up',		`%rsi')
define(`vp',		`%rdx')
define(`ep',		`%rcx')
define(`yp',		`%r8')
define(`n',		`%r9')
define(`cy_param',	`8(%rsp)')

C Two-limb error accumulator: el is the low limb, eh collects the carries
C out of el.
define(`el',		`%rbx')
define(`eh',		`%rbp')

C t0-t3 each hold either a limb fetched from yp or zero.
define(`t0',		`%r10')
define(`t1',		`%r11')
define(`t2',		`%r12')
define(`t3',		`%r13')

C w0/w1 hold the result limbs on their way from up/vp to rp.
define(`w0',		`%r14')
define(`w1',		`%r15')

C Exactly one of the OPERATION_ symbols is expected to be defined when this
C file is assembled; it selects the carry-propagating instruction and the
C name of the entry point.  If neither is defined, ADCSBB and func are left
C undefined.
ifdef(`OPERATION_add_err1_n', `
	define(ADCSBB,	adc)
	define(func,	mpn_add_err1_n)')
ifdef(`OPERATION_sub_err1_n', `
	define(ADCSBB,	sbb)
	define(func,	mpn_sub_err1_n)')

C Lists both function names this one source file can provide.
MULFUNC_PROLOGUE(mpn_add_err1_n mpn_sub_err1_n)
C func (rp, up, vp, ep, yp, n, cy)
C
C Expands to mpn_add_err1_n or mpn_sub_err1_n depending on ADCSBB/func.
C
C For each limb i in 0 .. n-1:
C     rp[i] = up[i] ADCSBB vp[i]
C with the carry (or borrow) chained from limb to limb, the chain being
C seeded from bit 0 of cy.  Each time limb i produces a carry out, yp[n-1-i]
C is added into the two-limb accumulator eh:el.
C
C Outputs:
C     rp[0 .. n-1]   result limbs
C     ep[0], ep[1]   el and eh, i.e. low and high limb of the accumulator
C     %al            final carry out, 0 or 1 (the upper bytes of %rax keep
C                    whatever was loaded from cy_param)
C
C Register use: %rbx, %rbp and %r12-%r15 are saved on entry and restored
C before returning.  %r10 and %r11 are overwritten, and %rsi, %rdx, %rdi,
C %r8 and %r9 are modified in place.
C
C Structure: n mod 4 selects an entry stub that handles 1, 2 or 3 leading
C limbs; the remaining multiple of four limbs go through the 4-way unrolled
C L(loop).  Between groups of limbs the carry is parked in %al (setc) and
C moved back into CF with shr $1, %al.
C
C NOTE(review): n = 0 is not handled; it takes the 0mod4 path and enters
C L(loop) with a zero counter.  Callers presumably guarantee n >= 1.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	mov	cy_param, %rax		C read cy before the pushes move %rsp

	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	C Point up, vp, rp just past their last limb; n is negated below so
	C that (ptr,n,8) walks upwards and the counter ends at zero.
	lea	(up,n,8), up
	lea	(vp,n,8), vp
	lea	(rp,n,8), rp

	C Dispatch on n mod 4.
	mov	R32(n), R32(%r10)
	and	$3, R32(%r10)
	jz	L(0mod4)
	cmp	$2, R32(%r10)
	jc	L(1mod4)		C residue 1
	jz	L(2mod4)		C residue 2, else fall through for 3
L(3mod4):
	xor	R32(el), R32(el)
	xor	R32(eh), R32(eh)
	xor	R32(t0), R32(t0)
	xor	R32(t1), R32(t1)
	lea	-24(yp,n,8), yp		C yp now points at yp[n-3]
	neg	n

	shr	$1, %al			C restore carry
	mov	(up,n,8), w0
	mov	8(up,n,8), w1
	ADCSBB	(vp,n,8), w0
	mov	w0, (rp,n,8)
	cmovc	16(yp), el		C limb 0 carried: take yp[n-1]
	ADCSBB	8(vp,n,8), w1
	mov	w1, 8(rp,n,8)
	cmovc	8(yp), t0		C limb 1 carried: take yp[n-2]
	mov	16(up,n,8), w0
	ADCSBB	16(vp,n,8), w0
	mov	w0, 16(rp,n,8)
	cmovc	(yp), t1		C limb 2 carried: take yp[n-3]
	setc	%al			C save carry
	add	t0, el
	adc	$0, eh
	add	t1, el
	adc	$0, eh

	add	$3, n
	jnz	L(loop)			C more limbs remain
	jmp	L(end)

	ALIGN(16)
L(0mod4):
	xor	R32(el), R32(el)
	xor	R32(eh), R32(eh)
	lea	(yp,n,8), yp		C yp now points just past yp[n-1]
	neg	n
	jmp	L(loop)

	ALIGN(16)
L(1mod4):
	xor	R32(el), R32(el)
	xor	R32(eh), R32(eh)
	lea	-8(yp,n,8), yp		C yp now points at yp[n-1]
	neg	n

	shr	$1, %al			C restore carry
	mov	(up,n,8), w0
	ADCSBB	(vp,n,8), w0
	mov	w0, (rp,n,8)
	cmovc	(yp), el		C limb 0 carried: take yp[n-1]
	setc	%al			C save carry

	add	$1, n
	jnz	L(loop)			C more limbs remain
	jmp	L(end)

	ALIGN(16)
L(2mod4):
	xor	R32(el), R32(el)
	xor	R32(eh), R32(eh)
	xor	R32(t0), R32(t0)
	lea	-16(yp,n,8), yp		C yp now points at yp[n-2]
	neg	n

	shr	$1, %al			C restore carry
	mov	(up,n,8), w0
	mov	8(up,n,8), w1
	ADCSBB	(vp,n,8), w0
	mov	w0, (rp,n,8)
	cmovc	8(yp), el		C limb 0 carried: take yp[n-1]
	ADCSBB	8(vp,n,8), w1
	mov	w1, 8(rp,n,8)
	cmovc	(yp), t0		C limb 1 carried: take yp[n-2]
	setc	%al			C save carry
	add	t0, el
	adc	$0, eh

	add	$2, n
	jnz	L(loop)			C more limbs remain
	jmp	L(end)

	C Main loop: four limbs per iteration.  On entry yp points just past
	C the yp limb that pairs with the limb at (up,n,8).
	ALIGN(32)
L(loop):
	mov	(up,n,8), w0
	shr	$1, %al			C restore carry
	mov	-8(yp), t0
	mov	$0, R32(t3)		C zero with mov: CF must survive here
	ADCSBB	(vp,n,8), w0
	cmovnc	t3, t0			C no carry out: contribute zero
	mov	w0, (rp,n,8)
	mov	8(up,n,8), w1
	mov	16(up,n,8), w0
	ADCSBB	8(vp,n,8), w1
	mov	-16(yp), t1
	cmovnc	t3, t1			C no carry out: contribute zero
	mov	-24(yp), t2
	mov	w1, 8(rp,n,8)
	ADCSBB	16(vp,n,8), w0
	cmovnc	t3, t2			C no carry out: contribute zero
	mov	24(up,n,8), w1
	ADCSBB	24(vp,n,8), w1
	cmovc	-32(yp), t3		C t3 is still zero unless this carried
	setc	%al			C save carry
	add	t0, el
	adc	$0, eh
	add	t1, el
	adc	$0, eh
	add	t2, el
	adc	$0, eh
	lea	-32(yp), yp		C step yp back four limbs
	mov	w0, 16(rp,n,8)
	add	t3, el
	adc	$0, eh
	add	$4, n
	mov	w1, -8(rp,n,8)		C fourth limb; n was already advanced
	jnz	L(loop)			C mov leaves ZF from the add intact

L(end):
	mov	el, (ep)
	mov	eh, 8(ep)

	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret				C return with the carry in %al
EPILOGUE()