beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / aors_err2_n.asm
blobce5c2a49b6de3f0ee2ca74d2c599bc8548c41b29
1 dnl AMD64 mpn_add_err2_n, mpn_sub_err2_n
3 dnl Contributed by David Harvey.
5 dnl Copyright 2011 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
35 C cycles/limb
36 C AMD K8,K9 4.5
37 C AMD K10 ?
38 C Intel P4 ?
39 C Intel core2 6.9
40 C Intel corei ?
41 C Intel atom ?
42 C VIA nano ?
C INPUT PARAMETERS
C
C The first six arguments use the SysV integer argument registers; the last
C two are fetched from the stack.  The stack offsets are relative to the value
C of %rsp on entry, so they are only valid before anything is pushed.

C Register arguments
define(`rp',		`%rdi')		C array that result limbs are stored to
define(`up',		`%rsi')		C first source operand array
define(`vp',		`%rdx')		C second source operand array
define(`ep',		`%rcx')		C four limbs are stored here on exit
define(`yp1',		`%r8')		C array paired with e1h:e1l
define(`yp2',		`%r9')		C array paired with e2h:e2l

C Stack arguments
define(`n_param',	`8(%rsp)')	C limb count
define(`cy_param',	`16(%rsp)')	C incoming carry

C Loop state
define(`n',		`%r10')		C limb count, later a negated index
define(`w',		`%rbx')		C scratch limb
define(`cy1',		`%r14')		C carry mask of the first limb of a pair
define(`cy2',		`%rax')		C carry in, carry mask, return value

C Two-limb accumulators
define(`e1l',		`%rbp')		C low limb of the yp1 sum
define(`e1h',		`%r11')		C high limb of the yp1 sum
define(`e2l',		`%r12')		C low limb of the yp2 sum
define(`e2h',		`%r13')		C high limb of the yp2 sum
C Pick the arithmetic instruction and the exported symbol name according to
C which OPERATION_ symbol is defined for this build of the file.
C   ADCSBB  the add-with-carry or subtract-with-borrow mnemonic
C   func    the name handed to PROLOGUE
ifdef(`OPERATION_add_err2_n',
`	define(`ADCSBB',	`adc')
	define(`func',		`mpn_add_err2_n')')
ifdef(`OPERATION_sub_err2_n',
`	define(`ADCSBB',	`sbb')
	define(`func',		`mpn_sub_err2_n')')

C Lists both entry points that this one source file can provide.
MULFUNC_PROLOGUE(mpn_add_err2_n mpn_sub_err2_n)
C -----------------------------------------------------------------------------
C func expands to mpn_add_err2_n or mpn_sub_err2_n; ADCSBB is adc or sbb.
C
C Operation, as performed by the code below:
C   {rp,n} = {up,n} ADCSBB {vp,n}, with the incoming carry or borrow taken
C   from bit 0 of cy_param.
C   For every limb index i whose ADCSBB leaves CF set, limb n-1-i of the yp1
C   array is added into the two-limb sum e1h:e1l, and limb n-1-i of the yp2
C   array is added into the two-limb sum e2h:e2l.
C   On exit e1l, e1h, e2l, e2h are stored to ep[0], ep[1], ep[2], ep[3] and
C   the final carry or borrow is returned in %rax as 0 or 1.
C
C Requirements visible from the code:
C   n must be at least 1.  With n = 0 the even path would enter the loop and
C   process limbs anyway.
C   n_param and cy_param are fetched before the pushes, because their offsets
C   are relative to the entry value of %rsp.
C
C Registers: %rbx, %rbp, %r12, %r13, %r14 are saved and restored.  %rax,
C %rdi, %rsi, %rdx, %r8, %r9, %r10, %r11 and the flags are overwritten.
C -----------------------------------------------------------------------------
ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	mov	cy_param, cy2		C carry in, only bit 0 is consumed
	mov	n_param, n

	push	%rbx			C callee-saved registers used below
	push	%rbp
	push	%r12
	push	%r13
	push	%r14

	xor	R32(e1l), R32(e1l)	C clear both two-limb accumulators
	xor	R32(e1h), R32(e1h)
	xor	R32(e2l), R32(e2l)
	xor	R32(e2h), R32(e2h)

	sub	yp1, yp2		C yp2 now holds the offset yp2 - yp1

	lea	(rp,n,8), rp		C point just past the end of each array,
	lea	(up,n,8), up		C n is negated below and counts up to 0
	lea	(vp,n,8), vp

	test	$1, n
	jnz	L(odd)

	lea	-8(yp1,n,8), yp1	C yp1 points at the last limb of its array
	neg	n
	jmp	L(top)

	ALIGN(16)
C Odd n: handle one limb here so that the loop always sees an even count.
L(odd):
	lea	-16(yp1,n,8), yp1	C yp1 points at the second-to-last limb
	neg	n
	shr	$1, cy2			C CF = carry in
	mov	(up,n,8), w
	ADCSBB	(vp,n,8), w
	cmovc	8(yp1), e1l		C on carry, e1l = last yp1 limb (was 0)
	cmovc	8(yp1,yp2), e2l		C on carry, e2l = last yp2 limb (was 0)
	mov	w, (rp,n,8)
	sbb	cy2, cy2		C cy2 = all ones if carry, else 0
	inc	n
	jz	L(end)			C n was 1, nothing left for the loop

	ALIGN(16)
C Main loop: two limbs per iteration, yp1 steps down by two limbs each time.
L(top):
	mov	(up,n,8), w
	shr	$1, cy2			C restore carry
	ADCSBB	(vp,n,8), w
	mov	w, (rp,n,8)
	sbb	cy1, cy1		C generate mask, preserve CF

	mov	8(up,n,8), w
	ADCSBB	8(vp,n,8), w
	mov	w, 8(rp,n,8)
	sbb	cy2, cy2		C generate mask, preserve CF

	mov	(yp1), w		C (e1h:e1l) += cy1 * yp1 limb
	and	cy1, w
	add	w, e1l
	adc	$0, e1h

	and	(yp1,yp2), cy1		C (e2h:e2l) += cy1 * yp2 limb
	add	cy1, e2l
	adc	$0, e2h

	mov	-8(yp1), w		C (e1h:e1l) += cy2 * next yp1 limb
	and	cy2, w
	add	w, e1l
	adc	$0, e1h

	mov	-8(yp1,yp2), w		C (e2h:e2l) += cy2 * next yp2 limb
	and	cy2, w
	add	w, e2l
	adc	$0, e2h

	add	$2, n
	lea	-16(yp1), yp1		C lea keeps the ZF produced by the add
	jnz	L(top)
L(end):

	mov	e1l, (ep)		C store both accumulators, low limb first
	mov	e1h, 8(ep)
	mov	e2l, 16(ep)
	mov	e2h, 24(ep)

	and	$1, %eax		C return carry

	pop	%r14			C restore in reverse order of the pushes
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret				C EPILOGUE emits no return instruction
EPILOGUE()