beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / aors_err3_n.asm
blobbb6d0c53662e6db0d8f1d852950ad507c3844abf
1 dnl AMD64 mpn_add_err3_n, mpn_sub_err3_n
3 dnl Contributed by David Harvey.
5 dnl Copyright 2011 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
35 C cycles/limb
36 C AMD K8,K9 7.0
37 C AMD K10 ?
38 C Intel P4 ?
39 C Intel core2 ?
40 C Intel corei ?
41 C Intel atom ?
42 C VIA nano ?
C ARGUMENTS
C The first six arrive in registers, the remaining three on the stack.  The
C stack offsets below are relative to rsp at function entry, i.e. before the
C function body pushes anything.
define(`rp',		`%rdi')		C result limbs, written in the loop
define(`up',		`%rsi')		C first source operand, read
define(`vp',		`%rdx')		C second source operand, read
define(`ep',		`%rcx')		C six words e1l .. e3h are stored here on exit
define(`yp1',		`%r8')		C first vector gated by the carry mask
define(`yp2',		`%r9')		C second vector gated by the carry mask
define(`yp3_param',	`8(%rsp)')	C third vector; the body reloads it via 64(%rsp)
define(`n_param',	`16(%rsp)')	C limb count
define(`cy_param',	`24(%rsp)')	C incoming carry

C TWO-WORD ACCUMULATORS, low and high half of each error sum
define(`e1l',		`%rbp')
define(`e1h',		`%r11')
define(`e2l',		`%r12')
define(`e2h',		`%r13')
define(`e3l',		`%r14')
define(`e3h',		`%r15')

C LOOP REGISTERS
C yp3 shares rcx with ep, so the body saves ep on the stack before it loads
C yp3.
define(`n',		`%r10')		C limb index
define(`yp3',		`%rcx')
define(`t',		`%rbx')		C scratch for the masked yp limb

C OPERATION SELECT
C ADCSBB is the carry-propagating instruction used in the loop and func is
C the symbol handed to PROLOGUE.  Presumably the build defines exactly one of
C the two OPERATION_ symbols per compilation of this file.
ifdef(`OPERATION_add_err3_n', `
	define(ADCSBB,	adc)
	define(func,	mpn_add_err3_n)')
ifdef(`OPERATION_sub_err3_n', `
	define(ADCSBB,	sbb)
	define(func,	mpn_sub_err3_n)')

C MULFUNC_PROLOGUE is defined outside this file; presumably it tells the build
C which entry points this source can provide.
MULFUNC_PROLOGUE(mpn_add_err3_n mpn_sub_err3_n)
C func (rp, up, vp, ep, yp1, yp2, yp3, n, cy)
C   func is mpn_add_err3_n or mpn_sub_err3_n; ADCSBB is adc or sbb to match.
C
C For i = 0 .. n-1, lowest limb first:
C   rp[i] = up[i] ADCSBB vp[i], the carry or borrow being chained from limb
C   to limb, with bit 0 of cy as the carry into limb 0.  Each time limb i
C   produces a carry or borrow, yp1[n-1-i], yp2[n-1-i] and yp3[n-1-i] are
C   added into the two-word accumulators e1, e2 and e3 respectively.
C
C On exit:
C   ep[0..5] = e1l, e1h, e2l, e2h, e3l, e3h
C   rax      = carry or borrow out of the top limb, 0 or 1
C
C Arguments arrive in rdi, rsi, rdx, rcx, r8, r9 and then on the stack, which
C is the System V AMD64 order.  rbx, rbp and r12-r15 are saved and restored.
C The loop tests its counter at the bottom, so n must be at least 1.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	C Read the stack arguments while they still sit at their entry offsets.
	mov	cy_param, %rax		C only bit 0 is consumed, by the shr at the loop top
	mov	n_param, n

	C Callee-saved registers used for t and the accumulators.
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	C rcx serves as both ep and yp3: park ep on the stack, then fetch yp3.
	push	ep
	mov	64(%rsp), yp3		C yp3_param: 8(%rsp) at entry plus 7 pushes of 8 bytes

	C Clear e1, e2 and e3.  R32 is defined outside this file and presumably
	C names the 32-bit form of the register, whose write clears all 64 bits.
	xor	R32(e1l), R32(e1l)
	xor	R32(e1h), R32(e1h)
	xor	R32(e2l), R32(e2l)
	xor	R32(e2h), R32(e2h)
	xor	R32(e3l), R32(e3l)
	xor	R32(e3h), R32(e3h)

	C Turn yp2 and yp3 into byte offsets from yp1 so that a single moving
	C pointer addresses all three vectors.
	sub	yp1, yp2
	sub	yp1, yp3

	C yp1 points at its last limb and is walked downwards.  rp, up and vp
	C point one past their ends and are indexed by n counting from -n up to 0.
	lea	-8(yp1,n,8), yp1
	lea	(rp,n,8), rp
	lea	(up,n,8), up
	lea	(vp,n,8), vp
	neg	n

	ALIGN(16)
L(top):
	shr	$1, %rax		C bit 0 of rax, cy or the previous mask, goes to CF
	mov	(up,n,8), %rax
	ADCSBB	(vp,n,8), %rax
	mov	%rax, (rp,n,8)
	sbb	%rax, %rax		C rax = all ones on carry, else zero; keeps the carry too

	C e1 += yp1 limb, only if this limb carried
	mov	(yp1), t
	and	%rax, t
	add	t, e1l
	adc	$0, e1h

	C e2 += yp2 limb, same gating
	mov	(yp1,yp2), t
	and	%rax, t
	add	t, e2l
	adc	$0, e2h

	C e3 += yp3 limb, same gating
	mov	(yp1,yp3), t
	and	%rax, t
	add	t, e3l
	adc	$0, e3h

	lea	-8(yp1), yp1		C step the yp vectors down one limb
	inc	n
	jnz	L(top)			C CF is dead here; the carry lives in rax

L(end):
	and	$1, %eax		C mask of all ones or zero becomes the 0 or 1 return value
	pop	ep			C rcx is ep again, yp3 is no longer needed

	mov	e1l, (ep)
	mov	e1h, 8(ep)
	mov	e2l, 16(ep)
	mov	e2h, 24(ep)
	mov	e3l, 32(ep)
	mov	e3h, 40(ep)

	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	C Explicit return.  EPILOGUE is defined outside this file and is not
	C expected to emit one, so without this the code would run off the end
	C of the function.
	ret
EPILOGUE()