beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / addaddmul_1msb0.asm
blob87c21b4aca97c156947e10f53f3d48dbfe9ec7ae
1 dnl AMD64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.
3 dnl Copyright 2008 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C AMD K8,K9 2.167
35 C AMD K10 2.167
36 C Intel P4 12.0
37 C Intel core2 4.0
38 C Intel corei ?
39 C Intel atom ?
40 C VIA nano ?
42 C TODO
43 C * Perhaps handle various n mod 3 sizes better. The code now is too large.
C INPUT PARAMETERS
C The six arguments are taken from %rdi, %rsi, %rdx, %rcx, %r8, %r9, in that
C order.
define(`rp',	   `%rdi')	C result pointer
define(`ap',	   `%rsi')	C first source pointer
define(`bp_param', `%rdx')	C second source pointer, as passed in
define(`n',	   `%rcx')	C limb count
define(`u0',	   `%r8')	C multiplier for the ap limbs
define(`v0',	   `%r9')	C multiplier for the bp limbs

C Working copy of the second source pointer.  It is kept in %rbp rather than
C in %rdx because every mul overwrites %rdx with the high product half.
define(`bp',	   `%rbp')
C -----------------------------------------------------------------------
C mpn_addaddmul_1msb0 -- {rp,n} = {ap,n} * u0 + {bp,n} * v0
C
C In:	rp	 (%rdi)	destination, n limbs are written
C	ap	 (%rsi)	first source, n limbs are read
C	bp_param (%rdx)	second source, n limbs are read
C	n	 (%rcx)	limb count; must be >= 1, limb 0 is loaded
C			unconditionally
C	%r8		multiplier applied to every ap limb (u0)
C	%r9		multiplier applied to every bp limb (v0)
C Out:	%rax		most significant (carry-out) limb of the sum
C
C Clobbers %rcx, %rdx, %rsi, %rdi, %r10, %r11 and the flags.  %r12 and %rbp
C are callee-saved; they are pushed on entry and popped on every exit path.
C
C The file title restricts u0 and v0 to values below 2^63.  Each high-part
C register below is formed by adc into a freshly zeroed register and no
C carry out of it is ever tracked; presumably that is what the bound makes
C safe.
C
C %r12, %r10 and %r11 rotate through the roles "limb being finished" and
C "high part / next limb".  Instruction order is chosen for scheduling, so
C loads and stores are interleaved with the arithmetic.
C -----------------------------------------------------------------------
ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_addaddmul_1msb0)
	push	%r12
	push	%rbp

C Bias all three pointers to one past their last limb and negate n, so that
C (ptr,n,8) addresses limb 0 and n counts upwards towards zero.
	lea	(ap,n,8), ap
	lea	(bp_param,n,8), bp	C second source now lives in %rbp
	lea	(rp,n,8), rp
	neg	n

C Start limb 0: %r10:%r12 = ap[0] * u0, and %rax = bp[0] so that every
C path below can begin directly with the v0 multiply.
	mov	(ap,n,8), %rax
	mul	%r8
	mov	%rax, %r12
	mov	(bp,n,8), %rax
	mov	%rdx, %r10
	add	$3, n
	jns	L(end)			C 3 or fewer limbs: skip the loop

C Main loop.  One pass completes three result limbs and starts the fourth,
C leaving the registers in the same state as on loop entry.
	ALIGN(16)
L(top):	mul	%r9			C finish limb k with bp[k] * v0
	add	%rax, %r12
	mov	-16(ap,n,8), %rax
	adc	%rdx, %r10
	mov	%r12, -24(rp,n,8)	C store limb k
	mul	%r8			C limb k+1: ap[k+1] * u0
	add	%rax, %r10
	mov	-16(bp,n,8), %rax
	mov	$0, R32(%r11)		C mov, not xor: CF must survive to adc
	adc	%rdx, %r11
	mul	%r9			C limb k+1: bp[k+1] * v0
	add	%rax, %r10
	mov	-8(ap,n,8), %rax
	adc	%rdx, %r11
	mov	%r10, -16(rp,n,8)	C store limb k+1
	mul	%r8			C limb k+2: ap[k+2] * u0
	add	%rax, %r11
	mov	-8(bp,n,8), %rax
	mov	$0, R32(%r12)		C flags preserved for the adc below
	adc	%rdx, %r12
	mul	%r9			C limb k+2: bp[k+2] * v0
	add	%rax, %r11
	adc	%rdx, %r12
	mov	(ap,n,8), %rax
	mul	%r8			C start limb k+3: ap[k+3] * u0
	add	%rax, %r12
	mov	%r11, -8(rp,n,8)	C store limb k+2
	mov	(bp,n,8), %rax
	mov	$0, R32(%r10)		C flags preserved for the adc below
	adc	%rdx, %r10
	add	$3, n
	js	L(top)

C Wind-down.  n is now 0, 1 or 2 and 3 - n limbs are still unfinished; the
C first of them has its u0 product in %r10:%r12 and its bp limb in %rax.
L(end):	cmp	$1, R32(n)
	ja	2f			C n = 2: one limb left
	jz	1f			C n = 1: two limbs left

C n = 0: three limbs left.
	mul	%r9
	add	%rax, %r12
	mov	-16(ap), %rax
	adc	%rdx, %r10
	mov	%r12, -24(rp)
	mul	%r8
	add	%rax, %r10
	mov	-16(bp), %rax
	mov	$0, R32(%r11)
	adc	%rdx, %r11
	mul	%r9
	add	%rax, %r10
	mov	-8(ap), %rax
	adc	%rdx, %r11
	mov	%r10, -16(rp)
	mul	%r8
	add	%rax, %r11
	mov	-8(bp), %rax
	mov	$0, R32(%r12)
	adc	%rdx, %r12
	mul	%r9
	add	%rax, %r11
	adc	%rdx, %r12
	mov	%r11, -8(rp)
	mov	%r12, %rax		C return the carry-out limb
	pop	%rbp
	pop	%r12
	ret				C must not fall through into 1:

C Two limbs left.
1:	mul	%r9
	add	%rax, %r12
	mov	-8(ap), %rax
	adc	%rdx, %r10
	mov	%r12, -16(rp)
	mul	%r8
	add	%rax, %r10
	mov	-8(bp), %rax
	mov	$0, R32(%r11)
	adc	%rdx, %r11
	mul	%r9
	add	%rax, %r10
	adc	%rdx, %r11
	mov	%r10, -8(rp)
	mov	%r11, %rax		C return the carry-out limb
	pop	%rbp
	pop	%r12
	ret				C must not fall through into 2:

C One limb left.
2:	mul	%r9
	add	%rax, %r12
	mov	%r12, -8(rp)
	adc	%rdx, %r10
	mov	%r10, %rax		C return the carry-out limb
	pop	%rbp
	pop	%r12
	ret
EPILOGUE()