dnl  Retrieved from luatex.git, beta-0.89.2:
dnl  source/libs/gmp/gmp-src/mpn/x86_64/bobcat/aorsmul_1.asm
dnl  blob 415a17cb7ff1bcb5811e9289f2d81576bc6744ce
dnl  AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD bobcat.

dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 4.5
C AMD K10	 4.5
C AMD bd1	 4.75
C AMD bobcat	 5
C Intel P4	17.7
C Intel core2	 5.5
C Intel NHM	 5.43
C Intel SBR	 3.92
C Intel atom	23
C VIA nano	 5.63

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C Pick the arithmetic instruction and the exported symbol name.  ADDSUB is
C the instruction applied to the destination limbs and func is the name
C handed to PROLOGUE.  Neither OPERATION_ symbol is defined in this file;
C presumably the build defines exactly one of them -- confirm against the
C build rules.  Each ifdef body must be closed with a quote and parenthesis
C on its own line, otherwise m4 keeps collecting the remainder of the file
C as part of the argument.
ifdef(`OPERATION_addmul_1',`
      define(`ADDSUB',        `add')
      define(`func',  `mpn_addmul_1')
')
ifdef(`OPERATION_submul_1',`
      define(`ADDSUB',        `sub')
      define(`func',  `mpn_submul_1')
')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

C Register roles.  The first two groups are the defaults; the IFDOS lines
C further down redefine the same names for the DOS64 ABI.
C
C   rp, up    destination and source limb pointers
C   n_param   limb count as passed in
C   v0        the single-limb multiplier
C   n         working index, kept in rbx which the code saves and restores
C   w0..w3    product limbs in flight: w0/w1 and w2/w3 alternate as the
C             low/high halves of successive products

C Standard parameters
define(`rp',              `%rdi')
define(`up',              `%rsi')
define(`n_param',         `%rdx')
define(`v0',              `%rcx')
C Standard allocations
define(`n',               `%rbx')
define(`w0',              `%r8')
define(`w1',              `%r9')
define(`w2',              `%r10')
define(`w3',              `%r11')

C Under DOS64 the arguments arrive in rcx, rdx, r8, r9.  up stays rsi
C because the function prologue copies rdx into rsi; rdx cannot hold it
C since mul overwrites rdx.  w1 moves from r9 to rdi because r9 now holds
C v0.  n_param and w0 share r8: n_param is last read by the dispatch at
C function entry, before w0 is first written.  Several of these lines
C repeat the standard value unchanged and are kept so both tables read
C in parallel.

C DOS64 parameters
IFDOS(` define(`rp',      `%rcx')    ') dnl
IFDOS(` define(`up',      `%rsi')    ') dnl
IFDOS(` define(`n_param', `%r8')     ') dnl
IFDOS(` define(`v0',      `%r9')     ') dnl
C DOS64 allocations
IFDOS(` define(`n',       `%rbx')    ') dnl
IFDOS(` define(`w0',      `%r8')     ') dnl
IFDOS(` define(`w1',      `%rdi')    ') dnl
IFDOS(` define(`w2',      `%r10')    ') dnl
IFDOS(` define(`w3',      `%r11')    ') dnl

ASM_START()
	TEXT
	ALIGN(16)

C func(rp, up, n, v0), where func is mpn_addmul_1 or mpn_submul_1.
C
C Multiplies the n limbs at up by the single limb v0 and applies ADDSUB
C of each product limb to the corresponding limb at rp.  The limb left
C over at the top, with the final carry or borrow folded in, is returned
C in rax.
C
C There is no n = 0 check: the first source limb is loaded
C unconditionally, so callers are assumed to pass n >= 1.
C
C Clobbers rax, rdx, the w0..w3 registers and flags.  rbx is saved and
C restored; under DOS64 rsi and rdi are saved and restored as well.
PROLOGUE(func)
C DOS64 only: rsi and rdi are callee-saved in that ABI, and the source
C pointer arrives in rdx, which mul is about to overwrite.
IFDOS(`	push	%rsi		')
IFDOS(`	push	%rdi		')
IFDOS(`	mov	%rdx, %rsi	')

	push	%rbx
	mov	(up), %rax		C first source limb, read before up is rebased

C Rebase both pointers to the second-to-last limb.  The loop then indexes
C them with a negative n that counts up, and the tail at L(end) can
C address the last two limbs as 0 and 8 off rp.
	lea	-16(rp,n_param,8), rp
	lea	-16(up,n_param,8), up

C Dispatch on n mod 4.  Each entry multiplies the first limb, places the
C product in the register pair its loop slot expects, biases n for that
C slot and jumps into the middle of the 4-way unrolled loop.
	mov	n_param, n
	and	$3, R32(n_param)
	jz	L(b0)
	cmp	$2, R32(n_param)
	ja	L(b3)
	jz	L(b2)

C n mod 4 = 1
L(b1):	mul	v0
	cmp	$1, n
	jz	L(n1)
	mov	%rax, w2
	mov	%rdx, w3
	neg	n
	add	$3, n
	jmp	L(L1)

C n = 1: a single product.  After the rebase, 8(rp) is the only
C destination limb.
L(n1):	ADDSUB	%rax, 8(rp)
	adc	$0, %rdx		C fold carry or borrow into the high limb
	mov	%rdx, %rax
	pop	%rbx
IFDOS(`	pop	%rdi		')
IFDOS(`	pop	%rsi		')
	ret				C without this the path runs on into L(b3)

C n mod 4 = 3
L(b3):	mul	v0
	mov	%rax, w2
	mov	%rdx, w3
	neg	n
	inc	n
	jmp	L(L3)

C n mod 4 = 0
L(b0):	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	neg	n
	add	$2, n
	jmp	L(L0)

C n mod 4 = 2
L(b2):	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	neg	n
	jmp	L(L2)

C Main loop, four limbs per pass.  Each step applies the low half of the
C previous product to the destination, then adds the resulting carry or
C borrow together with the previous high half into the product just
C computed.  mov does not touch flags, so the carry from ADDSUB is still
C live at the following adc.
	ALIGN(16)
L(top):	ADDSUB	w0, -16(rp,n,8)
	adc	w1, w2
	adc	$0, w3
L(L1):	mov	0(up,n,8), %rax
	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	ADDSUB	w2, -8(rp,n,8)
	adc	w3, w0
	adc	$0, w1
L(L0):	mov	8(up,n,8), %rax
	mul	v0
	mov	%rax, w2
	mov	%rdx, w3
	ADDSUB	w0, 0(rp,n,8)
	adc	w1, w2
	adc	$0, w3
L(L3):	mov	16(up,n,8), %rax
	mul	v0
	mov	%rax, w0
	mov	%rdx, w1
	ADDSUB	w2, 8(rp,n,8)
	adc	w3, w0
	adc	$0, w1
L(L2):	mov	24(up,n,8), %rax
	mul	v0
	mov	%rax, w2
	mov	%rdx, w3
	add	$4, n
	js	L(top)			C loop while the index is still negative

C Tail: two destination limbs remain, at 0 and 8 off the rebased rp.
C w0/w1 hold the pending product for the first and w2/w3 the product
C for the second.
L(end):	ADDSUB	w0, (rp)
	adc	w1, w2
	adc	$0, w3
	ADDSUB	w2, 8(rp)
	adc	$0, w3
	mov	w3, %rax		C return the top limb

	pop	%rbx
IFDOS(`	pop	%rdi		')
IFDOS(`	pop	%rsi		')
	ret
EPILOGUE()