dnl  beta-0.89.2
dnl  [luatex.git] / source/libs/gmp/gmp-src/mpn/x86_64/bd1/mul_1.asm
dnl  blob e59667c0853fa0d3746e912f404787f90a54f2eb
1 dnl AMD64 mpn_mul_1 optimised for AMD Bulldozer.
3 dnl Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C AMD K8,K9
35 C AMD K10
36 C AMD bd1 4
37 C AMD bobcat
38 C Intel P4
39 C Intel core2
40 C Intel NHM
41 C Intel SBR
42 C Intel atom
43 C VIA nano
45 C The loop of this code is the result of running a code generation and
46 C optimisation tool suite written by David Harvey and Torbjorn Granlund.
48 C TODO
49 C * Move loop code into feed-in blocks, to save insn for zeroing regs.
C Symbolic names for the four arguments.  The definitions are the registers
C used when building for STD64; the note at the end of each line names the
C register that carries the same argument on entry when building for DOS64.
define(`rp',      `%rdi')	C DOS64: rcx
define(`up',      `%rsi')	C DOS64: rdx
define(`n_param', `%rdx')	C DOS64: r8
define(`v0',      `%rcx')	C DOS64: r9

C Working copy of the limb count, used as the loop index.
define(`n',       `%rbx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C DOS64 overrides.  up is kept in rsi, where the prologues copy it from rdx.
C rp, v0 and n are used straight from their entry registers rcx, r9 and r8.
C As r9 and r8 are thereby occupied, the bare names r9 and r8 are redefined
C so that the scratch uses of those two registers in the code below turn
C into rdi and rbx.  The doubled quotes keep the register names inside the
C definitions of v0 and n from being rewritten by those two redefinitions.
IFDOS(`	define(`up', ``%rsi'')	') dnl
IFDOS(`	define(`rp', ``%rcx'')	') dnl
IFDOS(`	define(`v0', ``%r9'')	') dnl
IFDOS(`	define(`r9', ``rdi'')	') dnl
IFDOS(`	define(`n',  ``%r8'')	') dnl
IFDOS(`	define(`r8', ``rbx'')	') dnl
ASM_START()
	TEXT
	ALIGN(16)

C mpn_mul_1c -- entry point taking an extra carry-in limb.
C
C Arguments are rp, up, n and v0 as named by the defines above, followed by
C a carry-in limb that is read from r8 for STD64 and from the stack for
C DOS64.  The code forms the first product u[0] * v0, adds the carry-in to
C it, and then jumps to L(common) inside mpn_mul_1, which does all remaining
C work and returns to the caller.  There is therefore no ret in this
C function.
PROLOGUE(mpn_mul_1c)
IFDOS(``push	%rsi		'')
IFDOS(``push	%rdi		'')
IFDOS(``mov	%rdx, %rsi	'')

	mov	(up), %rax		C fetch the first u limb early
	push	%rbx
IFSTD(`	mov	n_param, %r11	')	C n has to leave rdx, mul overwrites it
IFDOS(`	mov	n, %r11		')
	mul	v0			C rdx:rax = u[0] * v0

IFSTD(`	add	%r8, %rax	')	C add the carry-in limb
IFDOS(`	add	64(%rsp), %rax	')	C carry-in at 40 + 3*8, after 3 pushes
	adc	$0, %rdx		C propagate carry into the high limb
	jmp	L(common)

EPILOGUE()
89 ALIGN(16)
90 PROLOGUE(mpn_mul_1)
91 IFDOS(``push %rsi '')
92 IFDOS(``push %rdi '')
93 IFDOS(``mov %rdx, %rsi '')
95 mov (up), %rax C read first u limb early
96 push %rbx
97 IFSTD(` mov n_param, %r11 ') C move away n from rdx, mul uses it
98 IFDOS(` mov n, %r11 ')
99 mul v0
101 L(common):
102 IFSTD(` mov %r11, n ')
104 and $3, R32(%r11)
105 lea -16(rp,n,8), rp
106 jz L(b0)
107 cmp $2, R32(%r11)
108 jb L(b1)
109 jz L(b2)
111 L(b3): mov %rax, %r10
112 mov %rdx, %r11
113 mov 8(up), %rax
114 mul v0
115 lea (up,n,8), up
116 not n
117 jmp L(L3)
119 L(b0): mov %rax, %r9
120 mov %rdx, %r10
121 mov 8(up), %rax
122 lea (up,n,8), up
123 neg n
124 jmp L(L0)
126 L(b1): mov %rax, %r8
127 cmp $1, n
128 jz L(n1)
129 mov %rdx, %r9
130 lea (up,n,8), up
131 neg n
132 mov %r8, 16(rp,n,8)
133 inc n
134 jmp L(L1)
136 L(b2): mov %rax, %r11
137 mov %rdx, %r8
138 mov 8(up), %rax
139 lea (up,n,8), up
140 neg n
141 add $2, n
142 jns L(end)
144 ALIGN(16)
145 L(top): mul v0
146 mov %rdx, %r9
147 add %rax, %r8
148 adc $0, %r9
149 mov %r8, 8(rp,n,8)
150 mov %r11, (rp,n,8)
151 L(L1): mov (up,n,8), %rax
152 mul v0
153 add %rax, %r9
154 mov %rdx, %r10
155 mov 8(up,n,8), %rax
156 adc $0, %r10
157 L(L0): mul v0
158 add %rax, %r10
159 mov %rdx, %r11
160 mov 16(up,n,8), %rax
161 adc $0, %r11
162 mul v0
163 mov %r9, 16(rp,n,8)
164 L(L3): add %rax, %r11
165 mov %r10, 24(rp,n,8)
166 mov %rdx, %r8
167 adc $0, %r8
168 add $4, n
169 mov -8(up,n,8), %rax
170 js L(top)
172 L(end): mul v0
173 add %rax, %r8
174 adc $0, %rdx
175 mov %r11, (rp)
176 L(n1): mov %r8, 8(rp)
177 mov %rdx, %rax
179 pop %rbx
180 IFDOS(``pop %rdi '')
181 IFDOS(``pop %rsi '')
183 EPILOGUE()
184 ASM_END()