dnl  beta-0.89.2
dnl  [luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / logops_n.asm
dnl  blob b277f58962441c2661fa0d3bf630072eccdb25aa
dnl  AMD64 logops.

dnl  Copyright 2004-2006, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C Measured speed of the code in this file, in cycles per limb.  Where three
C figures are given they are for variant 1 / variant 2 / variant 3.
C
C                cycles/limb
C AMD K8,K9      1.5 with fluctuations for variant 2 and 3
C AMD K10        1.5 with fluctuations for all variants
C Intel P4       2.8/3.35/3.60 (variant1/variant2/variant3)
C Intel core2    2
C Intel NHM      2
C Intel SBR      1.5/1.75/1.75
C Intel atom     3.75
C VIA nano       3.25
C Operation selection.
C
C Each case below is taken when the matching OPERATION_xxx symbol is defined
C (NOTE(review): presumably exactly one is defined per build of this file;
C confirm against the build rules).  A case sets three things:
C
C   func        the entry point name handed to PROLOGUE
C   VARIANT_n   which of the three code bodies further down is assembled
C   LOGOP       the two-operand instruction applied to each pair of limbs
C
C The variants differ only in where a complement is applied:
C   VARIANT_1   no complement
C   VARIANT_2   each v limb is complemented before LOGOP
C   VARIANT_3   each result limb is complemented after LOGOP

ifdef(`OPERATION_and_n',`
  define(`func',`mpn_and_n')
  define(`VARIANT_1')
  define(`LOGOP',`andq')')
ifdef(`OPERATION_andn_n',`
  define(`func',`mpn_andn_n')
  define(`VARIANT_2')
  define(`LOGOP',`andq')')
ifdef(`OPERATION_nand_n',`
  define(`func',`mpn_nand_n')
  define(`VARIANT_3')
  define(`LOGOP',`andq')')
ifdef(`OPERATION_ior_n',`
  define(`func',`mpn_ior_n')
  define(`VARIANT_1')
  define(`LOGOP',`orq')')
ifdef(`OPERATION_iorn_n',`
  define(`func',`mpn_iorn_n')
  define(`VARIANT_2')
  define(`LOGOP',`orq')')
ifdef(`OPERATION_nior_n',`
  define(`func',`mpn_nior_n')
  define(`VARIANT_3')
  define(`LOGOP',`orq')')
ifdef(`OPERATION_xor_n',`
  define(`func',`mpn_xor_n')
  define(`VARIANT_1')
  define(`LOGOP',`xorq')')
ifdef(`OPERATION_xnor_n',`
  define(`func',`mpn_xnor_n')
  define(`VARIANT_2')
  define(`LOGOP',`xorq')')
C The eight entry points this one source file can produce.
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; it is assumed
C to only declare the list and emit no code.  Confirm before editing the list.
MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

C INPUT PARAMETERS
C   rp   destination pointer, written one 8-byte limb at a time
C   up   first source pointer, read as the memory operand of LOGOP
C   vp   second source pointer, loaded into a register before LOGOP
C   n    number of limbs, used as an index scaled by 8
define(`rp',`%rdi')
define(`up',`%rsi')
define(`vp',`%rdx')
define(`n',`%rcx')

C NOTE(review): ABI_SUPPORT and ASM_START are defined outside this file;
C the two calls are taken to declare that both the DOS64 and STD64 calling
C conventions are handled (see FUNC_ENTRY/FUNC_EXIT in the code bodies).
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
C -----------------------------------------------------------------------
C VARIANT_1 body: rp[i] = vp[i] LOGOP up[i] for every limb i in 0 .. n-1.
C
C Inputs are the three limb pointers and the limb count named by the
C rp/up/vp/n defines.  The very first instruction reads vp[0]
C unconditionally, so n = 0 is not handled here.
C
C Scratch registers: %rax (n mod 4), %r8 and %r9 (limb values).  The three
C pointers are moved to just past their last limb and n is negated, so the
C index counts upwards from -n towards zero.
C
C The loop is unrolled four ways and n mod 4 selects the entry point:
C   0 -> L(b00)   1 -> L(b01)   2 -> L(b10)   3 -> L(b11)
C Whichever entry is used, n is a negative multiple of 4 when the addq at
C the loop bottom runs, so that add carries out exactly when the last limb
C has been stored.
C
C NOTE(review): TEXT, ALIGN, PROLOGUE, EPILOGUE, FUNC_ENTRY, FUNC_EXIT, L()
C and R32() are macros defined outside this file; FUNC_EXIT() is assumed
C not to contain the return instruction itself.
C -----------------------------------------------------------------------
ifdef(`VARIANT_1',`
        TEXT
        ALIGN(32)
PROLOGUE(func)
        FUNC_ENTRY(4)
        movq    (vp), %r8               C r8 = vp[0], read before vp moves
        movl    R32(%rcx), R32(%rax)    C eax = n (only n mod 4 is needed)
        leaq    (vp,n,8), vp            C advance each pointer by n limbs,
        leaq    (up,n,8), up            C that is to just past the last one
        leaq    (rp,n,8), rp
        negq    n                       C index now runs from -n up to zero
        andl    $3, R32(%rax)           C eax = n mod 4
        je      L(b00)                  C n mod 4 = 0
        cmpl    $2, R32(%rax)
        jc      L(b01)                  C n mod 4 = 1
        je      L(b10)                  C n mod 4 = 2
                                        C n mod 4 = 3 falls through

L(b11): LOGOP   (up,n,8), %r8           C limb 0, using the preloaded r8
        movq    %r8, (rp,n,8)
        decq    n                       C bias so L(e11) handles limbs 1, 2
        jmp     L(e11)
L(b10): addq    $-2, n                  C bias so L(e10) handles limbs 0, 1
        jmp     L(e10)                  C r8 still holds vp[0]
L(b01): LOGOP   (up,n,8), %r8           C limb 0, using the preloaded r8
        movq    %r8, (rp,n,8)
        incq    n
        jz      L(ret)                  C n was 1, nothing left to do

L(oop): movq    (vp,n,8), %r8
L(b00): movq    8(vp,n,8), %r9          C n mod 4 = 0 enters with r8 loaded
        LOGOP   (up,n,8), %r8
        LOGOP   8(up,n,8), %r9

        movq    %r8, (rp,n,8)
        movq    %r9, 8(rp,n,8)
L(e11): movq    16(vp,n,8), %r8
L(e10): movq    24(vp,n,8), %r9
        LOGOP   16(up,n,8), %r8
        LOGOP   24(up,n,8), %r9
        movq    %r8, 16(rp,n,8)
        movq    %r9, 24(rp,n,8)
        addq    $4, n
        jnc     L(oop)                  C carry out means the index hit zero
L(ret): FUNC_EXIT()
        ret                             C explicit return after FUNC_EXIT
EPILOGUE()
')
C -----------------------------------------------------------------------
C VARIANT_2 body: rp[i] = (complement of vp[i]) LOGOP up[i] for every limb
C i in 0 .. n-1.  Each v limb is complemented with notq right after it is
C loaded and before LOGOP is applied.
C
C Inputs are the three limb pointers and the limb count named by the
C rp/up/vp/n defines.  The very first instruction reads vp[0]
C unconditionally, so n = 0 is not handled here.
C
C Scratch registers: %rax (n mod 4), %r8 and %r9 (limb values).  The three
C pointers are moved to just past their last limb and n is negated, so the
C index counts upwards from -n towards zero.
C
C The loop is unrolled four ways and n mod 4 selects the entry point:
C   0 -> L(b00)   1 -> L(b01)   2 -> L(b10)   3 -> L(b11)
C Whichever entry is used, n is a negative multiple of 4 when the addq at
C the loop bottom runs, so that add carries out exactly when the last limb
C has been stored.
C
C NOTE(review): TEXT, ALIGN, PROLOGUE, EPILOGUE, FUNC_ENTRY, FUNC_EXIT, L()
C and R32() are macros defined outside this file; FUNC_EXIT() is assumed
C not to contain the return instruction itself.
C -----------------------------------------------------------------------
ifdef(`VARIANT_2',`
        TEXT
        ALIGN(32)
PROLOGUE(func)
        FUNC_ENTRY(4)
        movq    (vp), %r8               C r8 = vp[0], read before vp moves
        notq    %r8                     C and complemented straight away
        movl    R32(%rcx), R32(%rax)    C eax = n (only n mod 4 is needed)
        leaq    (vp,n,8), vp            C advance each pointer by n limbs,
        leaq    (up,n,8), up            C that is to just past the last one
        leaq    (rp,n,8), rp
        negq    n                       C index now runs from -n up to zero
        andl    $3, R32(%rax)           C eax = n mod 4
        je      L(b00)                  C n mod 4 = 0
        cmpl    $2, R32(%rax)
        jc      L(b01)                  C n mod 4 = 1
        je      L(b10)                  C n mod 4 = 2
                                        C n mod 4 = 3 falls through

L(b11): LOGOP   (up,n,8), %r8           C limb 0, using the preloaded r8
        movq    %r8, (rp,n,8)
        decq    n                       C bias so L(e11) handles limbs 1, 2
        jmp     L(e11)
L(b10): addq    $-2, n                  C bias so L(e10) handles limbs 0, 1
        jmp     L(e10)                  C r8 holds the complemented vp[0]
C Thirteen bytes of 0x90, the one-byte NOP opcode.  They follow an
C unconditional jmp and no label points at them, so they are never run.
C NOTE(review): presumably padding that positions the code after it; keep
C the count unchanged unless the layout is re-measured.
        .byte   0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
L(b01): LOGOP   (up,n,8), %r8           C limb 0, using the preloaded r8
        movq    %r8, (rp,n,8)
        incq    n
        jz      L(ret)                  C n was 1, nothing left to do

L(oop): movq    (vp,n,8), %r8
        notq    %r8
L(b00): movq    8(vp,n,8), %r9          C n mod 4 = 0 enters with r8 loaded
        notq    %r9
        LOGOP   (up,n,8), %r8
        LOGOP   8(up,n,8), %r9
        movq    %r8, (rp,n,8)
        movq    %r9, 8(rp,n,8)
L(e11): movq    16(vp,n,8), %r8
        notq    %r8
L(e10): movq    24(vp,n,8), %r9
        notq    %r9
        LOGOP   16(up,n,8), %r8
        LOGOP   24(up,n,8), %r9
        movq    %r8, 16(rp,n,8)
        movq    %r9, 24(rp,n,8)
        addq    $4, n
        jnc     L(oop)                  C carry out means the index hit zero
L(ret): FUNC_EXIT()
        ret                             C explicit return after FUNC_EXIT
EPILOGUE()
')
C -----------------------------------------------------------------------
C VARIANT_3 body: rp[i] = complement of (vp[i] LOGOP up[i]) for every limb
C i in 0 .. n-1.  Each result is complemented with notq after LOGOP and
C before it is stored.
C
C Inputs are the three limb pointers and the limb count named by the
C rp/up/vp/n defines.  The very first instruction reads vp[0]
C unconditionally, so n = 0 is not handled here.
C
C Scratch registers: %rax (n mod 4), %r8 and %r9 (limb values).  The three
C pointers are moved to just past their last limb and n is negated, so the
C index counts upwards from -n towards zero.
C
C The loop is unrolled four ways and n mod 4 selects the entry point:
C   0 -> L(b00)   1 -> L(b01)   2 -> L(b10)   3 -> L(b11)
C Whichever entry is used, n is a negative multiple of 4 when the addq at
C the loop bottom runs, so that add carries out exactly when the last limb
C has been stored.
C
C NOTE(review): TEXT, ALIGN, PROLOGUE, EPILOGUE, FUNC_ENTRY, FUNC_EXIT, L()
C and R32() are macros defined outside this file; FUNC_EXIT() is assumed
C not to contain the return instruction itself.
C -----------------------------------------------------------------------
ifdef(`VARIANT_3',`
        TEXT
        ALIGN(32)
PROLOGUE(func)
        FUNC_ENTRY(4)
        movq    (vp), %r8               C r8 = vp[0], read before vp moves
        movl    R32(%rcx), R32(%rax)    C eax = n (only n mod 4 is needed)
        leaq    (vp,n,8), vp            C advance each pointer by n limbs,
        leaq    (up,n,8), up            C that is to just past the last one
        leaq    (rp,n,8), rp
        negq    n                       C index now runs from -n up to zero
        andl    $3, R32(%rax)           C eax = n mod 4
        je      L(b00)                  C n mod 4 = 0
        cmpl    $2, R32(%rax)
        jc      L(b01)                  C n mod 4 = 1
        je      L(b10)                  C n mod 4 = 2
                                        C n mod 4 = 3 falls through

L(b11): LOGOP   (up,n,8), %r8           C limb 0, using the preloaded r8
        notq    %r8
        movq    %r8, (rp,n,8)
        decq    n                       C bias so L(e11) handles limbs 1, 2
        jmp     L(e11)
L(b10): addq    $-2, n                  C bias so L(e10) handles limbs 0, 1
        jmp     L(e10)                  C r8 still holds vp[0]
C Ten bytes of 0x90, the one-byte NOP opcode.  They follow an unconditional
C jmp and no label points at them, so they are never run.
C NOTE(review): presumably padding that positions the code after it; keep
C the count unchanged unless the layout is re-measured.
        .byte   0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
L(b01): LOGOP   (up,n,8), %r8           C limb 0, using the preloaded r8
        notq    %r8
        movq    %r8, (rp,n,8)
        incq    n
        jz      L(ret)                  C n was 1, nothing left to do

L(oop): movq    (vp,n,8), %r8
L(b00): movq    8(vp,n,8), %r9          C n mod 4 = 0 enters with r8 loaded
        LOGOP   (up,n,8), %r8
        notq    %r8
        LOGOP   8(up,n,8), %r9
        notq    %r9
        movq    %r8, (rp,n,8)
        movq    %r9, 8(rp,n,8)
L(e11): movq    16(vp,n,8), %r8
L(e10): movq    24(vp,n,8), %r9
        LOGOP   16(up,n,8), %r8
        notq    %r8
        LOGOP   24(up,n,8), %r9
        notq    %r9
        movq    %r8, 16(rp,n,8)
        movq    %r9, 24(rp,n,8)
        addq    $4, n
        jnc     L(oop)                  C carry out means the index hit zero
L(ret): FUNC_EXIT()
        ret                             C explicit return after FUNC_EXIT
EPILOGUE()
')