beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / sec_tabselect.asm
blobc7c2e059f143ad31ba5362a90bf2bbcde03f6dfe
1 dnl x86 mpn_sec_tabselect.
3 dnl Copyright 2011 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C cycles/limb
35 C P5 ?
36 C P6 model 0-8,10-12 ?
37 C P6 model 9 (Banias) ?
38 C P6 model 13 (Dothan) ?
39 C P4 model 0 (Willamette) ?
40 C P4 model 1 (?) ?
41 C P4 model 2 (Northwood) 4.5
42 C P4 model 3 (Prescott) ?
43 C P4 model 4 (Nocona) ?
44 C Intel Atom ?
45 C AMD K6 ?
46 C AMD K7 3.4
47 C AMD K8 ?
48 C AMD K10 ?
50 C NOTES
51 C * This has not been tuned for any specific processor. Its speed should not
52 C be too bad, though.
53 C * Using SSE2 could result in many-fold speedup.
C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n,
C                    mp_size_t nents, mp_size_t which)
C
C Symbolic operand names for the routine.  Every stack displacement below is
C relative to %esp after the four register pushes done at function entry,
C which puts the first argument at 20(%esp) and the fifth at 36(%esp).

C Operands held in registers.
define(`rp',	`%edi')
define(`tp',	`%esi')
define(`n',	`%ebx')
define(`nents',	`%ecx')
define(`i',	`%ebp')		C limb index of the inner loop

C Operands held in memory.
define(`which',	`36(%esp)')	C fifth argument, updated in place
define(`maskp',	`20(%esp)')	C scratch, same slot as the rp argument
define(`maskn',	`32(%esp)')	C scratch, same slot as the nents argument
ASM_START()
	TEXT
	ALIGN(16)

C mpn_sec_tabselect (rp, tp, n, nents, which)
C
C Walks all nents table entries of n limbs each, starting at tp, and merges
C each entry into the n limbs at rp under a pair of complementary masks
C (all ones / all zeros).  On the pass whose entry number equals which the
C table limbs are taken; on every other pass the current rp limbs are kept.
C Every entry is read and every rp limb is rewritten on every pass, and the
C only conditional branches are the two loop counters.
C
C Arguments are read from the stack; after the four pushes below the first
C argument is at 20(%esp).  %edi, %esi, %ebx and %ebp are saved and restored.
C %eax, %ecx, %edx and the flags are clobbered.  The stack slots of the rp,
C nents and which arguments are overwritten (maskp, maskn and the biased
C which value live there).
C
C Both loops test their condition at the bottom, so the body always runs at
C least once: n and nents are assumed to be >= 1.
PROLOGUE(mpn_sec_tabselect)
	push	%edi
	push	%esi
	push	%ebx
	push	%ebp
	mov	20(%esp), rp
	mov	24(%esp), tp
	mov	28(%esp), n
	mov	32(%esp), nents

	lea	(rp,n,4), rp		C point just past rp[n-1]; indexed with negative i
	lea	(tp,n,4), tp		C same for the first table entry
	sub	nents, which		C bias: which + nents is 0 on the selected pass
L(outer):
	mov	which, %eax
	add	nents, %eax		C zero only on the selected pass
	neg	%eax			C set CF iff 'which' != k
	sbb	%eax, %eax		C eax = -CF: all ones unless selected
	mov	%eax, maskn		C maskn keeps the current rp limbs
	not	%eax
	mov	%eax, maskp		C maskp lets the table limbs through

	mov	n, i
	neg	i			C i counts from -n up towards 0

	ALIGN(16)
L(top):	mov	(tp,i,4), %eax
	and	maskp, %eax
	mov	(rp,i,4), %edx
	and	maskn, %edx
	or	%edx, %eax		C masks are complementary, so this is a select
	mov	%eax, (rp,i,4)
	inc	i
	js	L(top)			C continue while i is still negative

L(end):	mov	n, %eax
	lea	(tp,%eax,4), tp		C step tp to the end of the next entry
	dec	nents
	jne	L(outer)

L(outer_end):
	pop	%ebp
	pop	%ebx
	pop	%esi
	pop	%edi
	ret				C was missing: control fell off the end of the function
EPILOGUE()