beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / pentium / logops_n.asm
blob18773172e98881602b2abd1582f06a2cc5a6446e
1 dnl Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
3 dnl Copyright 2001, 2002 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C P5: 3.0 c/l and, ior, xor
35 C 3.5 c/l andn, iorn, nand, nior, xnor
dnl  Usage: M4_choose_op(name, post, op, pre)
dnl
dnl  If OPERATION_<name> is defined, record that operation as the one being
dnl  built:
dnl    M4_function   becomes mpn_<name>
dnl    M4op          becomes the op argument
dnl    M4_want_pre   becomes the pre argument
dnl    M4_want_post  becomes the post argument
dnl  If OPERATION_<name> is not defined the call does nothing.
dnl
dnl  Usage: M4pre(text), M4post(text)
dnl
dnl  Expand to text when M4_want_pre, respectively M4_want_post, is yes, and
dnl  to nothing otherwise.
define(`M4_choose_op',
`ifdef(`OPERATION_$1',`
define(`M4_function',  `mpn_$1')
define(`M4op',         `$3')
define(`M4_want_pre',  `$4')
define(`M4_want_post', `$2')
')')
define(`M4pre',  `ifelse(M4_want_pre, yes, `$1')')
define(`M4post', `ifelse(M4_want_post, yes, `$1')')
dnl  Operation table, columns are: name, post, op, pre.  Only the row whose
dnl  OPERATION_ symbol is defined has any effect.  Column padding is placed
dnl  after the commas only, so that no argument picks up trailing blanks.
M4_choose_op( and_n,     , andl,    )
M4_choose_op( andn_n,    , andl, yes)
M4_choose_op( nand_n, yes, andl,    )
M4_choose_op( ior_n,     ,  orl,    )
M4_choose_op( iorn_n,    ,  orl, yes)
M4_choose_op( nior_n, yes,  orl,    )
M4_choose_op( xor_n,     , xorl,    )
M4_choose_op( xnor_n, yes, xorl,    )

dnl  Stop with an error if none of the rows above matched, since M4_function
dnl  is then still undefined.
ifdef(`M4_function',,
`m4_error(`Unrecognised or undefined OPERATION symbol
')')

dnl  The list of entry points this one file can provide, one per table row.
MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

NAILS_SUPPORT(0-31)
66 C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
68 C Nothing complicated here, just some care to avoid data cache bank clashes
69 C and AGIs.
71 C We're one register short of being able to do a simple 4 loads, 2 ops, 2
72 C stores. Instead %ebp is juggled a bit and nops are introduced to keep the
73 C pairings as intended. An in-place operation would free up a register, for
74 C an 0.5 c/l speedup, if that's worth bothering with.
76 C This code seems best for P55 too. Data alignment is a big problem for MMX
77 C and the pairing restrictions on movq and integer instructions make life
78 C difficult.
dnl  Parameter slots, listed in prototype order (wp, xp, yp, size), one
dnl  4-byte slot each starting at offset 4.
dnl  NOTE(review): offsets are presumably relative to the stack pointer on
dnl  entry, with the return address in slot 0 -- confirm against defframe.
defframe(PARAM_WP,   4)
defframe(PARAM_XP,   8)
defframe(PARAM_YP,  12)
defframe(PARAM_SIZE,16)
85 TEXT
86 ALIGN(8)
88 PROLOGUE(M4_function)
89 deflit(`FRAME',0)
91 pushl %ebx FRAME_pushl()
92 pushl %esi FRAME_pushl()
94 pushl %edi FRAME_pushl()
95 pushl %ebp FRAME_pushl()
97 movl PARAM_SIZE, %ecx
98 movl PARAM_XP, %ebx
100 movl PARAM_YP, %esi
101 movl PARAM_WP, %edi
103 shrl %ecx
104 jnc L(entry)
106 movl (%ebx,%ecx,8), %eax C risk of data cache bank clash here
107 movl (%esi,%ecx,8), %edx
109 M4pre(` notl_or_xorl_GMP_NUMB_MASK(%edx)')
111 M4op %edx, %eax
113 M4post(`xorl $GMP_NUMB_MASK, %eax')
114 orl %ecx, %ecx
116 movl %eax, (%edi,%ecx,8)
117 jz L(done)
119 jmp L(entry)
122 L(top):
123 C eax
124 C ebx xp
125 C ecx counter, limb pairs, decrementing
126 C edx
127 C esi yp
128 C edi wp
129 C ebp
131 M4op %ebp, %edx
134 M4post(`xorl $GMP_NUMB_MASK, %eax')
135 M4post(`xorl $GMP_NUMB_MASK, %edx')
137 movl %eax, 4(%edi,%ecx,8)
138 movl %edx, (%edi,%ecx,8)
140 L(entry):
141 movl -4(%ebx,%ecx,8), %ebp
144 movl -4(%esi,%ecx,8), %eax
145 movl -8(%esi,%ecx,8), %edx
147 M4pre(` xorl $GMP_NUMB_MASK, %eax')
148 M4pre(` xorl $GMP_NUMB_MASK, %edx')
150 M4op %ebp, %eax
151 movl -8(%ebx,%ecx,8), %ebp
153 decl %ecx
154 jnz L(top)
157 M4op %ebp, %edx
160 M4post(`xorl $GMP_NUMB_MASK, %eax')
161 M4post(`xorl $GMP_NUMB_MASK, %edx')
163 movl %eax, 4(%edi,%ecx,8)
164 movl %edx, (%edi,%ecx,8)
167 L(done):
168 popl %ebp
169 popl %edi
171 popl %esi
172 popl %ebx
176 EPILOGUE()