1 dnl Intel Pentium mpn_and_n
,...
,mpn_xnor_n
-- bitwise logical operations.
3 dnl Copyright
2001, 2002 Free Software Foundation
, Inc.
5 dnl
This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software
; you can redistribute it and/or modify
8 dnl it under the terms of
either:
10 dnl
* the GNU Lesser General
Public License as published by the Free
11 dnl Software Foundation
; either version 3 of the License, or (at your
12 dnl option
) any later version.
16 dnl
* the GNU General
Public License as published by the Free Software
17 dnl Foundation
; either version 2 of the License, or (at your option) any
20 dnl
or both
in parallel
, as here.
22 dnl The GNU MP Library is distributed
in the hope that it will be useful
, but
23 dnl WITHOUT ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY
24 dnl
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License
27 dnl You should have received copies of the GNU General
Public License
and the
28 dnl GNU Lesser General
Public License along with the GNU MP Library. If
not,
29 dnl see
https://www.gnu.
org/licenses
/.
31 include(`..
/config.m4
')
34 C P5: 3.0 c/l and, ior, xor
35 C 3.5 c/l andn, iorn, nand, nior, xnor
39 `ifdef(`OPERATION_$1',`
40 define
(`M4_function
', `mpn_$1')
41 define
(`M4_want_pre
', `$4')
43 define
(`M4_want_post
',`$2')
dnl  M4pre(text) -- emit text only when the operation selected for this
dnl  build requested a pre-step, that is when M4_want_pre expands to exactly
dnl  yes.  For any other value the expansion is empty.
define(`M4pre',
`ifelse(M4_want_pre,`yes',`$1')')
dnl  M4post(text) -- emit text only when the operation selected for this
dnl  build requested a post-step, that is when M4_want_post expands to
dnl  exactly yes.  For any other value the expansion is empty.
define(`M4post',
`ifelse(M4_want_post,`yes',`$1')')
dnl  One row per entry point this source can be built as.  Arguments:
dnl    1  name suffix; the row applies when OPERATION_<suffix> is defined
dnl       and the function is then named mpn_<suffix>
dnl    2  yes if the M4post() text is wanted (stored as M4_want_post)
dnl    3  instruction mnemonic for the operation (its consumer is not
dnl       visible in this excerpt -- confirm)
dnl    4  yes if the M4pre() text is wanted (stored as M4_want_pre)
dnl
dnl           suffix    post    op      pre
M4_choose_op(`and_n',  `',    `andl', `')
M4_choose_op(`andn_n', `',    `andl', `yes')
M4_choose_op(`nand_n', `yes', `andl', `')
M4_choose_op(`ior_n',  `',    `orl',  `')
M4_choose_op(`iorn_n', `',    `orl',  `yes')
M4_choose_op(`nior_n', `yes', `orl',  `')
M4_choose_op(`xor_n',  `',    `xorl', `')
M4_choose_op(`xnor_n', `yes', `xorl', `')
58 `m4_error(`Unrecognised or undefined OPERATION symbol
61 MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
66 C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
68 C Nothing complicated here, just some care to avoid data cache bank clashes
C We're one register short of being able to do a simple 4 loads, 2 ops, 2
C stores.  Instead %ebp is juggled a bit and nops are introduced to keep the
C pairings as intended.  An in-place operation would free up a register, for
C a 0.5 c/l speedup, if that's worth bothering with.
C This code seems best for P55 too.  Data alignment is a big problem for MMX
C and the pairing restrictions on movq and integer instructions make life
C difficult.
dnl  Frame names for two of the arguments.  Offsets 12 and 16 line up with
dnl  the third (yp) and fourth (size) parameters of the prototype, assuming
dnl  4-byte arguments above a 4-byte return address -- confirm against the
dnl  defframe definition.
defframe(`PARAM_SIZE', 16)
defframe(`PARAM_YP',   12)
91 pushl
%ebx FRAME_pushl
()
92 pushl
%esi FRAME_pushl
()
94 pushl
%edi FRAME_pushl
()
95 pushl
%ebp FRAME_pushl
()
106 movl
(%ebx,%ecx,8), %eax C risk of data cache bank clash here
107 movl
(%esi,%ecx,8), %edx
109 M4pre
(` notl_or_xorl_GMP_NUMB_MASK
(%edx)')
113 M4post(`xorl $GMP_NUMB_MASK, %eax')
116 movl
%eax, (%edi,%ecx,8)
125 C
ecx counter
, limb pairs
, decrementing
134 M4post
(`xorl $GMP_NUMB_MASK
, %eax')
135 M4post(`xorl $GMP_NUMB_MASK, %edx')
137 movl
%eax, 4(%edi,%ecx,8)
138 movl
%edx, (%edi,%ecx,8)
141 movl
-4(%ebx,%ecx,8), %ebp
144 movl
-4(%esi,%ecx,8), %eax
145 movl
-8(%esi,%ecx,8), %edx
147 M4pre
(` xorl $GMP_NUMB_MASK
, %eax')
148 M4pre(` xorl $GMP_NUMB_MASK, %edx')
151 movl
-8(%ebx,%ecx,8), %ebp
160 M4post
(`xorl $GMP_NUMB_MASK
, %eax')
161 M4post(`xorl $GMP_NUMB_MASK, %edx')
163 movl
%eax, 4(%edi,%ecx,8)
164 movl
%edx, (%edi,%ecx,8)