1 dnl Intel Pentium mpn_com -- mpn ones complement.
3 dnl Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any later version.
20 dnl or both in parallel, as here.
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
dnl Pull in the build's m4 configuration.  The quoted path must stay on one
dnl line: m4 keeps any newline inside the quotes as part of the file name.
31 include(`../config.m4')
34 C P5: 1.75 cycles/limb
40 C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
42 C This code is similar to mpn_copyi, basically there's just some "xorl
43 C $GMP_NUMB_MASK"s inserted.
47 C On P55 some MMX code could be 1.25 c/l (8 limb unrolled) if src and dst
48 C are the same alignment mod 8, but it doesn't seem worth the trouble for
49 C just that case (there'd need to be some plain integer available too for
50 C the unaligned case).
C Argument slots for mpn_com (dst, src, size), declared via defframe.  Each
C call is kept on one line so that m4 sees the argument list directly after
C the macro name.
C NOTE(review): the numbers are presumably byte offsets from the stack pointer
C at function entry -- confirm against the defframe definition.
52 defframe(PARAM_SIZE,12)
53 defframe(PARAM_SRC, 8)
54 defframe(PARAM_DST, 4)
C Visible part of the mpn_com body.  NOTE(review): the argument loads, labels,
C branches, stores to dst and the epilogue are not present in this excerpt, so
C register roles below come from the existing comments and addressing modes.
64 pushl %esi FRAME_pushl() C save esi; used below as limb scratch
65 pushl %edi FRAME_pushl() C save edi; used below as limb scratch
67 leal (%eax,%ecx,4), %eax C eax += 4*ecx; presumably src + 4*size, i.e. &src[size] -- confirm
68 xorl $-1, %ecx C -size-1
71 addl $8, %ecx C -size+7
75 movl (%edx), %esi C fetch destination cache line
C Unrolled loop body: four pairs of 4-byte loads cover eight consecutive limbs
C at -28..0 bytes from eax+4*ecx; each limb is complemented by XOR with
C GMP_NUMB_MASK.  NOTE(review): the stores of esi/edi to dst, the counter
C update and the loop branch are not visible in this excerpt.
81 C ecx counter, limbs, negative
82 C edx dst, incrementing
87 movl 28(%edx), %esi C destination prefetch
90 movl -28(%eax,%ecx,4), %esi C limb 0 of the group
91 movl -24(%eax,%ecx,4), %edi C limb 1
92 xorl $GMP_NUMB_MASK, %esi C complement limb 0
93 xorl $GMP_NUMB_MASK, %edi C complement limb 1
97 movl -20(%eax,%ecx,4), %esi C limb 2
98 movl -16(%eax,%ecx,4), %edi C limb 3
99 xorl $GMP_NUMB_MASK, %esi C complement limb 2
100 xorl $GMP_NUMB_MASK, %edi C complement limb 3
104 movl -12(%eax,%ecx,4), %esi C limb 4
105 movl -8(%eax,%ecx,4), %edi C limb 5
106 xorl $GMP_NUMB_MASK, %esi C complement limb 4
107 xorl $GMP_NUMB_MASK, %edi C complement limb 5
111 movl -4(%eax,%ecx,4), %esi C limb 6
112 movl (%eax,%ecx,4), %edi C limb 7
113 xorl $GMP_NUMB_MASK, %esi C complement limb 6
114 xorl $GMP_NUMB_MASK, %edi C complement limb 7
C Tail: per the comment below, ecx is 0..7 here for 7..0 limbs left.  The
C visible code handles a block of four limbs, then a block of two, then
C complements a single value in ecx.  NOTE(review): the tests selecting each
C block, the stores, and the load of the last limb into ecx are not visible.
124 C ecx 0 to 7, representing respectively 7 to 0 limbs remaining
125 C edx dst, next location to store
132 movl -12(%eax,%ecx,4), %esi C four-limb block: limb 0
133 movl -8(%eax,%ecx,4), %edi C four-limb block: limb 1
134 xorl $GMP_NUMB_MASK, %esi C complement limb 0
135 xorl $GMP_NUMB_MASK, %edi C complement limb 1
139 movl -4(%eax,%ecx,4), %esi C four-limb block: limb 2
140 movl (%eax,%ecx,4), %edi C four-limb block: limb 3
141 xorl $GMP_NUMB_MASK, %esi C complement limb 2
142 xorl $GMP_NUMB_MASK, %edi C complement limb 3
155 movl -4(%eax,%ecx,4), %esi C two-limb block: limb 0
156 movl (%eax,%ecx,4), %edi C two-limb block: limb 1
157 xorl $GMP_NUMB_MASK, %esi C complement limb 0
158 xorl $GMP_NUMB_MASK, %edi C complement limb 1
171 xorl $GMP_NUMB_MASK, %ecx C complement ecx; presumably holds the last src limb -- confirm