beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / k6 / mmx / com.asm
blobb747454627b9c6a1fc9472a152e76990ca96d307
1 dnl AMD K6-2 mpn_com -- mpn bitwise one's complement.
3 dnl Copyright 1999-2002 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
include(`../config.m4')

NAILS_SUPPORT(0-31)


C    alignment dst/src, A=0mod8 N=4mod8
C       A/A   A/N   N/A   N/N
C K6-2  1.0   1.18  1.18  1.18  cycles/limb
C K6    1.5   1.85  1.75  1.85


C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Take the bitwise ones-complement of src,size and write it to dst,size.
C
C The three arguments are read through the PARAM_* frame offsets declared
C below.  Nothing is returned in a register; the result is the limbs stored
C at dst.
C
C Strategy: shrl leaves floor(size/2) in ecx and the low bit of size in the
C carry flag.  Limbs are complemented two at a time with 8-byte MMX moves,
C working from the highest pair down to the lowest.  The carry flag then
C tells whether one odd limb above the pairs is still to be done.
C
C size==0 is not treated specially: it takes the single limb path and still
C reads and writes one limb, so callers are expected to pass size>=1.
C
C Registers written: eax, ecx, edx, mm0, mm7 and the flags.  ebx is used on
C the multi-limb path and is saved and restored around that use.

defframe(PARAM_SIZE,12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)

	TEXT
	ALIGN(16)
PROLOGUE(mpn_com)
deflit(`FRAME',0)

	movl	PARAM_SIZE, %ecx
	movl	PARAM_SRC, %eax
	movl	PARAM_DST, %edx
	shrl	%ecx			C ecx = floor(size/2), carry = size&1
	jnz	L(two_or_more)

	C Single limb: nothing was pushed and no MMX register was touched,
	C so a plain ret is enough here.
	movl	(%eax), %eax
	notl_or_xorl_GMP_NUMB_MASK(	%eax)
	movl	%eax, (%edx)
	ret


L(two_or_more):
	C From here to the jnc below, no instruction may modify the carry
	C flag: it still holds the low bit of size produced by the shrl.
	pushl	%ebx	FRAME_pushl()
	pcmpeqd	%mm7, %mm7		C all ones

	movl	%ecx, %ebx		C keep floor(size/2) for the odd limb
ifelse(GMP_NAIL_BITS,0,,
`	psrld	$GMP_NAIL_BITS, %mm7')		C clear nails



	ALIGN(8)
L(top):
	C eax	src
	C ebx	floor(size/2)
	C ecx	counter, floor(size/2) down to 1
	C edx	dst
	C
	C mm0	scratch
	C mm7	mask

	movq	-8(%eax,%ecx,8), %mm0
	pxor	%mm7, %mm0
	movq	%mm0, -8(%edx,%ecx,8)
	loop	L(top)			C loop leaves the flags alone


	jnc	L(no_extra)		C carry clear means size was even
	movl	(%eax,%ebx,8), %eax	C odd limb sits just above the pairs
	notl_or_xorl_GMP_NUMB_MASK(	%eax)
	movl	%eax, (%edx,%ebx,8)
L(no_extra):

	popl	%ebx
	emms_or_femms
	ret				C without this the multi-limb path
					C would run off the end of the function

EPILOGUE()