beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / pentium4 / sse2 / sub_n.asm
blob5ba1c018ecb6fced0b37825610819eabe783a9ac
1 dnl Intel Pentium-4 mpn_sub_n -- mpn subtraction.
3 dnl Copyright 2001, 2002 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C cycles/limb
35 C dst!=src1,2 dst==src1 dst==src2
36 C P6 model 0-8,10-12 -
37 C P6 model 9 (Banias) ?
38 C P6 model 13 (Dothan) ?
39 C P4 model 0-1 (Willamette) ?
40 C P4 model 2 (Northwood) 4 6 6
41 C P4 model 3-4 (Prescott) 4.25 7.5 7.5
dnl  Stack offsets of the five arguments, listed in ascending order.
dnl  PARAM_CARRY is read only by the mpn_sub_nc entry point.
defframe(PARAM_DST,   4)
defframe(PARAM_SRC1,  8)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SIZE, 16)
defframe(PARAM_CARRY,20)

dnl  The src1 argument slot is loaded once on entry and after that serves as
dnl  the save area for ebx, so no extra stack space is needed.
define(SAVE_EBX,`PARAM_SRC1')
C mpn_sub_nc -- subtraction entry point that takes an initial borrow.
C
C Loads the PARAM_CARRY stack argument into mm0, the register the shared
C loop uses as the incoming borrow, then jumps to L(start_nc) inside
C mpn_sub_n, which does the rest of the work.

	TEXT
	ALIGN(8)
PROLOGUE(mpn_sub_nc)
deflit(`FRAME',0)

	movd	PARAM_CARRY, %mm0	C mm0 = borrow in, zero-extended by movd
	jmp	L(start_nc)		C join the common code; nothing was pushed

EPILOGUE()
C mpn_sub_n -- subtract PARAM_SIZE limbs of PARAM_SRC2 from PARAM_SRC1,
C store the difference at PARAM_DST and return the final borrow in eax.
C
C mpn_sub_nc joins at L(start_nc) with its initial borrow already in mm0.
C
C Each 32-bit limb is loaded with movd, which zero-extends it into a 64-bit
C MMX register.  After the two psubq steps a borrow out of the low 32 bits
C shows up as bit 63 of the result, so psrlq $63 reduces it to 0 or 1 ready
C to be subtracted from the next limb.
C
C The loop handles two limbs per iteration and alternates the borrow between
C mm0 and mm1, which is why there are two exits: L(done_mm1) when the last
C limb was the first of a pair, and the fall-through exit when it was the
C second.  Each exit must finish with its own ret; without it the
C fall-through exit would run on into L(done_mm1) and replace the return
C value with the stale borrow held in mm1.
C
C The first limb is processed before the counter is tested, so the size is
C expected to be at least 1.

	ALIGN(8)
PROLOGUE(mpn_sub_n)
deflit(`FRAME',0)

	pxor	%mm0, %mm0		C plain entry: no borrow in
L(start_nc):
	mov	PARAM_SRC1, %eax	C read src1 before its slot is reused
	mov	%ebx, SAVE_EBX		C preserve ebx in the src1 argument slot
	mov	PARAM_SRC2, %ebx
	mov	PARAM_DST, %edx
	mov	PARAM_SIZE, %ecx

	C Point each register one past the end of its operand and count a
	C negative index up towards zero, so one add both advances and tests.
	lea	(%eax,%ecx,4), %eax	C src1 end
	lea	(%ebx,%ecx,4), %ebx	C src2 end
	lea	(%edx,%ecx,4), %edx	C dst end
	neg	%ecx			C -size

L(top):
	C eax	src1 end
	C ebx	src2 end
	C ecx	counter, limbs, negative
	C edx	dst end
	C mm0	carry bit

	C First limb of the pair: borrow in from mm0, borrow out to mm1.
	movd	(%eax,%ecx,4), %mm1
	movd	(%ebx,%ecx,4), %mm2
	psubq	%mm2, %mm1

	psubq	%mm0, %mm1
	movd	%mm1, (%edx,%ecx,4)

	psrlq	$63, %mm1		C mm1 = borrow out, 0 or 1

	add	$1, %ecx
	jz	L(done_mm1)		C that was the last limb

	C Second limb of the pair: borrow in from mm1, borrow out to mm0.
	movd	(%eax,%ecx,4), %mm0
	movd	(%ebx,%ecx,4), %mm2
	psubq	%mm2, %mm0

	psubq	%mm1, %mm0
	movd	%mm0, (%edx,%ecx,4)

	psrlq	$63, %mm0		C mm0 = borrow out, 0 or 1

	add	$1, %ecx
	jnz	L(top)

	C Last limb was the second of a pair: final borrow is in mm0.
	movd	%mm0, %eax
	mov	SAVE_EBX, %ebx
	emms				C leave MMX state before returning
	ret

L(done_mm1):
	C Last limb was the first of a pair: final borrow is in mm1.
	movd	%mm1, %eax
	mov	SAVE_EBX, %ebx
	emms				C leave MMX state before returning
	ret

EPILOGUE()