beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / pentium / aors_n.asm
blob01ebfb96ae41a3b6154702326a19a8f4e14314f3
1 dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
3 dnl Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C P5: 2.375 cycles/limb
C Select which operation this file is assembled as.  The first of
C OPERATION_add_n / OPERATION_sub_n found defined decides the three names
C used throughout the rest of the file:
C   M4_inst         the carry-propagating instruction applied to each limb
C                   (adcl for addition, sbbl for subtraction)
C   M4_function_n   the entry point without a carry-in argument
C   M4_function_nc  the entry point with a carry-in argument
C If neither symbol is defined, m4_error is invoked with the message below.
37 ifdef(`OPERATION_add_n',`
38 define(M4_inst, adcl)
39 define(M4_function_n, mpn_add_n)
40 define(M4_function_nc, mpn_add_nc)
42 ',`ifdef(`OPERATION_sub_n',`
43 define(M4_inst, sbbl)
44 define(M4_function_n, mpn_sub_n)
45 define(M4_function_nc, mpn_sub_nc)
47 ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
48 ')')')
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably it
C announces the four entry points this one source can be built into.
C Confirm against its definition before relying on that.
50 MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
53 C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
54 C mp_size_t size);
55 C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
56 C mp_size_t size, mp_limb_t carry);
C
C Names for the stack-passed arguments of the two prototypes above, in
C argument order: dst at 4, src1 at 8, src2 at 12, size at 16, carry at 20.
C NOTE(review): these look like byte offsets from the stack pointer at
C function entry (return address at 0), with defframe presumably adding the
C current FRAME value so the names stay valid after pushes -- confirm in the
C defframe definition, which is not part of this file.
C PARAM_CARRY is only read by M4_function_nc.
58 defframe(PARAM_CARRY,20)
59 defframe(PARAM_SIZE, 16)
60 defframe(PARAM_SRC2, 12)
61 defframe(PARAM_SRC1, 8)
62 defframe(PARAM_DST, 4)
C -----------------------------------------------------------------------
C M4_function_nc: same limb-by-limb operation as M4_function_n, but the
C initial carry (or borrow) is taken from bit 0 of PARAM_CARRY.
C
C The setup below mirrors the one in M4_function_n:
C   edi = dst, esi = src1, ebp = src2, ebx = first src2 limb,
C   ecx = (size-1)/8   number of eight-limb groups,
C   edx = (size-1) mod 8   leftover limbs before the final one.
C After loading the carry flag, control jumps into the body of
C M4_function_n at L(oop) or L(end) and continues there; this entry point
C has no exit sequence of its own.
C The code assumes size >= 1: the first src2 limb is loaded before size is
C examined.
C -----------------------------------------------------------------------
64 TEXT
65 ALIGN(8)
66 PROLOGUE(M4_function_nc)
C Save the four registers used for pointers and data.  The matching pops
C are at the end of M4_function_n.
68 pushl %edi
69 pushl %esi
70 pushl %ebx
71 pushl %ebp
C Four 4-byte pushes so far.
C NOTE(review): FRAME presumably tells the PARAM_ names how far the stack
C pointer has moved since entry -- confirm in the defframe definition.
72 deflit(`FRAME',16)
74 movl PARAM_DST,%edi
75 movl PARAM_SRC1,%esi
76 movl PARAM_SRC2,%ebp
77 movl PARAM_SIZE,%ecx
C Preload the first src2 limb; the shared code always keeps the next src2
C limb in ebx.
79 movl (%ebp),%ebx
C ecx = size-1, then split into edx = (size-1) mod 8 and ecx = (size-1)/8.
81 decl %ecx
82 movl %ecx,%edx
83 shrl $3,%ecx
84 andl $7,%edx
C The testl sets ZF for the jz below.  The carry flag it clears is reloaded
C from PARAM_CARRY on both paths before any M4_inst runs.
85 testl %ecx,%ecx C zero carry flag
86 jz L(endgo)
C At least one eight-limb group: L(oop) uses edx as scratch, so keep the
C leftover count on the stack.  The popl after the loop in M4_function_n
C retrieves it.
88 pushl %edx
89 FRAME_pushl()
90 movl PARAM_CARRY,%eax
91 shrl %eax C shift bit 0 into carry
92 jmp L(oop)
C No eight-limb group: edx was not pushed on this path, so FRAME is set
C back to 16 before PARAM_CARRY is read.
94 L(endgo):
95 deflit(`FRAME',16)
96 movl PARAM_CARRY,%eax
97 shrl %eax C shift bit 0 into carry
98 jmp L(end)
100 EPILOGUE()
C -----------------------------------------------------------------------
C M4_function_n: for each of the size limbs (4 bytes each here), compute
C dst limb = src1 limb M4_inst src2 limb, with the carry (or borrow)
C propagated from one limb to the next.  No carry-in: the carry flag is
C cleared before the first limb.
C
C In:   PARAM_DST, PARAM_SRC1, PARAM_SRC2, PARAM_SIZE on the stack.
C Out:  eax = carry (or borrow) out of the last limb, 0 or 1.
C Regs: edi = dst, esi = src1, ebp = src2,
C       ebx = src2 limb, always loaded one step ahead of its use,
C       ecx = eight-limb groups remaining,
C       edx = leftover limbs, (size-1) mod 8; scratch inside L(oop).
C       edi, esi, ebx and ebp are saved on entry and restored before ret.
C The code assumes size >= 1: the first src2 limb is loaded before size is
C examined and the limb at L(end2) is always processed.
C
C Structure: (size-1)/8 passes of the unrolled loop L(oop), then
C (size-1) mod 8 passes of the one-limb loop L(oop2), then one final limb
C at L(end2).  Because one limb is always left for L(end2), the look-ahead
C loads of src2 in both loops stay inside the size limbs of src2.
C
C Between the first and the last M4_inst the carry flag carries live data,
C so pointer and counter updates use only movl, leal, decl and incl, none
C of which modify the carry flag.
C
C L(oop) and L(end) are also jumped to from M4_function_nc, which arrives
C with the same register setup and, for L(oop), with edx already pushed.
C -----------------------------------------------------------------------
103 ALIGN(8)
104 PROLOGUE(M4_function_n)
C Save the four registers used for pointers and data.
106 pushl %edi
107 pushl %esi
108 pushl %ebx
109 pushl %ebp
C Four 4-byte pushes so far.
C NOTE(review): FRAME presumably tells the PARAM_ names how far the stack
C pointer has moved since entry -- confirm in the defframe definition.
110 deflit(`FRAME',16)
112 movl PARAM_DST,%edi
113 movl PARAM_SRC1,%esi
114 movl PARAM_SRC2,%ebp
115 movl PARAM_SIZE,%ecx
C Preload the first src2 limb.
117 movl (%ebp),%ebx
C ecx = size-1, then split into edx = (size-1) mod 8 and ecx = (size-1)/8.
119 decl %ecx
120 movl %ecx,%edx
121 shrl $3,%ecx
122 andl $7,%edx
C The testl both clears the carry flag for the first M4_inst and sets ZF
C for the jz below.
123 testl %ecx,%ecx C zero carry flag
C No eight-limb group at all: go straight to the leftover limbs.
124 jz L(end)
C edx is used as scratch inside L(oop); keep the leftover count on the
C stack until the loop is done.
125 pushl %edx
126 FRAME_pushl()
C Unrolled loop: each pass handles eight limbs as four pairs.  dst is
C advanced first, so the stores below use negative offsets from edi.
128 ALIGN(8)
129 L(oop): movl 28(%edi),%eax C fetch destination cache line
130 leal 32(%edi),%edi
C Limbs 0 and 1 of the group.
132 L(1): movl (%esi),%eax
133 movl 4(%esi),%edx
134 M4_inst %ebx,%eax
135 movl 4(%ebp),%ebx
136 M4_inst %ebx,%edx
137 movl 8(%ebp),%ebx
138 movl %eax,-32(%edi)
139 movl %edx,-28(%edi)
C Limbs 2 and 3 of the group.
141 L(2): movl 8(%esi),%eax
142 movl 12(%esi),%edx
143 M4_inst %ebx,%eax
144 movl 12(%ebp),%ebx
145 M4_inst %ebx,%edx
146 movl 16(%ebp),%ebx
147 movl %eax,-24(%edi)
148 movl %edx,-20(%edi)
C Limbs 4 and 5 of the group.
150 L(3): movl 16(%esi),%eax
151 movl 20(%esi),%edx
152 M4_inst %ebx,%eax
153 movl 20(%ebp),%ebx
154 M4_inst %ebx,%edx
155 movl 24(%ebp),%ebx
156 movl %eax,-16(%edi)
157 movl %edx,-12(%edi)
C Limbs 6 and 7 of the group.  The load from 32(%ebp) fetches the first
C src2 limb of whatever is processed next.
159 L(4): movl 24(%esi),%eax
160 movl 28(%esi),%edx
161 M4_inst %ebx,%eax
162 movl 28(%ebp),%ebx
163 M4_inst %ebx,%edx
164 movl 32(%ebp),%ebx
165 movl %eax,-8(%edi)
166 movl %edx,-4(%edi)
C Advance both source pointers by eight limbs and count the group down.
168 leal 32(%esi),%esi
169 leal 32(%ebp),%ebp
170 decl %ecx
171 jnz L(oop)
C Retrieve the leftover count saved before the loop.
173 popl %edx
174 FRAME_popl()
C Leftover limbs.  edx = 0 makes the decl produce -1 and the js skip the
C one-limb loop; otherwise the incl restores the count.
175 L(end):
176 decl %edx C test %edx w/o clobbering carry
177 js L(end2)
178 incl %edx
C One limb per pass, edx passes.  ebx is reloaded with the following src2
C limb each time.
179 L(oop2):
180 leal 4(%edi),%edi
181 movl (%esi),%eax
182 M4_inst %ebx,%eax
183 movl 4(%ebp),%ebx
184 movl %eax,-4(%edi)
185 leal 4(%esi),%esi
186 leal 4(%ebp),%ebp
187 decl %edx
188 jnz L(oop2)
C Final limb; ebx already holds its src2 operand, so nothing further is
C read from src2.
189 L(end2):
190 movl (%esi),%eax
191 M4_inst %ebx,%eax
192 movl %eax,(%edi)
C Turn the final carry flag into the return value: sbbl gives 0 or -1,
C negl turns that into 0 or 1.
194 sbbl %eax,%eax
195 negl %eax
C Restore the saved registers in reverse order of the pushes.
197 popl %ebp
198 popl %ebx
199 popl %esi
200 popl %edi
C Return to the caller with the carry in eax.  Without this instruction
C execution would run past the end of the function after the pops.
201 ret
203 EPILOGUE()