dnl  beta-0.89.2
dnl  [luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / aors_n.asm
dnl  blob 5d359f59b6583f9e3ab73496e7149725d90de9a3
dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1992, 1994-1996, 1999-2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C     cycles/limb
C P5	3.375
C P6	3.125
C K6	3.5
C K7	2.25
C P4	8.75


C Select which operation this expansion of the file provides.  One of
C OPERATION_add_n or OPERATION_sub_n must already be defined when m4
C processes this file.  The selection fixes three names used below:
C
C   M4_inst          instruction applied to every limb pair
C                    (adcl for addition, sbbl for subtraction)
C   M4_function_n    entry point without a carry-in argument
C   M4_function_nc   entry point with a carry-in argument
C
C When neither symbol is defined, m4_error is called with a diagnostic.

ifdef(`OPERATION_add_n',`
	define(M4_inst,        adcl)
	define(M4_function_n,  mpn_add_n)
	define(M4_function_nc, mpn_add_nc)

',`ifdef(`OPERATION_sub_n',`
	define(M4_inst,        sbbl)
	define(M4_function_n,  mpn_sub_n)
	define(M4_function_nc, mpn_sub_nc)

',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
')')')

C Lists all four function names this file can expand to (presumably read
C by the build system; the macro is defined outside this file).
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C	                   mp_size_t size, mp_limb_t carry);
C
C Names for the stack arguments, in prototype order and 4 bytes apart.
C Both entry points read their arguments through these names.  The
C offsets are presumably relative to the stack pointer at function
C entry, with FRAME and FRAME_pushl accounting for later pushes
C (defframe itself comes from the included m4 definitions).
C PARAM_CARRY only exists for the carry-in variant.

defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)
C ---------------------------------------------------------------------------
C M4_function_nc -- limb-wise add or subtract with an explicit carry-in.
C
C Arguments are read from the stack via PARAM_DST, PARAM_SRC1, PARAM_SRC2,
C PARAM_SIZE and PARAM_CARRY.
C
C This entry point contains only the setup.  It loads bit 0 of the carry
C argument into the carry flag and then transfers control into the 8-way
C unrolled loop at L(oop), which is located in M4_function_n.  The loop
C tail there produces the return value, restores the saved registers and
C returns.
C
C Register use during setup:
C   edi  dst pointer      esi  src1 pointer      edx  src2 pointer
C   ecx  number of passes through the unrolled loop
C   eax  index of the first limb slot to execute, then the jump target
C
C With size = 8*q + r (0 <= r < 8) the first pass must process only r
C limbs.  eax = (-size) & 7 is the number of leading slots to skip, the
C three pointers are moved back by 4*eax bytes so that the first executed
C slot addresses limb 0, and ecx becomes q+1.  When r is 0 nothing is
C skipped and control goes to L(oopgo) instead.
C
C size == 0 is not handled: ecx would start at 0 and the decl in the loop
C would wrap around.

	TEXT
	ALIGN(8)

PROLOGUE(M4_function_nc)
deflit(`FRAME',0)

	pushl	%edi		FRAME_pushl()
	pushl	%esi		FRAME_pushl()

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%edx
	movl	PARAM_SIZE,%ecx

	movl	%ecx,%eax
	shrl	$3,%ecx			C compute count for unrolled loop
	negl	%eax
	andl	$7,%eax			C get index where to start loop
	jz	L(oopgo)		C necessary special case for 0
	incl	%ecx			C adjust loop count
	shll	$2,%eax			C adjustment for pointers...
	subl	%eax,%edi		C ... since they are offset ...
	subl	%eax,%esi		C ... by a constant when we ...
	subl	%eax,%edx		C ... enter the loop
	shrl	$2,%eax			C restore previous value

C The jump target is L(oop) + 9*eax - 3.  It has to be computed before
C the carry flag is loaded, because the addl instructions of the PIC
C variant overwrite the flags.
ifdef(`PIC',`
	C Calculate start address in loop for PIC.  Due to limitations in
	C old gas, LF(M4_function_n,oop)-L(0a)-3 cannot be put into the leal
	call	L(0a)
L(0a):	leal	(%eax,%eax,8),%eax
	addl	(%esp),%eax
	addl	$L(oop)-L(0a)-3,%eax
	addl	$4,%esp
',`
	C Calculate start address in loop for non-PIC.
	leal	L(oop)-3(%eax,%eax,8),%eax
')

	C These lines initialize carry from the 5th parameter.  Should be
	C possible to simplify.
	C ebp is only borrowed as scratch.  The popl and the jmp below do
	C not modify the flags, so the carry survives until the loop.
	pushl	%ebp		FRAME_pushl()
	movl	PARAM_CARRY,%ebp
	shrl	%ebp			C shift bit 0 into carry
	popl	%ebp		FRAME_popl()

	jmp	*%eax			C jump into loop

EPILOGUE()
C ---------------------------------------------------------------------------
C M4_function_n -- limb-wise add or subtract without a carry-in.
C
C Arguments are read from the stack via PARAM_DST, PARAM_SRC1, PARAM_SRC2
C and PARAM_SIZE.  The value returned in eax is the carry or borrow out
C of the most significant limb, 0 or 1.
C
C This function also holds the code shared with M4_function_nc: the
C carry-loading stub at L(oopgo), the unrolled loop at L(oop) and the
C common tail.
C
C Register use:
C   edi  dst pointer      esi  src1 pointer      edx  src2 pointer
C   ecx  number of passes through the unrolled loop
C   eax  entry index and jump target during setup, limb scratch in the
C        loop, return value at the end
C
C With size = 8*q + r (0 <= r < 8) the first pass must process only r
C limbs.  eax = (-size) & 7 is the number of leading slots to skip, the
C three pointers are moved back by 4*eax bytes so that the first executed
C slot addresses limb 0, and ecx becomes q+1.  When r is 0 the loop is
C entered at its top.
C
C size == 0 is not handled: ecx would start at 0 and the decl in the loop
C would wrap around.

	ALIGN(16)
PROLOGUE(M4_function_n)
deflit(`FRAME',0)

	pushl	%edi		FRAME_pushl()
	pushl	%esi		FRAME_pushl()

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%edx
	movl	PARAM_SIZE,%ecx

	movl	%ecx,%eax
	shrl	$3,%ecx			C compute count for unrolled loop
	negl	%eax
	andl	$7,%eax			C get index where to start loop
	jz	L(oop)			C necessary special case for 0
	incl	%ecx			C adjust loop count
	shll	$2,%eax			C adjustment for pointers...
	subl	%eax,%edi		C ... since they are offset ...
	subl	%eax,%esi		C ... by a constant when we ...
	subl	%eax,%edx		C ... enter the loop
	shrl	$2,%eax			C restore previous value

C The loop must be entered with the carry flag clear.  On the jz path
C the andl has cleared it.  On this path the shrl by 2 shifts out a zero
C bit, since eax was a multiple of 4.  In the PIC variant the last
C instruction that writes flags is the addl to esp, which leaves the
C carry clear unless the stack pointer wraps.
C
C The jump target is L(oop) + 9*eax - 3.
ifdef(`PIC',`
	C Calculate start address in loop for PIC.  Due to limitations in
	C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
	call	L(0b)
L(0b):	leal	(%eax,%eax,8),%eax
	addl	(%esp),%eax
	addl	$L(oop)-L(0b)-3,%eax
	addl	$4,%esp
',`
	C Calculate start address in loop for non-PIC.
	leal	L(oop)-3(%eax,%eax,8),%eax
')
	jmp	*%eax			C jump into loop

C Entry used by M4_function_nc when size is a multiple of 8.  It is only
C reached by a jump, with edi and esi already pushed.  Bit 0 of the carry
C argument goes into the carry flag, then control falls into the loop.
L(oopgo):
	pushl	%ebp		FRAME_pushl()
	movl	PARAM_CARRY,%ebp
	shrl	%ebp			C shift bit 0 into carry
	popl	%ebp		FRAME_popl()

C Eight limb slots per pass.  The computed entry above relies on the
C byte layout of this code: the first slot is expected to assemble to 6
C bytes (no displacement) and every later slot to 9 bytes (three
C instructions with an 8-bit displacement), assuming the assembler picks
C the shortest encodings.  Do not insert, reorder or re-encode
C instructions between L(oop) and the last store.
C
C The carry flag links consecutive slots and consecutive passes.  The
C pointer updates use leal and the counter uses decl because neither
C changes the carry flag.
	ALIGN(16)
L(oop):	movl	(%esi),%eax
	M4_inst	(%edx),%eax
	movl	%eax,(%edi)
	movl	4(%esi),%eax
	M4_inst	4(%edx),%eax
	movl	%eax,4(%edi)
	movl	8(%esi),%eax
	M4_inst	8(%edx),%eax
	movl	%eax,8(%edi)
	movl	12(%esi),%eax
	M4_inst	12(%edx),%eax
	movl	%eax,12(%edi)
	movl	16(%esi),%eax
	M4_inst	16(%edx),%eax
	movl	%eax,16(%edi)
	movl	20(%esi),%eax
	M4_inst	20(%edx),%eax
	movl	%eax,20(%edi)
	movl	24(%esi),%eax
	M4_inst	24(%edx),%eax
	movl	%eax,24(%edi)
	movl	28(%esi),%eax
	M4_inst	28(%edx),%eax
	movl	%eax,28(%edi)
	leal	32(%edi),%edi
	leal	32(%esi),%esi
	leal	32(%edx),%edx
	decl	%ecx
	jnz	L(oop)

	sbbl	%eax,%eax		C eax = 0 - carry flag
	negl	%eax			C eax = carry or borrow out, 0 or 1

	popl	%esi
	popl	%edi
	ret

EPILOGUE()