beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / p6 / aors_n.asm
blobdf51c2e6f76c42d00f724954e8c292243c64fb8a
1 dnl Intel P6 mpn_add_n/mpn_sub_n -- mpn add or subtract.
3 dnl Copyright 2006 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C TODO:
34 C * Avoid indexed addressing, it makes us stall on the two-ported register
35 C file.
37 C cycles/limb
38 C P6 model 0-8,10-12 3.17
39 C P6 model 9 (Banias) 2.15
40 C P6 model 13 (Dothan) 2.25
C Register roles used throughout this file.
define(`rp', `%edi')
define(`up', `%esi')
define(`vp', `%ebx')
define(`n',  `%ecx')

C Select the carry-propagating instruction and the names of the two entry
C points according to which OPERATION_ symbol is defined.  Names and values
C are quoted, matching the register defines above, so that a name which is
C already a macro is not expanded before define sees it.
ifdef(`OPERATION_add_n', `
        define(`ADCSBB',  `adc')
        define(`func',    `mpn_add_n')
        define(`func_nc', `mpn_add_nc')')
ifdef(`OPERATION_sub_n', `
        define(`ADCSBB',  `sbb')
        define(`func',    `mpn_sub_n')
        define(`func_nc', `mpn_sub_nc')')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ASM_START()

        TEXT
        ALIGN(16)
PROLOGUE(func)
C func is mpn_add_n or mpn_sub_n, depending on the OPERATION_ symbol.
C
C For each of the n 4-byte limbs, computes up[i] ADCSBB vp[i] into rp[i],
C carrying from one limb to the next, and returns the final carry or borrow
C as 0 or 1 in %eax.
C
C Stack arguments, in order: rp, up, vp, n.
C Saves and restores %edi, %esi and %ebx.  Uses %eax, %ecx, %edx and flags.
C
C NOTE(review): with n = 0 the computed jump lands on L(ent) with a zero
C index and eight limbs would be processed.  Callers presumably guarantee
C n >= 1.  Confirm against the callers.

        xor     %edx, %edx              C plain entry: no carry-in

L(start):
C Shared body.  Bit 0 of %edx holds the carry-in on arrival here.
        push    %edi
        push    %esi
        push    %ebx

C Three registers were pushed, so the first argument is now at 16(%esp).
        mov     16(%esp), rp
        mov     20(%esp), up
        mov     24(%esp), vp
        mov     28(%esp), n

C Point all three pointers just past the end of their operands, so that a
C negative index counting up towards zero can be used for addressing.
        lea     (up,n,4), up
        lea     (vp,n,4), vp
        lea     (rp,n,4), rp

C Split the negated count: n becomes the index of the first 8-limb block,
C rounded down to a multiple of 8, and %eax becomes the number of limb
C groups to skip inside that first block.
        neg     n
        mov     n, %eax
        and     $-8, n
        and     $7, %eax
        shl     $2, %eax                C 4x

C Turn the skip count into an entry address inside the unrolled block.
C In the PIC case the address of L(here) is obtained from the return
C address pushed by the call.
ifdef(`PIC',`
        call    L(pic_calc)
L(here):
',`
        lea     L(ent) (%eax,%eax,2), %eax      C 12x
')

C Move the carry-in from bit 0 of %edx into the carry flag.  Nothing
C between here and the first ADCSBB changes that flag.
        shr     %edx                    C set cy flag
        jmp     *%eax

ifdef(`PIC',`
L(pic_calc):
        C See mpn/x86/README about old gas bugs
        lea     (%eax,%eax,2), %eax
        add     $L(ent)-L(here), %eax
        add     (%esp), %eax
        ret_internal
')

L(end):
C Materialize the carry flag as 0 or 1 in %eax, restore the saved
C registers in reverse push order and return to the caller.
        sbb     %eax, %eax
        neg     %eax
        pop     %ebx
        pop     %esi
        pop     %edi
        ret

        ALIGN(16)
L(top):
C jecxz tests the index n, which lives in %ecx, without touching flags.
        jecxz   L(end)
L(ent):
C Eight limb groups of load, ADCSBB, store.  Zdisp is defined outside
C this file.  Presumably it forces an explicit zero displacement so that
C the first group has the same encoded size as the others, which the 12x
C entry computation relies on.  Confirm against its definition.
Zdisp(  mov,    0,(up,n,4), %eax)
Zdisp(  ADCSBB, 0,(vp,n,4), %eax)
Zdisp(  mov,    %eax, 0,(rp,n,4))

        mov     4(up,n,4), %edx
        ADCSBB  4(vp,n,4), %edx
        mov     %edx, 4(rp,n,4)

        mov     8(up,n,4), %eax
        ADCSBB  8(vp,n,4), %eax
        mov     %eax, 8(rp,n,4)

        mov     12(up,n,4), %edx
        ADCSBB  12(vp,n,4), %edx
        mov     %edx, 12(rp,n,4)

        mov     16(up,n,4), %eax
        ADCSBB  16(vp,n,4), %eax
        mov     %eax, 16(rp,n,4)

        mov     20(up,n,4), %edx
        ADCSBB  20(vp,n,4), %edx
        mov     %edx, 20(rp,n,4)

        mov     24(up,n,4), %eax
        ADCSBB  24(vp,n,4), %eax
        mov     %eax, 24(rp,n,4)

        mov     28(up,n,4), %edx
        ADCSBB  28(vp,n,4), %edx
        mov     %edx, 28(rp,n,4)

C Advance the index by eight limbs with lea, which leaves the carry flag
C intact for the next pass.
        lea     8(n), n
        jmp     L(top)

EPILOGUE()
PROLOGUE(func_nc)
C Carry-in entry point: mpn_add_nc or mpn_sub_nc.
C
C Takes one more stack argument than func, the incoming carry or borrow.
C Nothing has been pushed yet at this point, so that fifth argument sits
C at 20(%esp).  It is passed to the shared body in %edx, where a shift
C moves bit 0 into the carry flag.
        mov     20(%esp), %edx
        jmp     L(start)
EPILOGUE()