beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / aors_n.asm
blob8941f7a17b9b87801226d3aa8bba074f78711725
1 dnl AMD64 mpn_add_n, mpn_sub_n
3 dnl Copyright 2003-2005, 2007, 2008, 2010-2012 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C AMD K8,K9 1.5
35 C AMD K10 1.5
36 C AMD bd1 1.8
37 C AMD bobcat 2.5
38 C Intel P4
39 C Intel core2 4.9
40 C Intel NHM 5.5
41 C Intel SBR 1.61
42 C Intel IBR 1.61
43 C Intel atom 4
44 C VIA nano 3.25
46 C The loop of this code is the result of running a code generation and
47 C optimization tool suite written by David Harvey and Torbjorn Granlund.
C INPUT PARAMETERS
C
C Each argument name is bound to the register it occupies under the STD64
C convention.  The note on the right is the location the same argument has
C under DOS64 (presumably moved into place by FUNC_ENTRY -- confirm against
C the DOS64 definitions).
define(`rp',	`%rdi')		C DOS64: rcx
define(`up',	`%rsi')		C DOS64: rdx
define(`vp',	`%rdx')		C DOS64: r8
define(`n',	`%rcx')		C DOS64: r9
define(`cy',	`%r8')		C DOS64: rsp+40 (mpn_add_nc and mpn_sub_nc)

C Operation selection.  Exactly one OPERATION_* symbol is expected to be
C defined by the build.  It fixes the carry-propagating instruction and the
C two exported entry point names.
ifdef(`OPERATION_add_n', `
	define(func,		mpn_add_n)
	define(func_nc,		mpn_add_nc)
	define(ADCSBB,		adc)')
ifdef(`OPERATION_sub_n', `
	define(func,		mpn_sub_n)
	define(func_nc,		mpn_sub_nc)
	define(ADCSBB,		sbb)')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)
ASM_START()
	TEXT
	ALIGN(16)
C ---------------------------------------------------------------------------
C func_nc -- mpn_add_nc or mpn_sub_nc, depending on the OPERATION_* symbol.
C
C Entry point with an explicit carry-in.  It only sets up the state the
C shared code expects and then jumps into the body that follows
C PROLOGUE(func):
C	eax = n mod 4	limbs left for the L(lt4) tail
C	n   = n / 4	number of 4-limb blocks, tested through jrcxz
C	CF  = bit 0 of the carry argument
C	r8, r9		first two source limbs, when at least one block exists
C ---------------------------------------------------------------------------
PROLOGUE(func_nc)
	FUNC_ENTRY(4)
C DOS64 passes the carry argument on the stack.  The offset is 56 rather than
C the rsp+40 noted for cy, presumably because FUNC_ENTRY pushed 16 bytes --
C confirm against the DOS64 FUNC_ENTRY definition.
IFDOS(`	mov	56(%rsp), %r8	')
	mov	R32(n), R32(%rax)
	and	$3, R32(%rax)		C eax = n mod 4
	shr	$2, n			C n = n / 4
	bt	$0, %r8			C CF = carry-in, set after and/shr which both write CF
	jrcxz	L(lt4)			C no full block, go straight to the tail

	dec	n			C consume one block, dec keeps CF and sets ZF for L(mid)
	mov	(up), %r8		C preload limbs 0 and 1, mov leaves the flags alone
	mov	8(up), %r9
	jmp	L(mid)

EPILOGUE()
88 ALIGN(16)
89 PROLOGUE(func)
90 FUNC_ENTRY(4)
91 mov R32(n), R32(%rax)
92 shr $2, n
93 and $3, R32(%rax)
94 jrcxz L(lt4)
96 mov (up), %r8
97 mov 8(up), %r9
98 dec n
99 jmp L(mid)
101 L(lt4): dec R32(%rax)
102 mov (up), %r8
103 jnz L(2)
104 ADCSBB (vp), %r8
105 mov %r8, (rp)
106 adc R32(%rax), R32(%rax)
107 FUNC_EXIT()
110 L(2): dec R32(%rax)
111 mov 8(up), %r9
112 jnz L(3)
113 ADCSBB (vp), %r8
114 ADCSBB 8(vp), %r9
115 mov %r8, (rp)
116 mov %r9, 8(rp)
117 adc R32(%rax), R32(%rax)
118 FUNC_EXIT()
121 L(3): mov 16(up), %r10
122 ADCSBB (vp), %r8
123 ADCSBB 8(vp), %r9
124 ADCSBB 16(vp), %r10
125 mov %r8, (rp)
126 mov %r9, 8(rp)
127 mov %r10, 16(rp)
128 setc R8(%rax)
129 FUNC_EXIT()
132 ALIGN(16)
133 L(top): ADCSBB (vp), %r8
134 ADCSBB 8(vp), %r9
135 ADCSBB 16(vp), %r10
136 ADCSBB 24(vp), %r11
137 mov %r8, (rp)
138 lea 32(up), up
139 mov %r9, 8(rp)
140 mov %r10, 16(rp)
141 dec n
142 mov %r11, 24(rp)
143 lea 32(vp), vp
144 mov (up), %r8
145 mov 8(up), %r9
146 lea 32(rp), rp
147 L(mid): mov 16(up), %r10
148 mov 24(up), %r11
149 jnz L(top)
151 L(end): lea 32(up), up
152 ADCSBB (vp), %r8
153 ADCSBB 8(vp), %r9
154 ADCSBB 16(vp), %r10
155 ADCSBB 24(vp), %r11
156 lea 32(vp), vp
157 mov %r8, (rp)
158 mov %r9, 8(rp)
159 mov %r10, 16(rp)
160 mov %r11, 24(rp)
161 lea 32(rp), rp
163 inc R32(%rax)
164 dec R32(%rax)
165 jnz L(lt4)
166 adc R32(%rax), R32(%rax)
167 FUNC_EXIT()
169 EPILOGUE()