beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / aorrlshC_n.asm
blob5a9fd4dfb9d690a828795f7dad18b9a12b963a6c
1 dnl AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
2 dnl AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
4 dnl Copyright 2009-2012 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
33 C cycles/limb
34 C AMD K8,K9 2
35 C AMD K10 2
36 C Intel P4 ?
37 C Intel core2 3
38 C Intel NHM 2.75
39 C Intel SBR 2.55
40 C Intel atom ?
41 C VIA nano ?
43 C INPUT PARAMETERS
C rp, up, vp and n name the four argument registers used throughout.
44 define(`rp', `%rdi')
45 define(`up', `%rsi')
46 define(`vp', `%rdx')
47 define(`n', `%rcx')
C M is the value produced by m4_lshift(1,LSH) -- presumably 1 shifted left
C by LSH, TODO confirm since m4_lshift is defined elsewhere. M is used below
C as the lea scale factor. LSH is not defined in the visible source.
49 define(M, eval(m4_lshift(1,LSH)))
C ABI_SUPPORT, ASM_START, TEXT and ALIGN are macros defined outside the
C visible source. NOTE(review): presumably they declare the supported
C calling conventions and open a 16-byte aligned text section -- confirm
C against the macro definitions.
51 ABI_SUPPORT(DOS64)
52 ABI_SUPPORT(STD64)
54 ASM_START()
55 TEXT
56 ALIGN(16)
C func -- add or reverse-subtract a left-shifted limb vector.
C
C Per the file header this computes rp[] = up[] + (vp[] << C) when ADDSUB is
C add and rp[] = (vp[] << C) - up[] otherwise. The macros func, ADDSUB,
C ADCSBB, LSH and RSH are not defined in the visible source and must be
C supplied by the including file. RSH is presumably 64-LSH -- TODO confirm.
C
C In:   rp = %rdi, up = %rsi, vp = %rdx, n = %rcx (limb count)
C Out:  %rax = bits shifted out of the top limb combined with the final
C       carry (add case) or borrow (subtract case, sign-extended)
C Regs: %r12-%r15 are pushed on entry and popped on exit; %r8-%r11 and
C       %rax are used as scratch
C
C vp[0] is read before n is examined, so n is presumably required to be at
C least 1 -- TODO confirm against the callers.
C
C The main loop handles four limbs per pass. When n mod 4 is 1, 2 or 3 the
C leftover limbs are handled first by L(b01), L(b10) or L(b11). When it is 0
C the code enters the loop body directly at L(e00). The carry flag is parked
C in %eax as 0 or -1 between blocks because the add that advances n
C overwrites the flags.
57 PROLOGUE(func)
58 FUNC_ENTRY(4)
59 push %r12 C save the four registers restored before FUNC_EXIT
60 push %r13
61 push %r14
62 push %r15
64 mov (vp), %r8 C r8 = vp[0]
65 lea (,%r8,M), %r12 C r12 = vp[0] scaled by M, the shifted low limb
66 shr $RSH, %r8 C r8 = high bits of vp[0] that spill into the next limb
68 mov R32(n), R32(%rax) C keep a copy of n for the mod 4 dispatch
69 lea (rp,n,8), rp C point rp, up and vp just past their last limb
70 lea (up,n,8), up
71 lea (vp,n,8), vp
72 neg n C n now counts upward from -n towards zero
73 and $3, R8(%rax) C al = n mod 4
74 je L(b00) C n mod 4 = 0
75 cmp $2, R8(%rax)
76 jc L(b01) C n mod 4 = 1
77 je L(b10) C n mod 4 = 2, otherwise fall through with n mod 4 = 3
79 L(b11): mov 8(vp,n,8), %r10 C three leftover limbs
80 lea (%r8,%r10,M), %r14 C shifted limb 1 plus spill from limb 0
81 shr $RSH, %r10
82 mov 16(vp,n,8), %r11
83 lea (%r10,%r11,M), %r15 C shifted limb 2 plus spill from limb 1
84 shr $RSH, %r11 C r11 = spill carried into the next block
85 ADDSUB (up,n,8), %r12 C first limb starts the carry chain
86 ADCSBB 8(up,n,8), %r14
87 ADCSBB 16(up,n,8), %r15
88 sbb R32(%rax), R32(%rax) C save carry for next
89 mov %r12, (rp,n,8)
90 mov %r14, 8(rp,n,8)
91 mov %r15, 16(rp,n,8)
92 add $3, n
93 js L(top) C limbs remain while n is still negative
94 jmp L(end)
96 L(b01): mov %r8, %r11 C one leftover limb, its spill moves to r11
97 ADDSUB (up,n,8), %r12
98 sbb R32(%rax), R32(%rax) C save carry for next
99 mov %r12, (rp,n,8)
100 add $1, n
101 js L(top)
102 jmp L(end)
104 L(b10): mov 8(vp,n,8), %r11 C two leftover limbs
105 lea (%r8,%r11,M), %r15 C shifted limb 1 plus spill from limb 0
106 shr $RSH, %r11 C r11 = spill carried into the next block
107 ADDSUB (up,n,8), %r12
108 ADCSBB 8(up,n,8), %r15
109 sbb R32(%rax), R32(%rax) C save carry for next
110 mov %r12, (rp,n,8)
111 mov %r15, 8(rp,n,8)
112 add $2, n
113 js L(top)
114 jmp L(end)
116 L(b00): mov 8(vp,n,8), %r9 C no leftover limbs, preload limbs 1 and 2
117 mov 16(vp,n,8), %r10
118 jmp L(e00) C r12 and r8 were already prepared at entry; eax is 0 here
120 ALIGN(16)
121 L(top): mov 16(vp,n,8), %r10 C main loop, four limbs per pass
122 mov (vp,n,8), %r8
123 mov 8(vp,n,8), %r9
124 lea (%r11,%r8,M), %r12 C shifted limb 0 plus spill from the previous block
125 shr $RSH, %r8
126 L(e00): lea (%r8,%r9,M), %r13 C shifted limb 1 plus spill from limb 0
127 shr $RSH, %r9
128 mov 24(vp,n,8), %r11
129 lea (%r9,%r10,M), %r14 C shifted limb 2 plus spill from limb 1
130 shr $RSH, %r10
131 lea (%r10,%r11,M), %r15 C shifted limb 3 plus spill from limb 2
132 shr $RSH, %r11 C r11 = spill carried into the next pass or the return value
133 add R32(%rax), R32(%rax) C restore carry
134 ADCSBB (up,n,8), %r12
135 ADCSBB 8(up,n,8), %r13
136 ADCSBB 16(up,n,8), %r14
137 ADCSBB 24(up,n,8), %r15
138 mov %r12, (rp,n,8)
139 mov %r13, 8(rp,n,8)
140 mov %r14, 16(rp,n,8)
141 sbb R32(%rax), R32(%rax) C save carry for next
142 mov %r15, 24(rp,n,8)
143 add $4, n
144 js L(top)
145 L(end):
C Build the return value. Here eax is 0 or -1, the negated final carry or
C borrow, and r11 holds the bits shifted out of the top limb.
C   add case:      eax = -(eax - r11) = r11 + carry
C   subtract case: rax = r11 + eax = r11 - borrow, sign-extended to 64 bits
C Exactly one of the two ifelse branches is emitted, so the separator and
C terminator lines of the ifelse must both be present.
147 ifelse(ADDSUB,add,`
148 sub R32(%r11), R32(%rax)
149 neg R32(%rax)
150 ',`
151 add R32(%r11), R32(%rax)
152 movslq R32(%rax), %rax
153 ')
154 pop %r15 C restore saved registers in reverse push order
155 pop %r14
156 pop %r13
157 pop %r12
158 FUNC_EXIT()
160 EPILOGUE()