beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / lshiftc.asm
blobc4ba04a173849cdcaa5ea98384573a1651895c4a
1 dnl AMD64 mpn_lshiftc -- mpn left shift with complement.
3 dnl Copyright 2003, 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C cycles/limb
35 C AMD K8,K9 2.75
36 C AMD K10 2.75
37 C Intel P4 ?
38 C Intel core2 ?
39 C Intel corei ?
40 C Intel atom ?
41 C VIA nano 3.75
C INPUT PARAMETERS
C   rp   destination limb pointer
C   up   source limb pointer
C   n    limb count; the code reads up[n-1] unconditionally, so n >= 1
C   cnt  left shift count in bits, used through cl.  The return value and
C        the combining steps shift right by -cnt (mod 64), so the code
C        assumes 1 <= cnt <= 63.
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C ---------------------------------------------------------------------------
C mpn_lshiftc
C
C For i = n-1 down to 1:
C	rp[i] = ~((up[i] << cnt) | (up[i-1] >> (64-cnt)))
C and	rp[0] = ~(up[0] << cnt)
C
C Return value (rax): up[n-1] >> (64-cnt), i.e. the bits shifted out of the
C most significant limb.  These bits are NOT complemented.
C
C Limbs are processed from the most significant end downwards, and each
C result limb is stored only after the source limbs it depends on have been
C loaded.
C
C Register usage:
C	rcx		shift count; cl is flipped between the lsh count (cnt)
C			and the rsh count (-cnt) with neg, since variable
C			shifts only take their count from cl
C	rdx (n)	counts down as limbs are retired
C	r8, r9		right-shifted partial limbs (r8d also holds the
C			(n+1) mod 4 dispatch value at entry)
C	r10, r11	left-shifted partial limbs
C Written: rax, rcx, rdx, r8, r9, r10, r11, flags.  No stack usage.
C
C FUNC_ENTRY/FUNC_EXIT are defined outside this file; presumably they
C remap the DOS64 argument registers to the aliases above and restore
C them -- confirm against the shared x86_64 m4 definitions.
C ---------------------------------------------------------------------------

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_lshiftc)
	FUNC_ENTRY(4)
	neg	R32(%rcx)		C put rsh count in cl
	mov	-8(up,n,8), %rax
	shr	R8(%rcx), %rax		C function return value

	neg	R32(%rcx)		C put lsh count in cl

C Dispatch on (n+1) mod 4.  Every path below reduces n to 3 mod 4 before
C reaching L(rlx), except n = 1 which goes directly to L(ast).
	lea	1(n), R32(%r8)
	and	$3, R32(%r8)
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Retire the top limb, leaving n = 3 mod 4.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	not	%r10
	mov	%r10, -8(rp,n,8)
	dec	n
	jmp	L(rll)

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Retire the top limb, leaving n = 1 mod 4.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	not	%r10
	mov	%r10, -8(rp,n,8)
	dec	n
	neg	R32(%rcx)		C put lsh count in cl
L(1x):
C	Here n = 1 mod 4 and cl holds the lsh count.
	cmp	$1, n
	je	L(ast)			C only the lowest limb remains
C	Retire the top two limbs, leaving n = 3 mod 4.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	mov	-24(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, -8(rp,n,8)
	mov	%r11, -16(rp,n,8)
	sub	$2, n

L(rll):	neg	R32(%rcx)		C put lsh count in cl
C Here n = 3 mod 4 and cl holds the lsh count.  Start the lsh halves of the
C top two remaining limbs; they are completed either at L(top) or at L(end).
L(rlx):	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11

	sub	$4, n			C				      4
	jb	L(end)			C				      2
	ALIGN(16)
C Main loop, four limbs per iteration.  On entry r10 and r11 hold the
C left-shifted limbs destined for rp[n+3] and rp[n+2] (n already reduced
C by 4).
L(top):
	C finish stuff from lsh block
	neg	R32(%rcx)		C put rsh count in cl
	mov	16(up,n,8), %r8
	mov	8(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, 24(rp,n,8)
	mov	%r11, 16(rp,n,8)
	C start two new rsh
	mov	0(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shr	R8(%rcx), %r8
	shr	R8(%rcx), %r9

	C finish stuff from rsh block
	neg	R32(%rcx)		C put lsh count in cl
	mov	8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shl	R8(%rcx), %r10
	or	%r10, %r8
	shl	R8(%rcx), %r11
	or	%r11, %r9
	not	%r8
	not	%r9
	mov	%r8, 8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start two new lsh
	mov	-8(up,n,8), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r10
	shl	R8(%rcx), %r11

	sub	$4, n
	jae	L(top)			C				      2
C Wind-down: three limbs remain.  r10 and r11 hold the left-shifted up[2]
C and up[1]; complete them with the right-shifted up[1] and up[0].
L(end):
	neg	R32(%rcx)		C put rsh count in cl
	mov	8(up), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	(up), %r9
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, 16(rp)
	mov	%r11, 8(rp)

	neg	R32(%rcx)		C put lsh count in cl
C Lowest limb: nothing is shifted in from below.
L(ast):	mov	(up), %r10
	shl	R8(%rcx), %r10
	not	%r10
	mov	%r10, (rp)
	FUNC_EXIT()
	ret				C return to caller; rax set at entry
EPILOGUE()