beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / rshift.asm
blob3f344f1dfc0a9f04e7c84a31a2ff70ef824fefea
1 dnl AMD64 mpn_rshift -- mpn right shift.
3 dnl Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
34 C cycles/limb
35 C AMD K8,K9 2.375
36 C AMD K10 2.375
37 C Intel P4 8
38 C Intel core2 2.11
39 C Intel corei ?
40 C Intel atom 5.75
41 C VIA nano 3.5
44 C INPUT PARAMETERS
C Roles below are as the function body uses them.
45 define(`rp', `%rdi') C destination limb pointer, result limbs are stored through it
46 define(`up', `%rsi') C source limb pointer, only loaded from
47 define(`n', `%rdx') C limb count, used as an index scaled by 8 bytes
48 define(`cnt', `%rcx') C shift count in bits, the body uses it via %rcx and cl
C NOTE(review): presumably these declare the calling conventions this file may
C be built for -- confirm against the m4 macro definitions.
50 ABI_SUPPORT(DOS64)
51 ABI_SUPPORT(STD64)
53 ASM_START()
54 TEXT
C NOTE(review): presumably aligns the function entry to 32 bytes -- confirm.
55 ALIGN(32)
C mpn_rshift: shift the n-limb operand at up right by cnt bits and store the
C n result limbs at rp.  The value returned in rax is the low source limb
C shifted left by 64-cnt, that is the bits shifted out at the bottom, held
C at the high end of the register.
C
C Each result limb is (limb >> cnt) | (next higher limb << (64-cnt)); the top
C limb has no higher neighbour and is just shifted right.
C
C Register use in the code below:
C   rcx     cl alternates between cnt (for shr) and -cnt (for shl).  neg
C           toggles between the two, and 64-bit shifts use cl mod 64, so
C           -cnt acts as 64-cnt.
C   n       negated after the prologue, then counts up towards zero
C   up, rp  moved to the last limb, so 8(up,n,8) is the lowest unprocessed limb
C   r8-r11  scratch for the partial limbs
C
C NOTE(review): the code appears to assume n >= 1 and 1 <= cnt <= 63, neither
C is checked here -- confirm against the callers.
56 PROLOGUE(mpn_rshift)
C NOTE(review): presumably maps the 4 DOS64 argument registers onto the
C registers named above -- confirm in the macro definition.
57 FUNC_ENTRY(4)
58 neg R32(%rcx) C cl = -cnt, acts as left-shift count 64-cnt
59 mov (up), %rax
60 shl R8(%rcx), %rax C function return value: bits shifted out of the low limb
61 neg R32(%rcx) C cl = cnt, the right-shift count
63 lea 1(n), R32(%r8) C r8d = n+1, reduced mod 4 below to select an entry path
65 lea -8(up,n,8), up C up -> last source limb
66 lea -8(rp,n,8), rp C rp -> last destination limb
67 neg n C n = -n, so 8(up,n,8) now addresses the first limb
69 and $3, R32(%r8)
70 je L(rlx) C jump for n = 3, 7, 11, ...
72 dec R32(%r8)
73 jne L(1)
74 C n = 4, 8, 12, ...: do one limb here, leaving a count of 3 mod 4
75 mov 8(up,n,8), %r10
76 shr R8(%rcx), %r10 C low part of the result limb
77 neg R32(%rcx) C cl = -cnt, left-shift count
78 mov 16(up,n,8), %r8
79 shl R8(%rcx), %r8 C high part, taken from the next limb up
80 or %r8, %r10
81 mov %r10, 8(rp,n,8)
82 inc n
83 jmp L(rll) C rll switches cl back to the right-shift count
85 L(1): dec R32(%r8)
86 je L(1x) C jump for n = 1, 5, 9, 13, ...
87 C n = 2, 6, 10, 14, ...: do one limb here, leaving a count of 1 mod 4
88 mov 8(up,n,8), %r10
89 shr R8(%rcx), %r10
90 neg R32(%rcx) C cl = -cnt, left-shift count
91 mov 16(up,n,8), %r8
92 shl R8(%rcx), %r8
93 or %r8, %r10
94 mov %r10, 8(rp,n,8)
95 inc n
96 neg R32(%rcx) C cl = cnt, right-shift count
97 L(1x):
98 cmp $-1, n C exactly one limb left?
99 je L(ast) C yes, only the top limb remains
C Otherwise do two limbs here, leaving a count of 3 mod 4.
100 mov 8(up,n,8), %r10
101 shr R8(%rcx), %r10
102 mov 16(up,n,8), %r11
103 shr R8(%rcx), %r11
104 neg R32(%rcx) C cl = -cnt, left-shift count
105 mov 16(up,n,8), %r8
106 mov 24(up,n,8), %r9
107 shl R8(%rcx), %r8
108 or %r8, %r10
109 shl R8(%rcx), %r9
110 or %r9, %r11
111 mov %r10, 8(rp,n,8)
112 mov %r11, 16(rp,n,8)
113 add $2, n
115 L(rll): neg R32(%rcx) C cl = cnt, right-shift count
C Here the remaining count is 3 mod 4.  Start the first two limbs by
C computing their right-shifted parts, the loop or L(end) completes them.
116 L(rlx): mov 8(up,n,8), %r10
117 shr R8(%rcx), %r10
118 mov 16(up,n,8), %r11
119 shr R8(%rcx), %r11
C The add carries exactly when n+4 passes zero, which happens when only
C three limbs were left, in that case skip the loop.
121 add $4, n C 4
122 jb L(end) C 2
123 ALIGN(16)
C Main loop, four limbs per iteration.
124 L(top):
125 C finish the two limbs whose right-shifted parts are in r10 and r11
126 neg R32(%rcx) C cl = -cnt, left-shift count
127 mov -16(up,n,8), %r8
128 mov -8(up,n,8), %r9
129 shl R8(%rcx), %r8
130 or %r8, %r10
131 shl R8(%rcx), %r9
132 or %r9, %r11
133 mov %r10, -24(rp,n,8)
134 mov %r11, -16(rp,n,8)
135 C start two more limbs with their left-shifted parts, in r8 and r9
136 mov (up,n,8), %r8
137 mov 8(up,n,8), %r9
138 shl R8(%rcx), %r8
139 shl R8(%rcx), %r9
141 C finish those two limbs by or-ing in the right-shifted parts
142 neg R32(%rcx) C cl = cnt, right-shift count
143 mov -8(up,n,8), %r10
144 mov 0(up,n,8), %r11
145 shr R8(%rcx), %r10
146 or %r10, %r8
147 shr R8(%rcx), %r11
148 or %r11, %r9
149 mov %r8, -8(rp,n,8)
150 mov %r9, 0(rp,n,8)
151 C start the next two limbs with their right-shifted parts, in r10 and r11
152 mov 8(up,n,8), %r10
153 mov 16(up,n,8), %r11
154 shr R8(%rcx), %r10
155 shr R8(%rcx), %r11
157 add $4, n
158 jae L(top) C 2
C Three limbs remain: complete the pending pair held in r10 and r11, then
C fall through to the top limb.  up and rp still point at the last limb.
159 L(end):
160 neg R32(%rcx) C cl = -cnt, left-shift count
161 mov -8(up), %r8
162 shl R8(%rcx), %r8
163 or %r8, %r10
164 mov (up), %r9
165 shl R8(%rcx), %r9
166 or %r9, %r11
167 mov %r10, -16(rp)
168 mov %r11, -8(rp)
170 neg R32(%rcx) C cl = cnt, right-shift count
C Top limb: nothing above it to or in, so it is only shifted right.
171 L(ast): mov (up), %r10
172 shr R8(%rcx), %r10
173 mov %r10, (rp)
174 FUNC_EXIT()
176 EPILOGUE()