beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / arm64 / lshift.asm
blob bf6f0ad07216985bf1e8670fe85331c58cf45466
1 dnl ARM64 mpn_lshift.
3 dnl Copyright 2013, 2014 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of the GNU Lesser General Public License as published
9 dnl by the Free Software Foundation; either version 3 of the License, or (at
10 dnl your option) any later version.
12 dnl The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 dnl License for more details.
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')

C	     cycles/limb
C Cortex-A53	 ?
C Cortex-A57	 ?

changecom(@&*$)

C mpn_lshift -- shift an n-limb operand left by cnt bits, 64-bit limbs.
C
C Arguments on entry (AAPCS64 integer argument registers):
C   x0  rp_arg   destination limb pointer
C   x1  up       source limb pointer
C   x2  n        limb count
C   x3  cnt      shift count in bits
C Result:
C   x0           the bits shifted out of the most significant source limb,
C                that is (top limb) >> (64 - cnt)
C
C Preconditions are not checked here.  The GMP manual specifies n >= 1 and
C 1 <= cnt < 64 for this function; with n = 0 the code below would still
C load and store limbs, and with cnt = 0 the right shifts would not yield
C zero.
C
C Method: both pointers are first advanced to one past the last limb and
C the operand is then walked from the most significant limb downwards,
C using negative offsets.  Every result limb is
C   (u[i] << cnt) | (u[i-1] >> (64 - cnt)).
C The main loop handles four limbs per pass.  The low two bits of n pick
C one of four feed-in sequences (b01, b11, b10, b00) which handle the top
C n mod 4 limbs (the top two for b00) and then enter the loop at lo2 or
C lo0.
C
C Scratch registers: x4-x7 hold loaded source limbs, x9-x13 hold partial
C results.  x12 carries a left-shifted limb whose low-order partner is not
C yet available.  x18 is deliberately not used: AAPCS64 makes it the
C platform register and some operating systems reserve it.

define(`rp_arg', `x0')
define(`up',     `x1')
define(`n',      `x2')
define(`cnt',    `x3')

C The destination pointer is moved out of x0 since x0 receives the return
C value early in every feed-in sequence.
define(`rp',     `x16')

C tnc = -cnt.  Register-specified shifts use the count modulo 64, so a
C right shift by tnc is a right shift by 64 - cnt.
define(`tnc',`x8')

ASM_START()
PROLOGUE(mpn_lshift)
	add	rp, rp_arg, n, lsl #3		C rp = rp_arg + 8*n, one past the end
	add	up, up, n, lsl #3		C same for the source pointer
	sub	tnc, xzr, cnt
	tbz	n, #0, L(bx0)			C n even?

C n odd: fetch the top limb on its own.
L(bx1):	ldr	x4, [up,#-8]
	tbnz	n, #1, L(b11)

C n = 1 (mod 4)
L(b01):	lsr	x0, x4, tnc			C return value
	lsl	x12, x4, cnt
	sub	n, n, #1
	cbnz	n, L(gt1)
	str	x12, [rp,#-8]			C n was 1: single limb result
	ret
L(gt1):	ldp	x4, x5, [up,#-24]
	sub	up, up, #8
	add	rp, rp, #16
	b	L(lo2)

C n = 3 (mod 4)
L(b11):	lsr	x0, x4, tnc			C return value
	lsl	x9, x4, cnt
	ldp	x6, x7, [up,#-24]
	add	n, n, #1			C round up to a multiple of 4 for lo0
	add	up, up, #8
	add	rp, rp, #32
	b	L(lo0)

C n even: fetch the top two limbs together.
L(bx0):	ldp	x4, x5, [up,#-16]
	tbz	n, #1, L(b00)

C n = 2 (mod 4)
L(b10):	lsr	x0, x5, tnc			C return value
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x12, x4, cnt
	sub	n, n, #2
	cbnz	n, L(gt2)
	orr	x10, x10, x13
	stp	x12, x10, [rp,#-16]		C n was 2: both result limbs
	ret
L(gt2):	ldp	x4, x5, [up,#-32]
	orr	x10, x10, x13
	str	x10, [rp,#-8]
	sub	up, up, #16
	add	rp, rp, #8
	b	L(lo2)

C n = 0 (mod 4)
L(b00):	lsr	x0, x5, tnc			C return value
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x9, x4, cnt
	ldp	x6, x7, [up,#-32]
	orr	x10, x10, x13
	str	x10, [rp,#-8]
	add	rp, rp, #24
	b	L(lo0)

C Main loop, four limbs per pass.  On arrival at lo2 the pending high part
C is in x12; on arrival at lo0 it is in x9.
	ALIGN(16)
L(top):	ldp	x4, x5, [up,#-48]
	sub	rp, rp, #32		C integrate with stp?
	sub	up, up, #32		C integrate with ldp?
	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-16]
L(lo2):	lsr	x11, x5, tnc
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x9, x4, cnt
	ldp	x6, x7, [up,#-32]
	orr	x11, x11, x12
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-32]
L(lo0):	sub	n, n, #4
	lsr	x11, x7, tnc
	lsl	x13, x7, cnt
	lsr	x10, x6, tnc
	lsl	x12, x6, cnt
	cbnz	n, L(top)

C Wind-down: store the last two combined limbs, then the least significant
C limb, which has only zeros shifted in from below.
L(end):	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-48]
	str	x12, [rp,#-56]
	ret				C must not fall through past EPILOGUE
EPILOGUE()