beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / arm / v6 / mul_2.asm
blob91a74c8fda6686ea2e59fa6de0b96b796d014dac
1 dnl ARM mpn_mul_2.
3 dnl Contributed to the GNU project by Torbjörn Granlund.
5 dnl Copyright 2012 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
35 C cycles/limb
36 C StrongARM: -
37 C XScale -
38 C ARM11 5.25
39 C Cortex-A7 3.13
40 C Cortex-A8 5
41 C Cortex-A9 2.25
42 C Cortex-A15 2.5
44 C TODO
45 C * This is a trivial edit of the addmul_2 code. Check for simplifications,
46 C and possible speedups to 2.0 c/l.
C Register aliases used by mpn_mul_2.  rp, up, n and vp name r0-r3.
48 define(`rp',`r0')        C base register for every str in the routine
49 define(`up',`r1')        C base register for every limb load
50 define(`n', `r2')        C count: tested mod 4 at entry, then decremented by the loop
51 define(`vp',`r3')        C read once by the initial ldm that fetches v0 and v1
53 define(`v0',`r6')        C word loaded from vp at offset 0
54 define(`v1',`r7')        C word loaded from vp at offset 4
55 define(`u0',`r3')        C one in-flight limb from up; shares r3 with vp, which is dead after the ldm
56 define(`u1',`r9')        C the other in-flight limb from up
58 define(`cya',`r8')       C high-word accumulator of the umaal chain that multiplies by v0
59 define(`cyb',`r12')      C high-word accumulator of the umaal chain that multiplies by v1; moved to r0 at exit
62 ASM_START()
C mpn_mul_2 -- multiply the n-limb operand at up by the two-limb operand at vp.
C
C In:   r0 = rp, r1 = up, r2 = n, r3 = vp
C Out:  for n >= 2, limbs rp[0] .. rp[n] are stored and the remaining high
C       limb is returned in r0
C Uses: r4-r9 are pushed on entry and popped before return; r12 is used
C       without being saved
C
C Every multiply is umaal lo, hi, a, b, which computes hi:lo = a*b + lo + hi.
C r4 and r5 alternate as the low accumulator of the limb being completed,
C while cya and cyb carry the high words of the v0 and v1 chains forward.
C The loop is unrolled 4 ways and entered at L(top), L(lo1), L(lo0) or L(lo3)
C according to n mod 4.  The final two limbs of up are always handled at L(end).
C
C NOTE(review): with n = 1 the L(fi1) path still runs a pass from L(lo1)
C through L(end) and therefore reads and writes more limbs than one.
C Presumably callers guarantee n >= 2 -- confirm.
63 PROLOGUE(mpn_mul_2)
64 push { r4, r5, r6, r7, r8, r9 }      C saved here, restored by the pop before return
66 ldm vp, { v0, v1 }                   C v0 = vp[0], v1 = vp[1]; r3 is free for u0 from here on
67 mov cya, #0                          C both carry chains start at zero
68 mov cyb, #0
70 tst n, #1
71 beq L(evn)                           C n even: the first limb is loaded into u1
72 L(odd): mov r5, #0                   C n odd: the first limb is loaded into u0
73 ldr u0, [up, #0]
74 mov r4, #0
75 tst n, #2
76 beq L(fi1)                           C n mod 4 = 1
77 L(fi3): sub up, up, #12              C n mod 4 = 3: bias up so [up, #16]! at L(lo3) reads the second limb
78 sub rp, rp, #16                      C bias rp so [rp, #16]! at L(lo3) writes the first result limb
79 b L(lo3)
80 L(fi1): sub n, n, #1                 C count adjusted for the partial first pass
81 sub up, up, #4                       C bias up so [up, #8] at L(lo1) reads the second limb
82 sub rp, rp, #8                       C bias rp so [rp, #8] at L(lo1) writes the first result limb
83 b L(lo1)
84 L(evn): mov r4, #0
85 ldr u1, [up, #0]
86 mov r5, #0
87 tst n, #2
88 bne L(fi2)                           C n mod 4 = 2
89 L(fi0): sub up, up, #8               C n mod 4 = 0: bias up so [up, #12] at L(lo0) reads the second limb
90 sub rp, rp, #12                      C bias rp so [rp, #12] at L(lo0) writes the first result limb
91 b L(lo0)
92 L(fi2): subs n, n, #2                C sets flags for the bls below
93 sub rp, rp, #4                       C bias rp so [rp, #4] at L(top) or L(end) writes the first result limb
94 bls L(end)                           C n was 2: nothing left for the loop
96 ALIGN(16)
C Each quarter of the loop loads the next limb, completes one result limb
C with the v0 multiply, stores it, clears that accumulator, and starts the
C following result limb with the v1 multiply.
97 L(top): ldr u0, [up, #4]             C fetch the next limb while u1 is being consumed
98 umaal r4, cya, u1, v0                C cya:r4 = u1*v0 + r4 + cya; r4 is now a finished limb
99 str r4, [rp, #4]
100 mov r4, #0                          C r4 restarts as accumulator for a later limb
101 umaal r5, cyb, u1, v1               C cyb:r5 = u1*v1 + r5 + cyb
102 L(lo1): ldr u1, [up, #8]
103 umaal r5, cya, u0, v0               C cya:r5 = u0*v0 + r5 + cya; r5 is now a finished limb
104 str r5, [rp, #8]
105 mov r5, #0
106 umaal r4, cyb, u0, v1               C cyb:r4 = u0*v1 + r4 + cyb
107 L(lo0): ldr u0, [up, #12]
108 umaal r4, cya, u1, v0
109 str r4, [rp, #12]
110 mov r4, #0
111 umaal r5, cyb, u1, v1
112 L(lo3): ldr u1, [up, #16]!          C pre-indexed with writeback: up advances by 4 limbs
113 umaal r5, cya, u0, v0
114 str r5, [rp, #16]!                  C pre-indexed with writeback: rp advances by 4 limbs
115 mov r5, #0
116 umaal r4, cyb, u0, v1
117 subs n, n, #4
118 bhi L(top)                          C loop while the decremented count is still above zero
C Wind-down: u1 is already loaded; one more limb is fetched into u0.
120 L(end): umaal r4, cya, u1, v0
121 ldr u0, [up, #4]                    C last limb read from up
122 umaal r5, cyb, u1, v1
123 str r4, [rp, #4]
124 umaal r5, cya, u0, v0
125 umaal cya, cyb, u0, v1              C merges both carry chains: cyb:cya = u0*v1 + cya + cyb
126 str r5, [rp, #8]
127 str cya, [rp, #12]                  C last limb stored through rp
128 mov r0, cyb                         C final high word is the return value
130 pop { r4, r5, r6, r7, r8, r9 }
131 bx r14                              C return through the link register
132 EPILOGUE()