beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc64 / p7 / copyi.asm
blob854cf9f8090f39671d27a236719d9142f3caa301
1 dnl PowerPC-64 mpn_copyi.
3 dnl Copyright 2004, 2005, 2013 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C POWER3/PPC630 ?
35 C POWER4/PPC970 ?
36 C POWER5 ?
37 C POWER6 1.25
38 C POWER7 1.09
C INPUT PARAMETERS
define(`rp',	`r3')	C destination limb pointer, written with std
define(`up',	`r4')	C source limb pointer, read with ld
define(`n',	`r5')	C number of 8-byte limbs to copy

C TODO
C  * Try rolling the two loop leading std to the end, allowing the code to
C    handle also n = 2.
C  * Consider using 4 pointers, schedule ptr update early wrt use.

C mpn_copyi -- copy n limbs from up to rp, lowest address first.
C
C In:       rp = destination, up = source, n = limb count
C Out:      nothing is returned in a register
C Modifies: r0, r6-r12, ctr, cr0, cr6, cr7, and the rp/up/n argument
C           registers themselves.  No stack is used.
C
C Strategy:
C  * n < 4 is handled by a one-limb-per-iteration loop at L(sml); n = 0
C    returns immediately.
C  * Otherwise an odd n is made even by copying one limb up front, two limbs
C    are preloaded, and the 8-limb unrolled loop is entered at one of four
C    points selected by bits 1 and 2 of n.  The pointer biases applied at
C    L(000), L(100), L(010) and L(110) make the fixed displacements inside
C    the loop line up with the chosen entry point.
C  * The loop is software pipelined: limbs are loaded two steps ahead of
C    their stores, so it always exits with four limbs still held in r6-r9;
C    L(end) stores them.
C
C Both exit paths must end in an explicit blr.  Without it the small-count
C path falls through into L(top) with ctr = 0, and the main path runs off
C the end of the function.

ASM_START()
PROLOGUE(mpn_copyi)

C In the 32-bit ABI mode only the low 32 bits of the count are kept.
ifdef(`HAVE_ABI_mode32',
`	rldicl	n, n, 0,32')

	cmpdi	cr0, n, 4
	blt	L(sml)			C fewer than 4 limbs: simple loop

	addi	r10, n, 4
	srdi	r10, r10, 3
	mtctr	r10			C ctr = (n + 4) / 8

	andi.	r0, n, 1		C cr0 eq  <=>  n is even
	rlwinm	r11, n, 0,30,30		C r11 = n & 2
	rlwinm	r12, n, 0,29,29		C r12 = n & 4
	cmpdi	cr6, r11, 0		C cr6 eq  <=>  bit 1 of n clear
	cmpdi	cr7, r12, 0		C cr7 eq  <=>  bit 2 of n clear

	beq	cr0, L(xx0)
C n is odd: move a single limb so that an even number remains.
L(xx1):	ld	r6, 0(up)
	addi	up, up, 8
	std	r6, 0(rp)
	addi	rp, rp, 8

C Dispatch on bits 1 and 2 of n.  Each case preloads two limbs into the
C register pair that the chosen loop entry stores first.
L(xx0):	bne	cr6, L(x10)
L(x00):	ld	r6, 0(up)
	ld	r7, 8(up)
	bne	cr7, L(100)
L(000):	addi	rp, rp, -32		C L(lo0) stores r6/r7 at 32(rp)
	b	L(lo0)
L(100):	addi	up, up, -32		C L(lo4) loads from 48(up)
	b	L(lo4)
L(x10):	ld	r8, 0(up)
	ld	r9, 8(up)
	bne	cr7, L(110)
L(010):	addi	up, up, 16		C L(lo2) loads from 0(up)
	addi	rp, rp, -16		C L(lo2) stores r8/r9 at 16(rp)
	b	L(lo2)
L(110):	addi	up, up, -16		C L(lo6) loads from 32(up)
	addi	rp, rp, -48		C L(lo6) stores r8/r9 at 48(rp)
	b	L(lo6)

C Small counts, n = 0..3.
L(sml):	cmpdi	cr0, n, 0
	beqlr-	cr0			C nothing to copy
	mtctr	n
L(t):	ld	r6, 0(up)
	addi	up, up, 8
	std	r6, 0(rp)
	addi	rp, rp, 8
	bdnz	L(t)
	blr				C done; must not fall into L(top)

C Main loop: 8 limbs per iteration, loads running ahead of stores.
	ALIGN(32)
L(top):	std	r6, 0(rp)
	std	r7, 8(rp)
L(lo2):	ld	r6, 0(up)
	ld	r7, 8(up)
	std	r8, 16(rp)
	std	r9, 24(rp)
L(lo0):	ld	r8, 16(up)
	ld	r9, 24(up)
	std	r6, 32(rp)
	std	r7, 40(rp)
L(lo6):	ld	r6, 32(up)
	ld	r7, 40(up)
	std	r8, 48(rp)
	std	r9, 56(rp)
	addi	rp, rp, 64
L(lo4):	ld	r8, 48(up)
	ld	r9, 56(up)
	addi	up, up, 64
	bdnz	L(top)

C Drain the pipeline: the last four limbs are still in r6-r9.
L(end):	std	r6, 0(rp)
	std	r7, 8(rp)
	std	r8, 16(rp)
	std	r9, 24(rp)
	blr				C return to caller
EPILOGUE()