dnl  beta-0.89.2
dnl  [luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc64 / sec_tabselect.asm
dnl  blob 085577ca9be0213f6ce552a308d97390f90091ca
1 dnl PowerPC-64 mpn_sec_tabselect.
3 dnl Contributed to the GNU project by Torbjörn Granlund.
5 dnl Copyright 2011-2013 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
35 C cycles/limb
36 C POWER3/PPC630 1.75
37 C POWER4/PPC970 2.0
38 C POWER5 ?
39 C POWER6 5.0
40 C POWER7 1.75
C mpn_sec_tabselect -- copy one n-limb entry out of a table of nents entries.
C
C The table at tp holds nents entries of n 64-bit limbs each, stored back to
C back (entry k starts at tp + 8*n*k).  The entry with index which is written
C to rp.  Every entry is loaded for every output limb; the wanted one is
C picked by AND-ing each load with a mask that is all ones only for entry
C number which, and OR-ing the result into an accumulator.  The sequence of
C loads, stores and branches therefore depends on n and nents only, never on
C which.
C
C Preconditions visible from the code:
C   * nents >= 1.  The entry loops are bottom-tested on CTR (bdnz), so a zero
C     count would not terminate after zero iterations.
C   * n >= 0.  n = 0 skips all three passes and stores nothing.
C
C Register roles.  The first five are the incoming arguments.

C destination, n limbs
define(`rp', `r3')
C table base
define(`tp', `r4')
C limbs per entry
define(`n', `r5')
C number of entries in the table
define(`nents', `r6')
C index of the entry to copy
define(`which', `r7')

C entry countdown, starts at which, is 0 when the wanted entry is reached
define(`i', `r8')
C outer loop counter, decremented by 4 per block of four limbs
define(`j', `r9')
C distance in bytes between consecutive entries, 8*n
define(`stride', `r12')
C all ones for the wanted entry, zero for every other entry
define(`mask', `r11')

C Other registers: r0 and r27 are load temporaries, r28-r31 accumulate the
C selected limbs, r10 remembers the table position at the start of a pass.
C r27-r31 are saved on entry and restored on exit.  They are stored at
C negative offsets from r1 and r1 itself is never moved, so no stack frame
C is allocated (this assumes the ABI lets a leaf routine use that area).

ASM_START()
PROLOGUE(mpn_sec_tabselect)
	addic.	j, n, -4		C j = n-4, cr0 tells whether n >= 4
	std	r31, -8(r1)		C save the registers used as accumulators
	std	r30, -16(r1)
	std	r29, -24(r1)
	std	r28, -32(r1)
	std	r27, -40(r1)
	sldi	stride, n, 3		C stride = 8*n bytes per table entry

	blt	cr0, L(outer_end)	C fewer than 4 limbs, go to the tail passes

C Main pass: four limbs of the result per trip.  cr0 is set by the addic.
C below and is still intact at the bge, since nothing in the inner loop is
C a record-form instruction.
L(outer_top):
	mtctr	nents			C visit every entry
	mr	r10, tp			C remember where this column block starts
	li	r28, 0
	li	r29, 0
	li	r30, 0
	li	r31, 0
	addic.	j, j, -4		C cr0 >= 0 iff another 4-limb block follows
	mr	i, which

	ALIGN(16)
L(top):	addic	i, i, -1		C carry set iff i != 0
	subfe	mask, mask, mask	C mask = carry - 1: -1 iff i was 0, else 0
	ld	r0, 0(tp)
	ld	r27, 8(tp)
	and	r0, r0, mask
	and	r27, r27, mask
	or	r28, r28, r0
	or	r29, r29, r27
	ld	r0, 16(tp)
	ld	r27, 24(tp)
	and	r0, r0, mask
	and	r27, r27, mask
	or	r30, r30, r0
	or	r31, r31, r27
	add	tp, tp, stride		C same limbs of the next entry
	bdnz	L(top)

	std	r28, 0(rp)
	std	r29, 8(rp)
	std	r30, 16(rp)
	std	r31, 24(rp)
	addi	tp, r10, 32		C back to entry 0, next four limbs
	addi	rp, rp, 32
	bge	cr0, L(outer_top)
L(outer_end):

C Tail pass for two limbs, taken when bit 1 of n is set.
	rldicl.	r0, n, 63, 63		C r0 = (n >> 1) & 1
	beq	cr0, L(b0x)
L(b1x):	mtctr	nents
	mr	r10, tp
	li	r28, 0
	li	r29, 0
	mr	i, which
	ALIGN(16)
L(tp2):	addic	i, i, -1		C carry set iff i != 0
	subfe	mask, mask, mask	C -1 for the wanted entry, else 0
	ld	r0, 0(tp)
	ld	r27, 8(tp)
	and	r0, r0, mask
	and	r27, r27, mask
	or	r28, r28, r0
	or	r29, r29, r27
	add	tp, tp, stride
	bdnz	L(tp2)
	std	r28, 0(rp)
	std	r29, 8(rp)
	addi	tp, r10, 16		C back to entry 0, past the two limbs done
	addi	rp, rp, 16

C Tail pass for the last limb, taken when bit 0 of n is set.
L(b0x):	rldicl.	r0, n, 0, 63		C r0 = n & 1
	beq	cr0, L(b00)
L(b01):	mtctr	nents
	mr	r10, tp
	li	r28, 0
	mr	i, which
	ALIGN(16)
L(tp1):	addic	i, i, -1		C carry set iff i != 0
	subfe	mask, mask, mask	C -1 for the wanted entry, else 0
	ld	r0, 0(tp)
	and	r0, r0, mask
	or	r28, r28, r0
	add	tp, tp, stride
	bdnz	L(tp1)
	std	r28, 0(rp)

C Common exit: restore the saved registers and return.
L(b00):	ld	r31, -8(r1)
	ld	r30, -16(r1)
	ld	r29, -24(r1)
	ld	r28, -32(r1)
	ld	r27, -40(r1)
	blr				C return, no other path here branches to LR
EPILOGUE()