dnl  beta-0.89.2
dnl  [luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc32 / sec_tabselect.asm
dnl  blob d50718e7c82035da117a4ea58b21ba1ec0903a3b
1 dnl PowerPC-32 mpn_sec_tabselect.
3 dnl Contributed to the GNU project by Torbjörn Granlund.
5 dnl Copyright 2011-2013 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
35 C cycles/limb
36 C 603e: ?
37 C 604e: ?
38 C 75x (G3): ?
39 C 7400,7410 (G4): 2.5
40 C 744x,745x (G4+): 2.0
41 C power4/ppc970: 2.0
42 C power5: ?
C Symbolic register names.
C
C r3-r7 are read by the code below without first being loaded, so they hold
C the five incoming arguments.
define(`rp',     `r3')		C destination, written with stw
define(`tp',     `r4')		C table pointer, read with lwz
define(`n',      `r5')		C limbs per table entry
define(`nents',  `r6')		C entry count, copied into CTR
define(`which',  `r7')		C index of the entry to extract

C Scratch values.  All of these are overwritten before being read.
define(`i',      `r8')		C countdown copy of which
define(`j',      `r9')		C outer loop counter
define(`mask',   `r11')		C per-entry select mask
define(`stride', `r12')		C byte distance between entries
ASM_START()

C mpn_sec_tabselect
C
C Copy one entry out of a table of equally sized entries.  Every entry is
C read and the loads, stores and branches depend only on n and nents, never
C on which.
C
C Inputs (register aliases are defined earlier in this file):
C   rp     destination, n limbs are stored here
C   tp     table, entry k starts at byte offset k * stride
C   n      limbs per entry, a limb is 4 bytes here (lwz/stw, stride = 4 * n)
C   nents  number of entries to scan.  It is moved into CTR and the loops
C          end with bdnz, so it has to be nonzero.
C   which  index of the wanted entry.  If no scanned entry has this index
C          the destination is filled with zeros.
C
C Method: the entry is handled in column blocks of 4 limbs, then 2, then 1.
C For each block all nents entries are loaded, ANDed with a mask that is all
C ones only for entry number which, and ORed into accumulators.
C
C Registers: r0 and r27 hold loaded limbs, r28-r31 accumulate, r10 keeps the
C start of the current column block.  r27-r31 are saved and restored here.

PROLOGUE(mpn_sec_tabselect)
	stwu	r1, -32(r1)		C 32 byte frame, back chain at 0(r1)
	addic.	j, n, -4		C outer loop induction variable, cr0 = sign of n - 4
	stmw	r27, 8(r1)		C save r27-r31 at 8(r1) .. 27(r1)
	slwi	stride, n, 2		C stride = 4 * n bytes

	blt	cr0, L(outer_end)	C n < 4, no 4-limb block at all

C ---- blocks of 4 limbs ----
L(outer_top):
	mtctr	nents			C inner loop runs once per entry
	mr	r10, tp			C remember where this column block starts
	li	r28, 0
	li	r29, 0
	li	r30, 0
	li	r31, 0
	addic.	j, j, -4		C outer loop induction variable
					C cr0 stays live until the bge below,
					C nothing in between writes cr0
	mr	i, which

	ALIGN(16)
L(top):	addic	i, i, -1		C set carry iff i != 0
	subfe	mask, mask, mask	C mask = CA - 1: all ones iff i was 0
	lwz	r0, 0(tp)
	lwz	r27, 4(tp)
	and	r0, r0, mask
	and	r27, r27, mask
	or	r28, r28, r0
	or	r29, r29, r27
	lwz	r0, 8(tp)
	lwz	r27, 12(tp)
	and	r0, r0, mask
	and	r27, r27, mask
	or	r30, r30, r0
	or	r31, r31, r27
	add	tp, tp, stride		C same columns, next entry
	bdnz	L(top)

	stw	r28, 0(rp)
	stw	r29, 4(rp)
	stw	r30, 8(rp)
	stw	r31, 12(rp)
	addi	tp, r10, 16		C back to entry 0, four limbs further on
	addi	rp, rp, 16
	bge	cr0, L(outer_top)	C at least 4 more limbs remain
L(outer_end):

C ---- block of 2 limbs, taken when bit 1 of n is set ----
	andi.	r0, n, 2
	beq	cr0, L(b0x)
L(b1x):	mtctr	nents
	mr	r10, tp
	li	r28, 0
	li	r29, 0
	mr	i, which
	ALIGN(16)
L(tp2):	addic	i, i, -1		C set carry iff i != 0
	subfe	mask, mask, mask	C all ones iff i was 0
	lwz	r0, 0(tp)
	lwz	r27, 4(tp)
	and	r0, r0, mask
	and	r27, r27, mask
	or	r28, r28, r0
	or	r29, r29, r27
	add	tp, tp, stride
	bdnz	L(tp2)
	stw	r28, 0(rp)
	stw	r29, 4(rp)
	addi	tp, r10, 8		C back to entry 0, two limbs further on
	addi	rp, rp, 8

C ---- final single limb, taken when bit 0 of n is set ----
L(b0x):	andi.	r0, n, 1
	beq	cr0, L(b00)
L(b01):	mtctr	nents
	mr	r10, tp
	li	r28, 0
	mr	i, which
	ALIGN(16)
L(tp1):	addic	i, i, -1		C set carry iff i != 0
	subfe	mask, mask, mask	C all ones iff i was 0
	lwz	r0, 0(tp)
	and	r0, r0, mask
	or	r28, r28, r0
	add	tp, tp, stride
	bdnz	L(tp1)
	stw	r28, 0(rp)

C ---- restore and return ----
L(b00):	lmw	r27, 8(r1)		C reload r27-r31
	addi	r1, r1, 32		C pop the frame
	blr				C return to caller, without this the code
					C would run past the end of the function
EPILOGUE()