beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / powerpc64 / logops_n.asm
blob2fa6985d7a43a84a942ad865c7fb68d388a85bee
1 dnl PowerPC-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2 dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
4 dnl Copyright 2003-2005 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
32 include(`../config.m4')
34 C cycles/limb
35 C POWER3/PPC630 1.75
36 C POWER4/PPC970 2.10
37 C POWER5 ?
38 C POWER6 ?
39 C POWER7 1.75
41 C n POWER3/PPC630 POWER4/PPC970
42 C 1 15.00 15.33
43 C 2 7.50 7.99
44 C 3 5.33 6.00
45 C 4 4.50 4.74
46 C 5 4.20 4.39
47 C 6 3.50 3.99
48 C 7 3.14 3.64
49 C 8 3.00 3.36
50 C 9 3.00 3.36
51 C 10 2.70 3.25
52 C 11 2.63 3.11
53 C 12 2.58 3.00
54 C 13 2.61 3.02
55 C 14 2.42 2.82
56 C 15 2.40 2.79
57 C 50 2.08 2.67
58 C 100 1.85 2.31
59 C 200 1.80 2.18
60 C 400 1.77 2.14
61 C 1000 1.76 2.10#
62 C 2000 1.75# 2.13
63 C 4000 2.30 2.57
64 C 8000 2.62 2.58
65 C 16000 2.52 4.25
66 C 32000 2.49 16.25
67 C 64000 2.66 18.76
dnl  Select the entry-point name and the PowerPC logical instruction for the
dnl  variant being built.  LOGOP_VARIANT(suffix,insn) binds func to
dnl  mpn_<suffix> and logop to <insn>, but only when OPERATION_<suffix> is
dnl  defined; for every other variant it expands to nothing.  The helper is
dnl  removed again once the table has been processed.
define(`LOGOP_VARIANT',
`ifdef(`OPERATION_$1',
`	define(`func',`mpn_$1')
	define(`logop',`$2')')')

LOGOP_VARIANT(`and_n',	`and')
LOGOP_VARIANT(`andn_n',	`andc')
LOGOP_VARIANT(`nand_n',	`nand')
LOGOP_VARIANT(`ior_n',	`or')
LOGOP_VARIANT(`iorn_n',	`orc')
LOGOP_VARIANT(`nior_n',	`nor')
LOGOP_VARIANT(`xor_n',	`xor')
LOGOP_VARIANT(`xnor_n',	`eqv')

undefine(`LOGOP_VARIANT')
C INPUT PARAMETERS
C rp	r3	destination limbs (written)
C up	r4	first source limbs (read)
C vp	r5	second source limbs (read)
C n	r6	number of limbs

C Stores logop(up[i], vp[i]) into rp[i] for 0 <= i < n, where func and logop
C are whatever the OPERATION_* selection earlier in this file bound them to.
C
C The loop is unrolled four ways and software pipelined: each group loads
C the next pair of limbs before combining and storing the previous pair,
C alternating between r6/r7 and r8/r9.  The loop is entered at one of four
C labels chosen from (n-1) mod 4, with all three pointers biased forward by
C that many limbs, so no separate cleanup loop is needed.
C
C The first limbs are loaded unconditionally and the trip count is
C (n+3)/4, so the code assumes n >= 1.
C
C Scratch: r0, r6-r10, ctr, cr0, cr6.  No stack is used.

MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

ASM_START()
PROLOGUE(func)
	ld	r8, 0(r4)		C u[0], consumed first by entries L10 and L00
	ld	r9, 0(r5)		C v[0], likewise
	addi	r6, r6, 3		C r6 = n + 3
	rldic.	r0, r6, 3, 59		C r0 = ((n-1) & 3) << 3; cr0.eq iff n = 4t + 1
	cmpldi	cr6, r0, 16		C cr6: lt n = 4t + 2, eq n = 4t + 3, gt n = 4t + 4

ifdef(`HAVE_ABI_mode32',
`	rldicl	r6, r6, 62,34',		C ...branch count
`	rldicl	r6, r6, 62, 2')		C ...branch count
	mtctr	r6			C ctr = (n + 3) >> 2 loop trips

	ld	r6, 0(r4)		C u[0] again, consumed first by entries L11 and L01
	ld	r7, 0(r5)		C v[0] again

	add	r5, r5, r0		C bias vp by (n-1) mod 4 limbs
	add	r4, r4, r0		C bias up likewise
	add	r3, r3, r0		C bias rp likewise

	beq	cr0, L(L01)		C n = 4t + 1
	blt	cr6, L(L10)		C n = 4t + 2
	beq	cr6, L(L11)		C n = 4t + 3
	b	L(L00)			C n = 4t + 4

L(oop):	ld	r8, -24(r4)		C fetch next pair into r8/r9 ...
	ld	r9, -24(r5)
	logop	r10, r6, r7		C ... while finishing the pair held in r6/r7
	std	r10, -32(r3)
L(L00):	ld	r6, -16(r4)
	ld	r7, -16(r5)
	logop	r10, r8, r9
	std	r10, -24(r3)
L(L11):	ld	r8, -8(r4)
	ld	r9, -8(r5)
	logop	r10, r6, r7
	std	r10, -16(r3)
L(L10):	ld	r6, 0(r4)
	ld	r7, 0(r5)
	logop	r10, r8, r9
	std	r10, -8(r3)
L(L01):	addi	r5, r5, 32		C advance all three pointers by four limbs
	addi	r4, r4, 32
	addi	r3, r3, 32
	bdnz	L(oop)

	logop	r10, r6, r7		C the last limb pair is still pending in r6/r7
	std	r10, -32(r3)
	blr				C return; without this, execution falls off the end
EPILOGUE()