beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / sparc32 / v8 / addmul_1.asm
blob0052092784374040428be68f34c38f338072f3c4
1 dnl SPARC v8 mpn_addmul_1 -- Multiply a limb vector with a limb and
2 dnl add the result to a second limb vector.
4 dnl Copyright 1992-1995, 2000 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
10 dnl
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
14 dnl
15 dnl or
16 dnl
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any
19 dnl later version.
20 dnl
21 dnl or both in parallel, as here.
22 dnl
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 dnl for more details.
27 dnl
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
35 C INPUT PARAMETERS
36 C res_ptr o0
37 C s1_ptr o1
38 C size o2
39 C s2_limb o3
41 ASM_START()
42 PROLOGUE(mpn_addmul_1)
C mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_size_t size,
C                         mp_limb_t s2_limb)
C For i in [0, size): res_ptr[i] += s1_ptr[i] * s2_limb, propagating carry;
C the final carry-out limb is returned in %o0.
C Assumes size >= 1: the first source limb is loaded unconditionally below.
C Register roles:
C   %g2 = carry limb (high word of previous product, plus add carries)
C   %g3 = current low product / running sum
C   %g1 = scratch: dispatch offset, then loaded res_ptr limbs
C   %o4 = current s1_ptr limb;  %y = high 32 bits of the last umul
43 orcc %g0,%g0,%g2
44 ld [%o1+0],%o4 C 1
C Compute the entry point for a 4-way unrolled loop: each entry stub below
C is 4 instructions (16 bytes), so (size << 4) & 0x30 indexes the stub
C table at L(1) by size mod 4.
46 sll %o2,4,%g1
47 and %g1,(4-1)<<4,%g1
C PIC branch: materialize the address of L(1) via call-to-next-instruction
C (call deposits the PC in %o7), saving/restoring the real return address
C in %g4.  Non-PIC branch: absolute sethi/or of L(1).
C NOTE(review): the m4 closing-quote lines of this ifdef (upstream `',`
C and `')`) are not visible in this view — confirm against upstream GMP.
48 ifdef(`PIC',
49 ` mov %o7,%g4 C Save return address register
50 0: call 1f
51 add %o7,L(1)-0b,%g3 C delay slot: %g3 = &L(1), PC-relative
52 1: mov %g4,%o7 C Restore return address register
54 ` sethi %hi(L(1)),%g3
55 or %g3,%lo(L(1)),%g3
57 jmp %g3+%g1 C dispatch on size mod 4
58 nop C delay slot
59 L(1):
C Entry stubs: each pre-biases res_ptr/s1_ptr so that the fixed +0/+4/+8/+12
C offsets inside the shared loop body line up with the chosen entry point.
C The trailing comment on each stub lists the sizes it serves.
60 L(L00): add %o0,-4,%o0
61 b L(loop00) C 4, 8, 12, ...
62 add %o1,-4,%o1 C delay slot
63 nop
64 L(L01): b L(loop01) C 1, 5, 9, ...
65 nop
66 nop
67 nop
68 L(L10): add %o0,-12,%o0 C 2, 6, 10, ...
69 b L(loop10)
70 add %o1,4,%o1 C delay slot
71 nop
72 L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
73 b L(loop11)
74 add %o1,-8,%o1 C delay slot
75 nop
C Main loop, unrolled 4x and software-pipelined.  Per limb: umul leaves the
C low 32 product bits in %g3 and the high 32 bits in %y; the previous high
C word (carry in %g2) is folded in with addcc/addxcc, rd %y fetches the new
C high word, addx absorbs the add's carry into it, then the res_ptr limb is
C added and stored.  The "C 1".."C 4" tags mark which of the four pipelined
C limbs each instruction belongs to.
77 L(loop):
78 addcc %g3,%g2,%g3 C 1
79 ld [%o1+4],%o4 C 2
80 rd %y,%g2 C 1
81 addx %g0,%g2,%g2
82 ld [%o0+0],%g1 C 2
83 addcc %g1,%g3,%g3
84 st %g3,[%o0+0] C 1
85 L(loop00):
86 umul %o4,%o3,%g3 C 2
87 ld [%o0+4],%g1 C 2
88 addxcc %g3,%g2,%g3 C 2
89 ld [%o1+8],%o4 C 3
90 rd %y,%g2 C 2
91 addx %g0,%g2,%g2
92 nop
93 addcc %g1,%g3,%g3
94 st %g3,[%o0+4] C 2
95 L(loop11):
96 umul %o4,%o3,%g3 C 3
97 addxcc %g3,%g2,%g3 C 3
98 ld [%o1+12],%o4 C 4
99 rd %y,%g2 C 3
100 add %o1,16,%o1 C advance s1_ptr one unrolled round
101 addx %g0,%g2,%g2
102 ld [%o0+8],%g1 C 2
103 addcc %g1,%g3,%g3
104 st %g3,[%o0+8] C 3
105 L(loop10):
106 umul %o4,%o3,%g3 C 4
107 addxcc %g3,%g2,%g3 C 4
108 ld [%o1+0],%o4 C 1
109 rd %y,%g2 C 4
110 addx %g0,%g2,%g2
111 ld [%o0+12],%g1 C 2
112 addcc %g1,%g3,%g3
113 st %g3,[%o0+12] C 4
114 add %o0,16,%o0 C advance res_ptr one unrolled round
115 addx %g0,%g2,%g2
116 L(loop01):
117 addcc %o2,-4,%o2 C size -= 4; loop while size > 0
118 bg L(loop)
119 umul %o4,%o3,%g3 C 1 (delay slot: start next product)
C Wind-down: finish the final in-flight limb and produce the return carry.
121 addcc %g3,%g2,%g3 C 4
122 rd %y,%g2 C 4
123 addx %g0,%g2,%g2
124 ld [%o0+0],%g1 C 2
125 addcc %g1,%g3,%g3
126 st %g3,[%o0+0] C 4
127 addx %g0,%g2,%o0 C return value: final carry limb
C NOTE(review): upstream GMP has a `nop` in retl's delay slot (file line
C 130); it is not visible in this view — verify against upstream before
C assembling.
129 retl
131 EPILOGUE(mpn_addmul_1)