* cris.h (EH_RETURN_DATA_REGNO): Fix unsigned>=0 warning.
[official-gcc.git] / gcc / config / cris / mulsi3.asm
blob3c482e7f2a96326ba74d02efc78c8481be5297b3
1 ;; This code used to be expanded through interesting expansions in
2 ;; the machine description, compiled from this code:
3 ;;
4 ;; #ifdef L_mulsi3
5 ;; long __Mul (unsigned long a, unsigned long b) __attribute__ ((__const__));
6 ;;
7 ;; /* This must be compiled with the -mexpand-mul flag, to synthesize the
8 ;; multiplication from the mstep instructions. The check for
9 ;; smaller-size multiplication pays off in the order of .5-10%;
10 ;; estimated median 1%, depending on application.
11 ;; FIXME: It can be further optimized if we go to assembler code, as
12 ;; gcc 2.7.2 adds a few unnecessary instructions and does not put the
13 ;; basic blocks in optimal order. */
14 ;; long
15 ;; __Mul (unsigned long a, unsigned long b)
16 ;; {
17 ;; #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
18 ;; /* In case other code is compiled without -march=v10, they will
19 ;; contain calls to __Mul, regardless of flags at link-time. The
20 ;; "else"-code below will work, but is unnecessarily slow. This
21 ;; sometimes cuts a few minutes off from simulation time by just
22 ;; returning a "mulu.d". */
23 ;; return a * b;
24 ;; #else
25 ;; unsigned long min;
26 ;;
27 ;; /* Get minimum via the bound insn. */
28 ;; min = a < b ? a : b;
29 ;;
30 ;; /* Can we omit computation of the high part? */
31 ;; if (min > 65535)
32 ;; /* No. Perform full multiplication. */
33 ;; return a * b;
34 ;; else
35 ;; {
36 ;; /* Check if both operands are within 16 bits. */
37 ;; unsigned long max;
38 ;;
39 ;; /* Get maximum, by knowing the minimum.
40 ;; This will partition a and b into max and min.
41 ;; This is not currently something GCC understands,
42 ;; so do this trick by asm. */
43 ;; __asm__ ("xor %1,%0\n\txor %2,%0"
44 ;; : "=r" (max)
45 ;; : "r" (b), "r" (a), "0" (min));
46 ;;
47 ;; if (max > 65535)
48 ;; /* Make GCC understand that only the low part of "min" will be
49 ;; used. */
50 ;; return max * (unsigned short) min;
51 ;; else
52 ;; /* Only the low parts of both operands are necessary. */
53 ;; return ((unsigned short) max) * (unsigned short) min;
54 ;; }
55 ;; #endif /* not __CRIS_arch_version >= 10 */
56 ;; }
57 ;; #endif /* L_mulsi3 */
59 ;; That approach was abandoned since the caveats outweighed the
60 ;; benefits. The expand-multiplication machinery is also removed, so you
61 ;; can't do this anymore.
63 ;; For doubters of there being any benefits, some were insensitivity to:
64 ;; - ABI changes (mostly for experimentation)
65 ;; - assembler syntax differences (mostly debug format).
66 ;; - insn scheduling issues.
67 ;; Most ABI experiments will presumably happen with arches with mul insns,
68 ;; so that argument doesn't really hold anymore, and it's unlikely that
69 ;; there will be new arch variants needing insn scheduling and not having
70 ;; mul insns.
;; ELF and a.out spell assembler-local labels differently.  Every local
;; label below goes through the L macro so that the spelling matching the
;; object format is used; with the other spelling the label may end up
;; in the object file instead of staying local to the assembler.
#undef L
#ifdef __AOUT__
# define L(x) x
#else
# define L(x) .x
#endif

;;----------------------------------------------------------------------
;; ___Mul -- 32-bit multiply helper.
;; C view: long __Mul (unsigned long a, unsigned long b)
;;
;; In:     $r10 = a, $r11 = b
;; Out:    $r10 = low 32 bits of a * b
;; Writes: $r9, $r12, $r13 (pre-v10 path only) and the condition flags
;;
;; Every ret and every branch here is followed by one delay-slot
;; instruction that still executes before control is transferred.
;;
;; Pre-v10 strategy: a run of sixteen mstep insns multiplies the 16-bit
;; value parked in the upper half of the destination by the source
;; register (each mstep shifts the destination left one bit and adds the
;; source when the N flag is set).  The operands are sorted into min and
;; max so that halves known to be zero are never multiplied.
;;----------------------------------------------------------------------
	.global ___Mul
	.type	___Mul,@function
___Mul:
#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
	;; A multiply insn exists: issue it in the delay slot of the return.
	;; NOTE(review): GCC documents a multiply erratum for some CRIS
	;; parts (see -mmul-bug-workaround); confirm that mulu.d in this
	;; delay slot is acceptable for the chips being targeted.
	ret
	mulu.d	$r11,$r10
#else
	move.d	$r10,$r12		; r12 = a
	move.d	$r11,$r9
	bound.d	$r12,$r9		; r9 = unsigned min of a and b
	cmpu.w	65535,$r9
	bls	L(L3)			; min fits in 16 bits
	move.d	$r12,$r13		; delay slot: r13 = a

	;; ---- Both operands exceed 16 bits: three partial products. ----
	;; r13 = a.lo * b.lo
	movu.w	$r11,$r9		; r9 = b.lo
	lslq	16,$r13			; a.lo into the upper half
	.rept	16
	mstep	$r9,$r13
	.endr

	;; r10 = a.hi * b.lo.  test.d refreshes the flags from r10 so the
	;; first mstep sees the top bit of a.hi.
	clear.w	$r10			; keep only a.hi, still in the upper half
	test.d	$r10
	.rept	16
	mstep	$r9,$r10
	.endr

	;; r9 = b.hi * a.lo
	movu.w	$r12,$r12		; r12 = a.lo
	move.d	$r11,$r9
	clear.w	$r9			; keep only b.hi, still in the upper half
	test.d	$r9
	.rept	16
	mstep	$r12,$r9
	.endr

	;; Only the low 16 bits of the summed cross terms reach the result.
	add.w	$r9,$r10
	lslq	16,$r10
	;; Return here; without this ret the code would run on into the
	;; next case and overwrite the result in r10.
	ret
	add.d	$r13,$r10		; delay slot: add a.lo * b.lo

L(L3):
	;; ---- min fits in 16 bits.  Recover max as min ^ b ^ a. ----
	move.d	$r9,$r10
	xor	$r11,$r10
	xor	$r12,$r10		; r10 = max
	cmpu.w	65535,$r10
	bls	L(L5)			; max fits in 16 bits too
	movu.w	$r9,$r13		; delay slot: r13 = min.lo

	;; ---- Only max exceeds 16 bits: two partial products. ----
	movu.w	$r13,$r13		; redundant re-extension, kept as in the original
	;; r9 = max.lo * min
	move.d	$r10,$r9
	lslq	16,$r9			; max.lo into the upper half
	.rept	16
	mstep	$r13,$r9
	.endr

	;; r10 = max.hi * min
	clear.w	$r10			; keep only max.hi, still in the upper half
	test.d	$r10
	.rept	16
	mstep	$r13,$r10
	.endr

	lslq	16,$r10
	;; Return here instead of falling into the 16 x 16 case below.
	ret
	add.d	$r9,$r10		; delay slot: add max.lo * min

L(L5):
	;; ---- Both operands fit in 16 bits: one 16 x 16 product. ----
	movu.w	$r9,$r9			; r9 = min.lo
	lslq	16,$r10			; max into the upper half
	.rept	15
	mstep	$r9,$r10
	.endr
	;; The sixteenth step runs in the delay slot of the return.
	ret
	mstep	$r9,$r10
#endif
L(Lfe1):
	.size	___Mul,L(Lfe1)-___Mul