/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001,2002 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "xtensa/xtensa-config.h"

#ifdef L_mulsi3
        .align  4
        .global __mulsi3
        .type   __mulsi3,@function
__mulsi3:
        entry   sp, 16

#if XCHAL_HAVE_MUL16
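        # When both operands are no wider than 16 bits (their high halves
        # are zero), a single MUL16U gives the exact 32-bit product.
        # Otherwise split each operand into high and low halves and combine
        # three 16x16 partial products; the low 32 bits of the result are
        #     (alo * blo) + ((ahi * blo + bhi * alo) << 16)
        # which is all that __mulsi3 needs to return.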
        or      a4, a2, a3
        srai    a4, a4, 16
        bnez    a4, .LMUL16
        mul16u  a2, a2, a3
        retw
.LMUL16:
        srai    a4, a2, 16
        srai    a5, a3, 16
        mul16u  a7, a4, a3
        mul16u  a6, a5, a2
        mul16u  a4, a2, a3
        add     a7, a7, a6
        slli    a7, a7, 16
        add     a2, a7, a4

#elif XCHAL_HAVE_MAC16
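        # This path builds the same low-32-bit result from 16x16 partial
        # products, but accumulates the two cross products in the MAC16
        # unit; ACCLO (special register 16) is read back with RSR after
        # the cross products and again after the low product.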
        mul.aa.hl a2, a3
        mula.aa.lh a2, a3
        rsr     a5, 16          # ACCLO
        umul.aa.ll a2, a3
        rsr     a4, 16          # ACCLO
        slli    a5, a5, 16
        add     a2, a4, a5

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

        # Multiply one bit at a time, but unroll the loop 4x to better
        # exploit the addx instructions.
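
        # For reference, the unrolled code below is equivalent to this
        # bit-at-a-time C model (an illustrative sketch only; the names
        # are made up and do not appear elsewhere in this file):
        #
        #     unsigned mul_model (unsigned a, unsigned b)
        #     {
        #       unsigned result = 0;
        #       while (b != 0)
        #         {
        #           if (b & 1)
        #             result += a;
        #           a <<= 1;
        #           b >>= 1;
        #         }
        #       return result;
        #     }
        #
        # The assembly works on absolute values and fixes the sign at the
        # end, and it handles 4 multiplier bits per iteration using the
        # ADDX2/ADDX4/ADDX8 instructions.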

        # Peel the first iteration to save a cycle on init

        # avoid negative numbers

        xor     a5, a2, a3      # top bit is 1 iff one of the inputs is negative
        abs     a3, a3
        abs     a2, a2

        # swap so that second argument is smaller
        sub     a7, a2, a3
        mov     a4, a3
        movgez  a4, a2, a7      # a4 = max(a2, a3)
        movltz  a3, a2, a7      # a3 = min(a2, a3)

        movi    a2, 0
        extui   a6, a3, 0, 1
        movnez  a2, a4, a6

        addx2   a7, a4, a2
        extui   a6, a3, 1, 1
        movnez  a2, a7, a6

        addx4   a7, a4, a2
        extui   a6, a3, 2, 1
        movnez  a2, a7, a6

        addx8   a7, a4, a2
        extui   a6, a3, 3, 1
        movnez  a2, a7, a6

        bgeui   a3, 16, .Lmult_main_loop
        neg     a3, a2
        movltz  a2, a3, a5
        retw

        .align  4
.Lmult_main_loop:
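        # Main loop: consume 4 multiplier bits per iteration.  The
        # multiplicand in a4 is shifted left by 4 and the remaining
        # multiplier bits in a3 are shifted right by 4 each time around.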
        srli    a3, a3, 4
        slli    a4, a4, 4

        add     a7, a4, a2
        extui   a6, a3, 0, 1
        movnez  a2, a7, a6

        addx2   a7, a4, a2
        extui   a6, a3, 1, 1
        movnez  a2, a7, a6

        addx4   a7, a4, a2
        extui   a6, a3, 2, 1
        movnez  a2, a7, a6

        addx8   a7, a4, a2
        extui   a6, a3, 3, 1
        movnez  a2, a7, a6

        bgeui   a3, 16, .Lmult_main_loop

        neg     a3, a2
        movltz  a2, a3, a5

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

        retw
.Lfe0:
        .size   __mulsi3,.Lfe0-__mulsi3

#endif /* L_mulsi3 */

        # Some Xtensa configurations include the NSAU (unsigned
        # normalize shift amount) instruction which computes the number
        # of leading zero bits.  For other configurations, the "nsau"
        # operation is implemented as a macro.
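
        # A C sketch of what the macro computes (illustrative only; the
        # function and variable names here are not used elsewhere):
        #
        #     unsigned nsau_model (unsigned val)
        #     {
        #       unsigned cnt = 0;
        #       if ((val >> 16) == 0)
        #         {
        #           cnt = 16;
        #           val <<= 16;
        #         }
        #       if ((val >> 24) == 0)
        #         {
        #           cnt += 8;
        #           val <<= 8;
        #         }
        #       return cnt + __nsau_data[val >> 24];
        #     }
        #
        # For a zero argument the result is 32 (16 + 8 + __nsau_data[0]),
        # matching the NSAU instruction.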

#if !XCHAL_HAVE_NSA
        .macro  nsau cnt, val, tmp, a
        mov     \a, \val
        movi    \cnt, 0
        extui   \tmp, \a, 16, 16
        bnez    \tmp, 0f
        movi    \cnt, 16
        slli    \a, \a, 16
0:
        extui   \tmp, \a, 24, 8
        bnez    \tmp, 1f
        addi    \cnt, \cnt, 8
        slli    \a, \a, 8
1:
        movi    \tmp, __nsau_data
        extui   \a, \a, 24, 8
        add     \tmp, \tmp, \a
        l8ui    \tmp, \tmp, 0
        add     \cnt, \cnt, \tmp
        .endm
#endif /* !XCHAL_HAVE_NSA */

#ifdef L_nsau
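        # __nsau_data[i] is the number of leading zero bits in the 8-bit
        # value i, with __nsau_data[0] defined as 8.  The table is only
        # emitted (and only needed) when the NSA option is not available.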
        .section .rodata
        .align  4
        .global __nsau_data
        .type   __nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
        .byte   8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
        .byte   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
        .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
        .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
.Lfe1:
        .size   __nsau_data,.Lfe1-__nsau_data
        .hidden __nsau_data
#endif /* L_nsau */

#ifdef L_udivsi3
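        # The core of __udivsi3 is a shift-and-subtract loop that produces
        # one quotient bit per iteration.  As a rough C model (illustrative
        # only; __builtin_clz stands in for the nsau operation):
        #
        #     unsigned udiv_model (unsigned n, unsigned d)
        #     {
        #       unsigned q = 0;
        #       int count = __builtin_clz (d) - __builtin_clz (n);
        #       d <<= count;              /* align divisor with dividend */
        #       while (count-- > 0)
        #         {
        #           if (n >= d)
        #             {
        #               n -= d;           /* this quotient bit is 1 */
        #               q += 1;
        #             }
        #           q <<= 1;
        #           d >>= 1;
        #         }
        #       if (n >= d)               /* final quotient bit */
        #         q += 1;
        #       return q;
        #     }
        #
        # The divisor <= 1 case and the case where the dividend has at
        # least as many leading zeros as the divisor (which also covers a
        # zero dividend) are handled separately, as in the code below.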
        .align  4
        .global __udivsi3
        .type   __udivsi3,@function
__udivsi3:
        entry   sp, 16
        bltui   a3, 2, .Lle_one # check if the divisor <= 1

        mov     a6, a2          # keep dividend in a6
#if XCHAL_HAVE_NSA
        nsau    a5, a6          # dividend_shift = nsau(dividend)
        nsau    a4, a3          # divisor_shift = nsau(divisor)
#else /* !XCHAL_HAVE_NSA */
        nsau    a5, a6, a2, a7  # dividend_shift = nsau(dividend)
        nsau    a4, a3, a2, a7  # divisor_shift = nsau(divisor)
#endif /* !XCHAL_HAVE_NSA */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = divisor_shift - dividend_shift
        ssl     a4
        sll     a3, a3          # divisor <<= count
        movi    a2, 0           # quotient = 0

        # test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a6, a3, .Lzerobit
        sub     a6, a6, a3
        addi    a2, a2, 1
.Lzerobit:
        slli    a2, a2, 1
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a6, a3, .Lreturn
        addi    a2, a2, 1       # increment quotient if dividend >= divisor
.Lreturn:
        retw

.Lspecial:
        # return dividend >= divisor
        movi    a2, 0
        bltu    a6, a3, .Lreturn2
        movi    a2, 1
.Lreturn2:
        retw

.Lle_one:
        beqz    a3, .Lerror     # if divisor == 0, go to the error case
        retw                    # divisor == 1: return the dividend
.Lerror:
        movi    a2, 0           # just return 0; could throw an exception
        retw
.Lfe2:
        .size   __udivsi3,.Lfe2-__udivsi3

#endif /* L_udivsi3 */

#ifdef L_divsi3
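        # Signed division is done on the absolute values with the same
        # shift-and-subtract loop as __udivsi3, and the quotient is negated
        # at the end when the operand signs differ; in C terms the result
        # is roughly ((a ^ b) < 0) ? -(abs (a) / abs (b)) : abs (a) / abs (b),
        # i.e. the quotient truncates toward zero.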
        .align  4
        .global __divsi3
        .type   __divsi3,@function
__divsi3:
        entry   sp, 16
        xor     a7, a2, a3      # sign = dividend ^ divisor
        abs     a6, a2          # udividend = abs(dividend)
        abs     a3, a3          # udivisor = abs(divisor)
        bltui   a3, 2, .Lle_one # check if udivisor <= 1
#if XCHAL_HAVE_NSA
        nsau    a5, a6          # udividend_shift = nsau(udividend)
        nsau    a4, a3          # udivisor_shift = nsau(udivisor)
#else /* !XCHAL_HAVE_NSA */
        nsau    a5, a6, a2, a8  # udividend_shift = nsau(udividend)
        nsau    a4, a3, a2, a8  # udivisor_shift = nsau(udivisor)
#endif /* !XCHAL_HAVE_NSA */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
        ssl     a4
        sll     a3, a3          # udivisor <<= count
        movi    a2, 0           # quotient = 0

        # test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a6, a3, .Lzerobit
        sub     a6, a6, a3
        addi    a2, a2, 1
.Lzerobit:
        slli    a2, a2, 1
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a6, a3, .Lreturn
        addi    a2, a2, 1       # increment quotient if udividend >= udivisor
.Lreturn:
        neg     a5, a2
        movltz  a2, a5, a7      # return (sign < 0) ? -quotient : quotient
        retw

.Lspecial:
        movi    a2, 0
        bltu    a6, a3, .Lreturn2 # if dividend < divisor, return 0
        movi    a2, 1
        movi    a4, -1
        movltz  a2, a4, a7      # else return (sign < 0) ? -1 : 1
.Lreturn2:
        retw

.Lle_one:
        beqz    a3, .Lerror
        neg     a2, a6          # if udivisor == 1, then return...
        movgez  a2, a6, a7      # (sign < 0) ? -udividend : udividend
        retw
.Lerror:
        movi    a2, 0           # just return 0; could throw an exception
        retw
.Lfe3:
        .size   __divsi3,.Lfe3-__divsi3

#endif /* L_divsi3 */

#ifdef L_umodsi3
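        # Unsigned modulus uses the same test-subtract-and-shift loop as
        # __udivsi3, but it only updates the running remainder (kept in a2)
        # and never forms the quotient.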
        .align  4
        .global __umodsi3
        .type   __umodsi3,@function
__umodsi3:
        entry   sp, 16
        bltui   a3, 2, .Lle_one # check if the divisor is <= 1

#if XCHAL_HAVE_NSA
        nsau    a5, a2          # dividend_shift = nsau(dividend)
        nsau    a4, a3          # divisor_shift = nsau(divisor)
#else /* !XCHAL_HAVE_NSA */
        nsau    a5, a2, a6, a7  # dividend_shift = nsau(dividend)
        nsau    a4, a3, a6, a7  # divisor_shift = nsau(divisor)
#endif /* !XCHAL_HAVE_NSA */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = divisor_shift - dividend_shift
        ssl     a4
        sll     a3, a3          # divisor <<= count

        # test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a2, a3, .Lzerobit
        sub     a2, a2, a3
.Lzerobit:
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a2, a3, .Lreturn
        sub     a2, a2, a3      # subtract once more if dividend >= divisor
.Lreturn:
        retw

.Lspecial:
        bltu    a2, a3, .Lreturn2
        sub     a2, a2, a3      # subtract once if dividend >= divisor
.Lreturn2:
        retw

.Lle_one:
        # the divisor is either 0 or 1, so just return 0.
        # someday we may want to throw an exception if the divisor is 0.
        movi    a2, 0
        retw
.Lfe4:
        .size   __umodsi3,.Lfe4-__umodsi3

#endif /* L_umodsi3 */

#ifdef L_modsi3
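        # Signed modulus: the remainder is computed on the absolute values
        # and then given the sign of the dividend, matching C's rule that
        # (a / b) * b + (a % b) == a.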
        .align  4
        .global __modsi3
        .type   __modsi3,@function
__modsi3:
        entry   sp, 16
        mov     a7, a2          # save original (signed) dividend
        abs     a2, a2          # udividend = abs(dividend)
        abs     a3, a3          # udivisor = abs(divisor)
        bltui   a3, 2, .Lle_one # check if udivisor <= 1
#if XCHAL_HAVE_NSA
        nsau    a5, a2          # udividend_shift = nsau(udividend)
        nsau    a4, a3          # udivisor_shift = nsau(udivisor)
#else /* !XCHAL_HAVE_NSA */
        nsau    a5, a2, a6, a8  # udividend_shift = nsau(udividend)
        nsau    a4, a3, a6, a8  # udivisor_shift = nsau(udivisor)
#endif /* !XCHAL_HAVE_NSA */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
        ssl     a4
        sll     a3, a3          # udivisor <<= count

        # test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a2, a3, .Lzerobit
        sub     a2, a2, a3
.Lzerobit:
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a2, a3, .Lreturn
        sub     a2, a2, a3      # subtract once more if udividend >= udivisor
.Lreturn:
        bgez    a7, .Lpositive
        neg     a2, a2          # if (dividend < 0), negate the remainder
.Lpositive:
        retw

.Lspecial:
        bltu    a2, a3, .Lreturn2
        sub     a2, a2, a3      # subtract once if dividend >= divisor
.Lreturn2:
        bgez    a7, .Lpositive2
        neg     a2, a2          # if (dividend < 0), negate the remainder
.Lpositive2:
        retw

.Lle_one:
        # udivisor is either 0 or 1, so just return 0.
        # someday we may want to throw an exception if udivisor is 0.
        movi    a2, 0
        retw
.Lfe5:
        .size   __modsi3,.Lfe5-__modsi3

#endif /* L_modsi3 */