2011-03-21  Daniel Jacobowitz  <dan@codesourcery.com>
[official-gcc.git] / gcc / config / xtensa / lib1funcs.asm
blob: 071b9171177c81e7638acc3eb4d1dee075b2386a
/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009
   Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
27 #include "xtensa-config.h"
/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */
32 .macro do_abs dst, src, tmp
33 #if XCHAL_HAVE_ABS
34 abs \dst, \src
35 #else
36 neg \tmp, \src
37 movgez \tmp, \src, \src
38 mov \dst, \tmp
39 #endif
40 .endm
42 .macro do_addx2 dst, as, at, tmp
43 #if XCHAL_HAVE_ADDX
44 addx2 \dst, \as, \at
45 #else
46 slli \tmp, \as, 1
47 add \dst, \tmp, \at
48 #endif
49 .endm
51 .macro do_addx4 dst, as, at, tmp
52 #if XCHAL_HAVE_ADDX
53 addx4 \dst, \as, \at
54 #else
55 slli \tmp, \as, 2
56 add \dst, \tmp, \at
57 #endif
58 .endm
60 .macro do_addx8 dst, as, at, tmp
61 #if XCHAL_HAVE_ADDX
62 addx8 \dst, \as, \at
63 #else
64 slli \tmp, \as, 3
65 add \dst, \tmp, \at
66 #endif
67 .endm
/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */
74 .macro leaf_entry reg, size
75 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
76 entry \reg, \size
77 #else
78 /* do nothing */
79 #endif
80 .endm
82 .macro leaf_return
83 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
84 retw
85 #else
86 ret
87 #endif
88 .endm
91 #ifdef L_mulsi3
92 .align 4
93 .global __mulsi3
94 .type __mulsi3, @function
95 __mulsi3:
96 leaf_entry sp, 16
98 #if XCHAL_HAVE_MUL32
99 mull a2, a2, a3
101 #elif XCHAL_HAVE_MUL16
102 or a4, a2, a3
103 srai a4, a4, 16
104 bnez a4, .LMUL16
105 mul16u a2, a2, a3
106 leaf_return
107 .LMUL16:
108 srai a4, a2, 16
109 srai a5, a3, 16
110 mul16u a7, a4, a3
111 mul16u a6, a5, a2
112 mul16u a4, a2, a3
113 add a7, a7, a6
114 slli a7, a7, 16
115 add a2, a7, a4
117 #elif XCHAL_HAVE_MAC16
118 mul.aa.hl a2, a3
119 mula.aa.lh a2, a3
120 rsr a5, ACCLO
121 umul.aa.ll a2, a3
122 rsr a4, ACCLO
123 slli a5, a5, 16
124 add a2, a4, a5
126 #else /* !MUL32 && !MUL16 && !MAC16 */
128 /* Multiply one bit at a time, but unroll the loop 4x to better
129 exploit the addx instructions and avoid overhead.
130 Peel the first iteration to save a cycle on init. */
132 /* Avoid negative numbers. */
133 xor a5, a2, a3 /* Top bit is 1 if one input is negative. */
134 do_abs a3, a3, a6
135 do_abs a2, a2, a6
137 /* Swap so the second argument is smaller. */
138 sub a7, a2, a3
139 mov a4, a3
140 movgez a4, a2, a7 /* a4 = max (a2, a3) */
141 movltz a3, a2, a7 /* a3 = min (a2, a3) */
143 movi a2, 0
144 extui a6, a3, 0, 1
145 movnez a2, a4, a6
147 do_addx2 a7, a4, a2, a7
148 extui a6, a3, 1, 1
149 movnez a2, a7, a6
151 do_addx4 a7, a4, a2, a7
152 extui a6, a3, 2, 1
153 movnez a2, a7, a6
155 do_addx8 a7, a4, a2, a7
156 extui a6, a3, 3, 1
157 movnez a2, a7, a6
159 bgeui a3, 16, .Lmult_main_loop
160 neg a3, a2
161 movltz a2, a3, a5
162 leaf_return
164 .align 4
165 .Lmult_main_loop:
166 srli a3, a3, 4
167 slli a4, a4, 4
169 add a7, a4, a2
170 extui a6, a3, 0, 1
171 movnez a2, a7, a6
173 do_addx2 a7, a4, a2, a7
174 extui a6, a3, 1, 1
175 movnez a2, a7, a6
177 do_addx4 a7, a4, a2, a7
178 extui a6, a3, 2, 1
179 movnez a2, a7, a6
181 do_addx8 a7, a4, a2, a7
182 extui a6, a3, 3, 1
183 movnez a2, a7, a6
185 bgeui a3, 16, .Lmult_main_loop
187 neg a3, a2
188 movltz a2, a3, a5
190 #endif /* !MUL32 && !MUL16 && !MAC16 */
192 leaf_return
193 .size __mulsi3, . - __mulsi3
195 #endif /* L_mulsi3 */
198 #ifdef L_umulsidi3
200 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
201 #define XCHAL_NO_MUL 1
202 #endif
204 .align 4
205 .global __umulsidi3
206 .type __umulsidi3, @function
207 __umulsidi3:
208 #if __XTENSA_CALL0_ABI__
209 leaf_entry sp, 32
210 addi sp, sp, -32
211 s32i a12, sp, 16
212 s32i a13, sp, 20
213 s32i a14, sp, 24
214 s32i a15, sp, 28
215 #elif XCHAL_NO_MUL
216 /* This is not really a leaf function; allocate enough stack space
217 to allow CALL12s to a helper function. */
218 leaf_entry sp, 48
219 #else
220 leaf_entry sp, 16
221 #endif
223 #ifdef __XTENSA_EB__
224 #define wh a2
225 #define wl a3
226 #else
227 #define wh a3
228 #define wl a2
229 #endif /* __XTENSA_EB__ */
231 /* This code is taken from the mulsf3 routine in ieee754-sf.S.
232 See more comments there. */
234 #if XCHAL_HAVE_MUL32_HIGH
235 mull a6, a2, a3
236 muluh wh, a2, a3
237 mov wl, a6
239 #else /* ! MUL32_HIGH */
241 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
242 /* a0 and a8 will be clobbered by calling the multiply function
243 but a8 is not used here and need not be saved. */
244 s32i a0, sp, 0
245 #endif
247 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
249 #define a2h a4
250 #define a3h a5
252 /* Get the high halves of the inputs into registers. */
253 srli a2h, a2, 16
254 srli a3h, a3, 16
256 #define a2l a2
257 #define a3l a3
259 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
260 /* Clear the high halves of the inputs. This does not matter
261 for MUL16 because the high bits are ignored. */
262 extui a2, a2, 0, 16
263 extui a3, a3, 0, 16
264 #endif
265 #endif /* MUL16 || MUL32 */
268 #if XCHAL_HAVE_MUL16
270 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
271 mul16u dst, xreg ## xhalf, yreg ## yhalf
273 #elif XCHAL_HAVE_MUL32
275 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
276 mull dst, xreg ## xhalf, yreg ## yhalf
278 #elif XCHAL_HAVE_MAC16
280 /* The preprocessor insists on inserting a space when concatenating after
281 a period in the definition of do_mul below. These macros are a workaround
282 using underscores instead of periods when doing the concatenation. */
283 #define umul_aa_ll umul.aa.ll
284 #define umul_aa_lh umul.aa.lh
285 #define umul_aa_hl umul.aa.hl
286 #define umul_aa_hh umul.aa.hh
288 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
289 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
290 rsr dst, ACCLO
292 #else /* no multiply hardware */
294 #define set_arg_l(dst, src) \
295 extui dst, src, 0, 16
296 #define set_arg_h(dst, src) \
297 srli dst, src, 16
299 #if __XTENSA_CALL0_ABI__
300 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
301 set_arg_ ## xhalf (a13, xreg); \
302 set_arg_ ## yhalf (a14, yreg); \
303 call0 .Lmul_mulsi3; \
304 mov dst, a12
305 #else
306 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
307 set_arg_ ## xhalf (a14, xreg); \
308 set_arg_ ## yhalf (a15, yreg); \
309 call12 .Lmul_mulsi3; \
310 mov dst, a14
311 #endif /* __XTENSA_CALL0_ABI__ */
313 #endif /* no multiply hardware */
315 /* Add pp1 and pp2 into a6 with carry-out in a9. */
316 do_mul(a6, a2, l, a3, h) /* pp 1 */
317 do_mul(a11, a2, h, a3, l) /* pp 2 */
318 movi a9, 0
319 add a6, a6, a11
320 bgeu a6, a11, 1f
321 addi a9, a9, 1
323 /* Shift the high half of a9/a6 into position in a9. Note that
324 this value can be safely incremented without any carry-outs. */
325 ssai 16
326 src a9, a9, a6
328 /* Compute the low word into a6. */
329 do_mul(a11, a2, l, a3, l) /* pp 0 */
330 sll a6, a6
331 add a6, a6, a11
332 bgeu a6, a11, 1f
333 addi a9, a9, 1
335 /* Compute the high word into wh. */
336 do_mul(wh, a2, h, a3, h) /* pp 3 */
337 add wh, wh, a9
338 mov wl, a6
340 #endif /* !MUL32_HIGH */
342 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
343 /* Restore the original return address. */
344 l32i a0, sp, 0
345 #endif
346 #if __XTENSA_CALL0_ABI__
347 l32i a12, sp, 16
348 l32i a13, sp, 20
349 l32i a14, sp, 24
350 l32i a15, sp, 28
351 addi sp, sp, 32
352 #endif
353 leaf_return
355 #if XCHAL_NO_MUL
357 /* For Xtensa processors with no multiply hardware, this simplified
358 version of _mulsi3 is used for multiplying 16-bit chunks of
359 the floating-point mantissas. When using CALL0, this function
360 uses a custom ABI: the inputs are passed in a13 and a14, the
361 result is returned in a12, and a8 and a15 are clobbered. */
362 .align 4
363 .Lmul_mulsi3:
364 leaf_entry sp, 16
365 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
366 movi \dst, 0
367 1: add \tmp1, \src2, \dst
368 extui \tmp2, \src1, 0, 1
369 movnez \dst, \tmp1, \tmp2
371 do_addx2 \tmp1, \src2, \dst, \tmp1
372 extui \tmp2, \src1, 1, 1
373 movnez \dst, \tmp1, \tmp2
375 do_addx4 \tmp1, \src2, \dst, \tmp1
376 extui \tmp2, \src1, 2, 1
377 movnez \dst, \tmp1, \tmp2
379 do_addx8 \tmp1, \src2, \dst, \tmp1
380 extui \tmp2, \src1, 3, 1
381 movnez \dst, \tmp1, \tmp2
383 srli \src1, \src1, 4
384 slli \src2, \src2, 4
385 bnez \src1, 1b
386 .endm
387 #if __XTENSA_CALL0_ABI__
388 mul_mulsi3_body a12, a13, a14, a15, a8
389 #else
390 /* The result will be written into a2, so save that argument in a4. */
391 mov a4, a2
392 mul_mulsi3_body a2, a4, a3, a5, a6
393 #endif
394 leaf_return
395 #endif /* XCHAL_NO_MUL */
397 .size __umulsidi3, . - __umulsidi3
399 #endif /* L_umulsidi3 */
/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */
407 .macro do_nsau cnt, val, tmp, a
408 #if XCHAL_HAVE_NSA
409 nsau \cnt, \val
410 #else
411 mov \a, \val
412 movi \cnt, 0
413 extui \tmp, \a, 16, 16
414 bnez \tmp, 0f
415 movi \cnt, 16
416 slli \a, \a, 16
418 extui \tmp, \a, 24, 8
419 bnez \tmp, 1f
420 addi \cnt, \cnt, 8
421 slli \a, \a, 8
423 movi \tmp, __nsau_data
424 extui \a, \a, 24, 8
425 add \tmp, \tmp, \a
426 l8ui \tmp, \tmp, 0
427 add \cnt, \cnt, \tmp
428 #endif /* !XCHAL_HAVE_NSA */
429 .endm
431 #ifdef L_clz
432 .section .rodata
433 .align 4
434 .global __nsau_data
435 .type __nsau_data, @object
436 __nsau_data:
437 #if !XCHAL_HAVE_NSA
438 .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
439 .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
440 .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
441 .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
442 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
443 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
444 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
445 .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
446 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
447 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
448 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
449 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
450 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
451 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
452 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
453 .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
454 #endif /* !XCHAL_HAVE_NSA */
455 .size __nsau_data, . - __nsau_data
456 .hidden __nsau_data
457 #endif /* L_clz */
460 #ifdef L_clzsi2
461 .align 4
462 .global __clzsi2
463 .type __clzsi2, @function
464 __clzsi2:
465 leaf_entry sp, 16
466 do_nsau a2, a2, a3, a4
467 leaf_return
468 .size __clzsi2, . - __clzsi2
470 #endif /* L_clzsi2 */
473 #ifdef L_ctzsi2
474 .align 4
475 .global __ctzsi2
476 .type __ctzsi2, @function
477 __ctzsi2:
478 leaf_entry sp, 16
479 neg a3, a2
480 and a3, a3, a2
481 do_nsau a2, a3, a4, a5
482 neg a2, a2
483 addi a2, a2, 31
484 leaf_return
485 .size __ctzsi2, . - __ctzsi2
487 #endif /* L_ctzsi2 */
490 #ifdef L_ffssi2
491 .align 4
492 .global __ffssi2
493 .type __ffssi2, @function
494 __ffssi2:
495 leaf_entry sp, 16
496 neg a3, a2
497 and a3, a3, a2
498 do_nsau a2, a3, a4, a5
499 neg a2, a2
500 addi a2, a2, 32
501 leaf_return
502 .size __ffssi2, . - __ffssi2
504 #endif /* L_ffssi2 */
507 #ifdef L_udivsi3
508 .align 4
509 .global __udivsi3
510 .type __udivsi3, @function
511 __udivsi3:
512 leaf_entry sp, 16
513 #if XCHAL_HAVE_DIV32
514 quou a2, a2, a3
515 #else
516 bltui a3, 2, .Lle_one /* check if the divisor <= 1 */
518 mov a6, a2 /* keep dividend in a6 */
519 do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */
520 do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */
521 bgeu a5, a4, .Lspecial
523 sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
524 ssl a4
525 sll a3, a3 /* divisor <<= count */
526 movi a2, 0 /* quotient = 0 */
528 /* test-subtract-and-shift loop; one quotient bit on each iteration */
529 #if XCHAL_HAVE_LOOPS
530 loopnez a4, .Lloopend
531 #endif /* XCHAL_HAVE_LOOPS */
532 .Lloop:
533 bltu a6, a3, .Lzerobit
534 sub a6, a6, a3
535 addi a2, a2, 1
536 .Lzerobit:
537 slli a2, a2, 1
538 srli a3, a3, 1
539 #if !XCHAL_HAVE_LOOPS
540 addi a4, a4, -1
541 bnez a4, .Lloop
542 #endif /* !XCHAL_HAVE_LOOPS */
543 .Lloopend:
545 bltu a6, a3, .Lreturn
546 addi a2, a2, 1 /* increment quotient if dividend >= divisor */
547 .Lreturn:
548 leaf_return
550 .Lle_one:
551 beqz a3, .Lerror /* if divisor == 1, return the dividend */
552 leaf_return
554 .Lspecial:
555 /* return dividend >= divisor */
556 bltu a6, a3, .Lreturn0
557 movi a2, 1
558 leaf_return
560 .Lerror:
561 /* Divide by zero: Use an illegal instruction to force an exception.
562 The subsequent "DIV0" string can be recognized by the exception
563 handler to identify the real cause of the exception. */
565 .ascii "DIV0"
567 .Lreturn0:
568 movi a2, 0
569 #endif /* XCHAL_HAVE_DIV32 */
570 leaf_return
571 .size __udivsi3, . - __udivsi3
573 #endif /* L_udivsi3 */
576 #ifdef L_divsi3
577 .align 4
578 .global __divsi3
579 .type __divsi3, @function
580 __divsi3:
581 leaf_entry sp, 16
582 #if XCHAL_HAVE_DIV32
583 quos a2, a2, a3
584 #else
585 xor a7, a2, a3 /* sign = dividend ^ divisor */
586 do_abs a6, a2, a4 /* udividend = abs (dividend) */
587 do_abs a3, a3, a4 /* udivisor = abs (divisor) */
588 bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
589 do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */
590 do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */
591 bgeu a5, a4, .Lspecial
593 sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
594 ssl a4
595 sll a3, a3 /* udivisor <<= count */
596 movi a2, 0 /* quotient = 0 */
598 /* test-subtract-and-shift loop; one quotient bit on each iteration */
599 #if XCHAL_HAVE_LOOPS
600 loopnez a4, .Lloopend
601 #endif /* XCHAL_HAVE_LOOPS */
602 .Lloop:
603 bltu a6, a3, .Lzerobit
604 sub a6, a6, a3
605 addi a2, a2, 1
606 .Lzerobit:
607 slli a2, a2, 1
608 srli a3, a3, 1
609 #if !XCHAL_HAVE_LOOPS
610 addi a4, a4, -1
611 bnez a4, .Lloop
612 #endif /* !XCHAL_HAVE_LOOPS */
613 .Lloopend:
615 bltu a6, a3, .Lreturn
616 addi a2, a2, 1 /* increment if udividend >= udivisor */
617 .Lreturn:
618 neg a5, a2
619 movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */
620 leaf_return
622 .Lle_one:
623 beqz a3, .Lerror
624 neg a2, a6 /* if udivisor == 1, then return... */
625 movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */
626 leaf_return
628 .Lspecial:
629 bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
630 movi a2, 1
631 movi a4, -1
632 movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */
633 leaf_return
635 .Lerror:
636 /* Divide by zero: Use an illegal instruction to force an exception.
637 The subsequent "DIV0" string can be recognized by the exception
638 handler to identify the real cause of the exception. */
640 .ascii "DIV0"
642 .Lreturn0:
643 movi a2, 0
644 #endif /* XCHAL_HAVE_DIV32 */
645 leaf_return
646 .size __divsi3, . - __divsi3
648 #endif /* L_divsi3 */
651 #ifdef L_umodsi3
652 .align 4
653 .global __umodsi3
654 .type __umodsi3, @function
655 __umodsi3:
656 leaf_entry sp, 16
657 #if XCHAL_HAVE_DIV32
658 remu a2, a2, a3
659 #else
660 bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */
662 do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */
663 do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */
664 bgeu a5, a4, .Lspecial
666 sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
667 ssl a4
668 sll a3, a3 /* divisor <<= count */
670 /* test-subtract-and-shift loop */
671 #if XCHAL_HAVE_LOOPS
672 loopnez a4, .Lloopend
673 #endif /* XCHAL_HAVE_LOOPS */
674 .Lloop:
675 bltu a2, a3, .Lzerobit
676 sub a2, a2, a3
677 .Lzerobit:
678 srli a3, a3, 1
679 #if !XCHAL_HAVE_LOOPS
680 addi a4, a4, -1
681 bnez a4, .Lloop
682 #endif /* !XCHAL_HAVE_LOOPS */
683 .Lloopend:
685 .Lspecial:
686 bltu a2, a3, .Lreturn
687 sub a2, a2, a3 /* subtract once more if dividend >= divisor */
688 .Lreturn:
689 leaf_return
691 .Lle_one:
692 bnez a3, .Lreturn0
694 /* Divide by zero: Use an illegal instruction to force an exception.
695 The subsequent "DIV0" string can be recognized by the exception
696 handler to identify the real cause of the exception. */
698 .ascii "DIV0"
700 .Lreturn0:
701 movi a2, 0
702 #endif /* XCHAL_HAVE_DIV32 */
703 leaf_return
704 .size __umodsi3, . - __umodsi3
706 #endif /* L_umodsi3 */
709 #ifdef L_modsi3
710 .align 4
711 .global __modsi3
712 .type __modsi3, @function
713 __modsi3:
714 leaf_entry sp, 16
715 #if XCHAL_HAVE_DIV32
716 rems a2, a2, a3
717 #else
718 mov a7, a2 /* save original (signed) dividend */
719 do_abs a2, a2, a4 /* udividend = abs (dividend) */
720 do_abs a3, a3, a4 /* udivisor = abs (divisor) */
721 bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
722 do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */
723 do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */
724 bgeu a5, a4, .Lspecial
726 sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
727 ssl a4
728 sll a3, a3 /* udivisor <<= count */
730 /* test-subtract-and-shift loop */
731 #if XCHAL_HAVE_LOOPS
732 loopnez a4, .Lloopend
733 #endif /* XCHAL_HAVE_LOOPS */
734 .Lloop:
735 bltu a2, a3, .Lzerobit
736 sub a2, a2, a3
737 .Lzerobit:
738 srli a3, a3, 1
739 #if !XCHAL_HAVE_LOOPS
740 addi a4, a4, -1
741 bnez a4, .Lloop
742 #endif /* !XCHAL_HAVE_LOOPS */
743 .Lloopend:
745 .Lspecial:
746 bltu a2, a3, .Lreturn
747 sub a2, a2, a3 /* subtract again if udividend >= udivisor */
748 .Lreturn:
749 bgez a7, .Lpositive
750 neg a2, a2 /* if (dividend < 0), return -udividend */
751 .Lpositive:
752 leaf_return
754 .Lle_one:
755 bnez a3, .Lreturn0
757 /* Divide by zero: Use an illegal instruction to force an exception.
758 The subsequent "DIV0" string can be recognized by the exception
759 handler to identify the real cause of the exception. */
761 .ascii "DIV0"
763 .Lreturn0:
764 movi a2, 0
765 #endif /* XCHAL_HAVE_DIV32 */
766 leaf_return
767 .size __modsi3, . - __modsi3
769 #endif /* L_modsi3 */
772 #ifdef __XTENSA_EB__
773 #define uh a2
774 #define ul a3
775 #else
776 #define uh a3
777 #define ul a2
778 #endif /* __XTENSA_EB__ */
781 #ifdef L_ashldi3
782 .align 4
783 .global __ashldi3
784 .type __ashldi3, @function
785 __ashldi3:
786 leaf_entry sp, 16
787 ssl a4
788 bgei a4, 32, .Llow_only
789 src uh, uh, ul
790 sll ul, ul
791 leaf_return
793 .Llow_only:
794 sll uh, ul
795 movi ul, 0
796 leaf_return
797 .size __ashldi3, . - __ashldi3
799 #endif /* L_ashldi3 */
802 #ifdef L_ashrdi3
803 .align 4
804 .global __ashrdi3
805 .type __ashrdi3, @function
806 __ashrdi3:
807 leaf_entry sp, 16
808 ssr a4
809 bgei a4, 32, .Lhigh_only
810 src ul, uh, ul
811 sra uh, uh
812 leaf_return
814 .Lhigh_only:
815 sra ul, uh
816 srai uh, uh, 31
817 leaf_return
818 .size __ashrdi3, . - __ashrdi3
820 #endif /* L_ashrdi3 */
823 #ifdef L_lshrdi3
824 .align 4
825 .global __lshrdi3
826 .type __lshrdi3, @function
827 __lshrdi3:
828 leaf_entry sp, 16
829 ssr a4
830 bgei a4, 32, .Lhigh_only1
831 src ul, uh, ul
832 srl uh, uh
833 leaf_return
835 .Lhigh_only1:
836 srl ul, uh
837 movi uh, 0
838 leaf_return
839 .size __lshrdi3, . - __lshrdi3
841 #endif /* L_lshrdi3 */
844 #include "ieee754-df.S"
845 #include "ieee754-sf.S"