/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007
   Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "xtensa-config.h"

/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */

        .macro  do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
        abs     \dst, \src
#else
        neg     \tmp, \src
        movgez  \tmp, \src, \src
        mov     \dst, \tmp
#endif
        .endm
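
/* For reference: when the ABS instruction is not configured, the fallback
   sequence above computes the equivalent of the C expression

       dst = (src >= 0) ? src : -src;

   MOVGEZ replaces the negated value with the original source whenever
   the source is non-negative.  */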

        .macro  do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
        addx2   \dst, \as, \at
#else
        slli    \tmp, \as, 1
        add     \dst, \tmp, \at
#endif
        .endm

        .macro  do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
        addx4   \dst, \as, \at
#else
        slli    \tmp, \as, 2
        add     \dst, \tmp, \at
#endif
        .endm

        .macro  do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
        addx8   \dst, \as, \at
#else
        slli    \tmp, \as, 3
        add     \dst, \tmp, \at
#endif
        .endm
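
/* Note: ADDX2, ADDX4, and ADDX8 compute dst = (as << 1) + at,
   (as << 2) + at, and (as << 3) + at respectively; the fallbacks above
   synthesize the same results with an explicit shift and add.  */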

/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */

        .macro  leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
        entry   \reg, \size
#else
        /* do nothing */
#endif
        .endm

        .macro  leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
        retw
#else
        ret
#endif
        .endm

#ifdef L_mulsi3
        .align  4
        .global __mulsi3
        .type   __mulsi3, @function
__mulsi3:
        leaf_entry sp, 16

#if XCHAL_HAVE_MUL32
        mull    a2, a2, a3

#elif XCHAL_HAVE_MUL16
        or      a4, a2, a3
        srai    a4, a4, 16
        bnez    a4, .LMUL16
        mul16u  a2, a2, a3
        leaf_return
.LMUL16:
        srai    a4, a2, 16
        srai    a5, a3, 16
        mul16u  a7, a4, a3
        mul16u  a6, a5, a2
        mul16u  a4, a2, a3
        add     a7, a7, a6
        slli    a7, a7, 16
        add     a2, a7, a4
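
        /* The MUL16 path uses the identity (with x = (xh << 16) + xl and
           y = (yh << 16) + yl, all halves 16 bits):

               (x * y) mod 2^32 = ((xh*yl + xl*yh) << 16) + xl*yl

           so three 16x16 multiplies suffice for the low 32 bits.  */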

#elif XCHAL_HAVE_MAC16
        mul.aa.hl a2, a3
        mula.aa.lh a2, a3
        rsr     a5, ACCLO
        umul.aa.ll a2, a3
        rsr     a4, ACCLO
        slli    a5, a5, 16
        add     a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */

        /* Multiply one bit at a time, but unroll the loop 4x to better
           exploit the addx instructions and avoid overhead.
           Peel the first iteration to save a cycle on init.  */
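
        /* Roughly, in C (a sketch that ignores the sign handling and the
           peeled first group of four bits):

               unsigned result = 0;
               while (multiplier != 0)
                 {
                   if (multiplier & 1) result += multiplicand;
                   if (multiplier & 2) result += multiplicand << 1;
                   if (multiplier & 4) result += multiplicand << 2;
                   if (multiplier & 8) result += multiplicand << 3;
                   multiplier >>= 4;
                   multiplicand <<= 4;
                 }

           The do_addx* macros fold each shift and add into a single
           instruction when ADDX is available.  */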

        /* Avoid negative numbers.  */
        xor     a5, a2, a3      /* Top bit is 1 if one input is negative.  */
        do_abs  a3, a3, a6
        do_abs  a2, a2, a6

        /* Swap so the second argument is smaller.  */
        sub     a7, a2, a3
        mov     a4, a3
        movgez  a4, a2, a7      /* a4 = max (a2, a3) */
        movltz  a3, a2, a7      /* a3 = min (a2, a3) */

        movi    a2, 0
        extui   a6, a3, 0, 1
        movnez  a2, a4, a6

        do_addx2 a7, a4, a2, a7
        extui   a6, a3, 1, 1
        movnez  a2, a7, a6

        do_addx4 a7, a4, a2, a7
        extui   a6, a3, 2, 1
        movnez  a2, a7, a6

        do_addx8 a7, a4, a2, a7
        extui   a6, a3, 3, 1
        movnez  a2, a7, a6

        bgeui   a3, 16, .Lmult_main_loop
        neg     a3, a2
        movltz  a2, a3, a5
        leaf_return

        .align  4
.Lmult_main_loop:
        srli    a3, a3, 4
        slli    a4, a4, 4

        add     a7, a4, a2
        extui   a6, a3, 0, 1
        movnez  a2, a7, a6

        do_addx2 a7, a4, a2, a7
        extui   a6, a3, 1, 1
        movnez  a2, a7, a6

        do_addx4 a7, a4, a2, a7
        extui   a6, a3, 2, 1
        movnez  a2, a7, a6

        do_addx8 a7, a4, a2, a7
        extui   a6, a3, 3, 1
        movnez  a2, a7, a6

        bgeui   a3, 16, .Lmult_main_loop

        neg     a3, a2
        movltz  a2, a3, a5

#endif /* !MUL32 && !MUL16 && !MAC16 */

        leaf_return
        .size   __mulsi3, . - __mulsi3

#endif /* L_mulsi3 */

#ifdef L_umulsidi3

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

        .align  4
        .global __umulsidi3
        .type   __umulsidi3, @function
__umulsidi3:
#if __XTENSA_CALL0_ABI__
        leaf_entry sp, 32
        addi    sp, sp, -32
        s32i    a12, sp, 16
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#elif XCHAL_NO_MUL
        /* This is not really a leaf function; allocate enough stack space
           to allow CALL12s to a helper function.  */
        leaf_entry sp, 48
#else
        leaf_entry sp, 16
#endif

#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */

        /* This code is taken from the mulsf3 routine in ieee754-sf.S.
           See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH
        mull    a6, a2, a3
        muluh   wh, a2, a3
        mov     wl, a6

#else /* ! MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* a0 and a8 will be clobbered by calling the multiply function
           but a8 is not used here and need not be saved.  */
        s32i    a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

        /* Get the high halves of the inputs into registers.  */
        srli    a2h, a2, 16
        srli    a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */
        extui   a2, a2, 0, 16
        extui   a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */

#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a14, xreg); \
        set_arg_ ## yhalf (a15, yreg); \
        call12  .Lmul_mulsi3; \
        mov     dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */
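
        /* The 64-bit product is assembled from four 16x16 partial products.
           Writing x = (xh << 16) + xl and y = (yh << 16) + yl:

               x * y = ((xh*yh) << 32) + ((xh*yl + xl*yh) << 16) + xl*yl

           The two middle products are summed first, with the carry-out
           captured explicitly, and then merged into the low and high
           words of the result.  */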

        /* Add pp1 and pp2 into a6 with carry-out in a9.  */
        do_mul(a6, a2, l, a3, h)        /* pp 1 */
        do_mul(a11, a2, h, a3, l)       /* pp 2 */
        movi    a9, 0
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Shift the high half of a9/a6 into position in a9.  Note that
           this value can be safely incremented without any carry-outs.  */
        ssai    16
        src     a9, a9, a6

        /* Compute the low word into a6.  */
        do_mul(a11, a2, l, a3, l)       /* pp 0 */
        sll     a6, a6
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Compute the high word into wh.  */
        do_mul(wh, a2, h, a3, h)        /* pp 3 */
        add     wh, wh, a9
        mov     wl, a6

#endif /* !MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Restore the original return address.  */
        l32i    a0, sp, 0
#endif
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

#if XCHAL_NO_MUL

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  When using CALL0, this function
           uses a custom ABI: the inputs are passed in a13 and a14, the
           result is returned in a12, and a8 and a15 are clobbered.  */
        .align  4
.Lmul_mulsi3:
        leaf_entry sp, 16
        .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
        movi    \dst, 0
1:      add     \tmp1, \src2, \dst
        extui   \tmp2, \src1, 0, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx2 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 1, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx4 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 2, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx8 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 3, 1
        movnez  \dst, \tmp1, \tmp2

        srli    \src1, \src1, 4
        slli    \src2, \src2, 4
        bnez    \src1, 1b
        .endm
#if __XTENSA_CALL0_ABI__
        mul_mulsi3_body a12, a13, a14, a15, a8
#else
        /* The result will be written into a2, so save that argument in a4.  */
        mov     a4, a2
        mul_mulsi3_body a2, a4, a3, a5, a6
#endif
        leaf_return
#endif /* XCHAL_NO_MUL */

        .size   __umulsidi3, . - __umulsidi3

#endif /* L_umulsidi3 */

/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */

        .macro  do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
        nsau    \cnt, \val
#else
        mov     \a, \val
        movi    \cnt, 0
        extui   \tmp, \a, 16, 16
        bnez    \tmp, 0f
        movi    \cnt, 16
        slli    \a, \a, 16
0:
        extui   \tmp, \a, 24, 8
        bnez    \tmp, 1f
        addi    \cnt, \cnt, 8
        slli    \a, \a, 8
1:
        movi    \tmp, __nsau_data
        extui   \a, \a, 24, 8
        add     \tmp, \tmp, \a
        l8ui    \tmp, \tmp, 0
        add     \cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
        .endm
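
/* A C sketch of the table-based fallback (when NSA is not configured):

       unsigned cnt = 0;
       if ((val >> 16) == 0) { cnt = 16; val <<= 16; }
       if ((val >> 24) == 0) { cnt += 8; val <<= 8; }
       cnt += __nsau_data[val >> 24];

   Both implementations return 32 for a zero input.  */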

#ifdef L_clz
        .section .rodata
        .align  4
        .global __nsau_data
        .type   __nsau_data, @object
__nsau_data:
#if !XCHAL_HAVE_NSA
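        /* Entry b holds the number of leading zero bits in the byte
           value b: 8 for 0, 7 for 1, 6 for 2-3, ..., 0 for 128-255.  */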
        .byte   8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
        .byte   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
        .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
        .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
        .size   __nsau_data, . - __nsau_data
        .hidden __nsau_data
#endif /* L_clz */

#ifdef L_clzsi2
        .align  4
        .global __clzsi2
        .type   __clzsi2, @function
__clzsi2:
        leaf_entry sp, 16
        do_nsau a2, a2, a3, a4
        leaf_return
        .size   __clzsi2, . - __clzsi2

#endif /* L_clzsi2 */

#ifdef L_ctzsi2
        .align  4
        .global __ctzsi2
        .type   __ctzsi2, @function
__ctzsi2:
        leaf_entry sp, 16
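        /* a2 & -a2 isolates the least-significant set bit; if that bit
           is at position k, nsau returns 31 - k, so 31 - nsau(a2 & -a2)
           recovers k.  (The result for a zero input is not meaningful.)  */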
        neg     a3, a2
        and     a3, a3, a2
        do_nsau a2, a3, a4, a5
        neg     a2, a2
        addi    a2, a2, 31
        leaf_return
        .size   __ctzsi2, . - __ctzsi2

#endif /* L_ctzsi2 */

#ifdef L_ffssi2
        .align  4
        .global __ffssi2
        .type   __ffssi2, @function
__ffssi2:
        leaf_entry sp, 16
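        /* Same bit trick as __ctzsi2, but biased by one: 32 - nsau(a2 & -a2)
           gives ffs(a2), and since nsau(0) = 32, a zero input correctly
           yields 0.  */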
        neg     a3, a2
        and     a3, a3, a2
        do_nsau a2, a3, a4, a5
        neg     a2, a2
        addi    a2, a2, 32
        leaf_return
        .size   __ffssi2, . - __ffssi2

#endif /* L_ffssi2 */

#ifdef L_udivsi3
        .align  4
        .global __udivsi3
        .type   __udivsi3, @function
__udivsi3:
        leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
        quou    a2, a2, a3
#else
        bltui   a3, 2, .Lle_one /* check if the divisor <= 1 */

        mov     a6, a2          /* keep dividend in a6 */
        do_nsau a5, a6, a2, a7  /* dividend_shift = nsau (dividend) */
        do_nsau a4, a3, a2, a7  /* divisor_shift = nsau (divisor) */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      /* count = divisor_shift - dividend_shift */
        ssl     a4
        sll     a3, a3          /* divisor <<= count */
        movi    a2, 0           /* quotient = 0 */

        /* test-subtract-and-shift loop; one quotient bit on each iteration */
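        /* In C terms, a sketch of the loop and the final step below:

               for (i = 0; i < count; i++)
                 {
                   if (dividend >= divisor) { dividend -= divisor; quotient += 1; }
                   quotient <<= 1;
                   divisor >>= 1;
                 }
               if (dividend >= divisor)
                 quotient += 1;
        */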
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a6, a3, .Lzerobit
        sub     a6, a6, a3
        addi    a2, a2, 1
.Lzerobit:
        slli    a2, a2, 1
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a6, a3, .Lreturn
        addi    a2, a2, 1       /* increment quotient if dividend >= divisor */
.Lreturn:
        leaf_return

.Lle_one:
        beqz    a3, .Lerror     /* trap if divisor == 0; otherwise divisor
                                   is 1, so return the dividend */
        leaf_return

.Lspecial:
        /* return dividend >= divisor */
        bltu    a6, a3, .Lreturn0
        movi    a2, 1
        leaf_return

.Lerror:
        /* Divide by zero: Use an illegal instruction to force an exception.
           The subsequent "DIV0" string can be recognized by the exception
           handler to identify the real cause of the exception.  */
        ill
        .ascii  "DIV0"

.Lreturn0:
        movi    a2, 0
#endif /* XCHAL_HAVE_DIV32 */
        leaf_return
        .size   __udivsi3, . - __udivsi3

#endif /* L_udivsi3 */

#ifdef L_divsi3
        .align  4
        .global __divsi3
        .type   __divsi3, @function
__divsi3:
        leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
        quos    a2, a2, a3
#else
        xor     a7, a2, a3      /* sign = dividend ^ divisor */
        do_abs  a6, a2, a4      /* udividend = abs (dividend) */
        do_abs  a3, a3, a4      /* udivisor = abs (divisor) */
        bltui   a3, 2, .Lle_one /* check if udivisor <= 1 */
        do_nsau a5, a6, a2, a8  /* udividend_shift = nsau (udividend) */
        do_nsau a4, a3, a2, a8  /* udivisor_shift = nsau (udivisor) */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      /* count = udivisor_shift - udividend_shift */
        ssl     a4
        sll     a3, a3          /* udivisor <<= count */
        movi    a2, 0           /* quotient = 0 */

        /* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a6, a3, .Lzerobit
        sub     a6, a6, a3
        addi    a2, a2, 1
.Lzerobit:
        slli    a2, a2, 1
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a6, a3, .Lreturn
        addi    a2, a2, 1       /* increment if udividend >= udivisor */
.Lreturn:
        neg     a5, a2
        movltz  a2, a5, a7      /* return (sign < 0) ? -quotient : quotient */
        leaf_return

.Lle_one:
        beqz    a3, .Lerror
        neg     a2, a6          /* if udivisor == 1, then return... */
        movgez  a2, a6, a7      /* (sign < 0) ? -udividend : udividend */
        leaf_return

.Lspecial:
        bltu    a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
        movi    a2, 1
        movi    a4, -1
        movltz  a2, a4, a7      /* else return (sign < 0) ? -1 : 1 */
        leaf_return

.Lerror:
        /* Divide by zero: Use an illegal instruction to force an exception.
           The subsequent "DIV0" string can be recognized by the exception
           handler to identify the real cause of the exception.  */
        ill
        .ascii  "DIV0"

.Lreturn0:
        movi    a2, 0
#endif /* XCHAL_HAVE_DIV32 */
        leaf_return
        .size   __divsi3, . - __divsi3

#endif /* L_divsi3 */

#ifdef L_umodsi3
        .align  4
        .global __umodsi3
        .type   __umodsi3, @function
__umodsi3:
        leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
        remu    a2, a2, a3
#else
        bltui   a3, 2, .Lle_one /* check if the divisor is <= 1 */

        do_nsau a5, a2, a6, a7  /* dividend_shift = nsau (dividend) */
        do_nsau a4, a3, a6, a7  /* divisor_shift = nsau (divisor) */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      /* count = divisor_shift - dividend_shift */
        ssl     a4
        sll     a3, a3          /* divisor <<= count */

        /* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a2, a3, .Lzerobit
        sub     a2, a2, a3
.Lzerobit:
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
        bltu    a2, a3, .Lreturn
        sub     a2, a2, a3      /* subtract once more if dividend >= divisor */
.Lreturn:
        leaf_return

.Lle_one:
        bnez    a3, .Lreturn0

        /* Divide by zero: Use an illegal instruction to force an exception.
           The subsequent "DIV0" string can be recognized by the exception
           handler to identify the real cause of the exception.  */
        ill
        .ascii  "DIV0"

.Lreturn0:
        movi    a2, 0
#endif /* XCHAL_HAVE_DIV32 */
        leaf_return
        .size   __umodsi3, . - __umodsi3

#endif /* L_umodsi3 */

#ifdef L_modsi3
        .align  4
        .global __modsi3
        .type   __modsi3, @function
__modsi3:
        leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
        rems    a2, a2, a3
#else
        mov     a7, a2          /* save original (signed) dividend */
        do_abs  a2, a2, a4      /* udividend = abs (dividend) */
        do_abs  a3, a3, a4      /* udivisor = abs (divisor) */
        bltui   a3, 2, .Lle_one /* check if udivisor <= 1 */
        do_nsau a5, a2, a6, a8  /* udividend_shift = nsau (udividend) */
        do_nsau a4, a3, a6, a8  /* udivisor_shift = nsau (udivisor) */
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      /* count = udivisor_shift - udividend_shift */
        ssl     a4
        sll     a3, a3          /* udivisor <<= count */

        /* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a2, a3, .Lzerobit
        sub     a2, a2, a3
.Lzerobit:
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
        bltu    a2, a3, .Lreturn
        sub     a2, a2, a3      /* subtract again if udividend >= udivisor */
.Lreturn:
        bgez    a7, .Lpositive
        neg     a2, a2          /* if (dividend < 0), return -udividend */
.Lpositive:
        leaf_return

.Lle_one:
        bnez    a3, .Lreturn0

        /* Divide by zero: Use an illegal instruction to force an exception.
           The subsequent "DIV0" string can be recognized by the exception
           handler to identify the real cause of the exception.  */
        ill
        .ascii  "DIV0"

.Lreturn0:
        movi    a2, 0
#endif /* XCHAL_HAVE_DIV32 */
        leaf_return
        .size   __modsi3, . - __modsi3

#endif /* L_modsi3 */

#ifdef __XTENSA_EB__
#define uh a2
#define ul a3
#else
#define uh a3
#define ul a2
#endif /* __XTENSA_EB__ */
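
/* For the DImode shifts below, "uh" and "ul" name the registers holding
   the high and low words of the 64-bit argument: on big-endian targets
   the high word arrives in a2, on little-endian targets in a3.  */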

#ifdef L_ashldi3
        .align  4
        .global __ashldi3
        .type   __ashldi3, @function
__ashldi3:
        leaf_entry sp, 16
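        /* A sketch in C, with n = a4 (the shift count):

               if (n < 32)
                 { uh = (n ? (uh << n) | (ul >> (32 - n)) : uh); ul <<= n; }
               else
                 { uh = ul << (n - 32); ul = 0; }

           SSL loads SAR so that SRC performs the 64-bit funnel shift and
           SLL shifts left by n, each in a single instruction.  */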
        ssl     a4
        bgei    a4, 32, .Llow_only
        src     uh, uh, ul
        sll     ul, ul
        leaf_return

.Llow_only:
        sll     uh, ul
        movi    ul, 0
        leaf_return
        .size   __ashldi3, . - __ashldi3

#endif /* L_ashldi3 */

#ifdef L_ashrdi3
        .align  4
        .global __ashrdi3
        .type   __ashrdi3, @function
__ashrdi3:
        leaf_entry sp, 16
        ssr     a4
        bgei    a4, 32, .Lhigh_only
        src     ul, uh, ul
        sra     uh, uh
        leaf_return

.Lhigh_only:
        sra     ul, uh
        srai    uh, uh, 31
        leaf_return
        .size   __ashrdi3, . - __ashrdi3

#endif /* L_ashrdi3 */

#ifdef L_lshrdi3
        .align  4
        .global __lshrdi3
        .type   __lshrdi3, @function
__lshrdi3:
        leaf_entry sp, 16
        ssr     a4
        bgei    a4, 32, .Lhigh_only1
        src     ul, uh, ul
        srl     uh, uh
        leaf_return

.Lhigh_only1:
        srl     ul, uh
        movi    uh, 0
        leaf_return
        .size   __lshrdi3, . - __lshrdi3

#endif /* L_lshrdi3 */

#include "ieee754-df.S"
#include "ieee754-sf.S"