@ Extracted from the GCC source tree: gcc/config/arm/lib1thumb.asm
@ (web-page navigation text and blob hash removed)
1 @ libgcc1 routines for ARM cpu.
2 @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
4 /* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
6 This file is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 In addition to the permissions in the GNU General Public License, the
12 Free Software Foundation gives you unlimited permission to link the
13 compiled version of this file with other programs, and to distribute
14 those programs without any restriction coming from the use of this
15 file. (The General Public License restrictions do apply in other
16 respects; for example, they cover modification of the file, and
17 distribution when not linked into another program.)
19 This file is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; see the file COPYING. If not, write to
26 the Free Software Foundation, 59 Temple Place - Suite 330,
27 Boston, MA 02111-1307, USA. */
29 /* As a special exception, if you link this library with other files,
30 some of which are compiled with GCC, to produce an executable,
31 this library does not by itself cause the resulting executable
32 to be covered by the GNU General Public License.
33 This exception does not however invalidate any other reasons why
34 the executable file might be covered by the GNU General Public License. */
36 .code 16
38 #ifndef __USER_LABEL_PREFIX__
39 #error __USER_LABEL_PREFIX__ not defined
40 #endif
42 #ifdef __elf__
43 #define __PLT__ (PLT)
44 #define TYPE(x) .type SYM(x),function
45 #define SIZE(x) .size SYM(x), . - SYM(x)
46 #else
47 #define __PLT__
48 #define TYPE(x)
49 #define SIZE(x)
50 #endif
52 #define RET mov pc, lr
54 /* ANSI concatenation macros. */
56 #define CONCAT1(a, b) CONCAT2(a, b)
57 #define CONCAT2(a, b) a ## b
59 /* Use the right prefix for global labels. */
61 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
63 work .req r4 @ XXXX is this safe ?
65 #ifdef L_udivsi3
67 dividend .req r0
68 divisor .req r1
69 result .req r2
70 curbit .req r3
71 ip .req r12
72 sp .req r13
73 lr .req r14
74 pc .req r15
76 .text
77 .globl SYM (__udivsi3)
78 TYPE (__udivsi3)
79 .align 0
80 .thumb_func
81 SYM (__udivsi3):
82 cmp divisor, #0
83 beq Ldiv0
84 mov curbit, #1
85 mov result, #0
87 push { work }
88 cmp dividend, divisor
89 bcc Lgot_result
91 @ Load the constant 0x10000000 into our work register
92 mov work, #1
93 lsl work, #28
94 Loop1:
95 @ Unless the divisor is very big, shift it up in multiples of
96 @ four bits, since this is the amount of unwinding in the main
97 @ division loop. Continue shifting until the divisor is
98 @ larger than the dividend.
99 cmp divisor, work
100 bcs Lbignum
101 cmp divisor, dividend
102 bcs Lbignum
103 lsl divisor, #4
104 lsl curbit, #4
105 b Loop1
107 Lbignum:
108 @ Set work to 0x80000000
109 lsl work, #3
110 Loop2:
111 @ For very big divisors, we must shift it a bit at a time, or
112 @ we will be in danger of overflowing.
113 cmp divisor, work
114 bcs Loop3
115 cmp divisor, dividend
116 bcs Loop3
117 lsl divisor, #1
118 lsl curbit, #1
119 b Loop2
121 Loop3:
122 @ Test for possible subtractions, and note which bits
123 @ are done in the result. On the final pass, this may subtract
124 @ too much from the dividend, but the result will be ok, since the
125 @ "bit" will have been shifted out at the bottom.
126 cmp dividend, divisor
127 bcc Over1
128 sub dividend, dividend, divisor
129 orr result, result, curbit
130 Over1:
131 lsr work, divisor, #1
132 cmp dividend, work
133 bcc Over2
134 sub dividend, dividend, work
135 lsr work, curbit, #1
136 orr result, work
137 Over2:
138 lsr work, divisor, #2
139 cmp dividend, work
140 bcc Over3
141 sub dividend, dividend, work
142 lsr work, curbit, #2
143 orr result, work
144 Over3:
145 lsr work, divisor, #3
146 cmp dividend, work
147 bcc Over4
148 sub dividend, dividend, work
149 lsr work, curbit, #3
150 orr result, work
151 Over4:
152 cmp dividend, #0 @ Early termination?
153 beq Lgot_result
154 lsr curbit, #4 @ No, any more bits to do?
155 beq Lgot_result
156 lsr divisor, #4
157 b Loop3
158 Lgot_result:
159 mov r0, result
160 pop { work }
163 Ldiv0:
164 push { lr }
165 bl SYM (__div0) __PLT__
166 mov r0, #0 @ about as wrong as it could be
167 pop { pc }
169 SIZE (__udivsi3)
171 #endif /* L_udivsi3 */
173 #ifdef L_umodsi3
175 dividend .req r0
176 divisor .req r1
177 overdone .req r2
178 curbit .req r3
179 ip .req r12
180 sp .req r13
181 lr .req r14
182 pc .req r15
184 .text
185 .globl SYM (__umodsi3)
186 TYPE (__umodsi3)
187 .align 0
188 .thumb_func
189 SYM (__umodsi3):
190 cmp divisor, #0
191 beq Ldiv0
192 mov curbit, #1
193 cmp dividend, divisor
194 bcs Over1
195 RET
197 Over1:
198 @ Load the constant 0x10000000 into our work register
199 push { work }
200 mov work, #1
201 lsl work, #28
202 Loop1:
203 @ Unless the divisor is very big, shift it up in multiples of
204 @ four bits, since this is the amount of unwinding in the main
205 @ division loop. Continue shifting until the divisor is
206 @ larger than the dividend.
207 cmp divisor, work
208 bcs Lbignum
209 cmp divisor, dividend
210 bcs Lbignum
211 lsl divisor, #4
212 lsl curbit, #4
213 b Loop1
215 Lbignum:
216 @ Set work to 0x80000000
217 lsl work, #3
218 Loop2:
219 @ For very big divisors, we must shift it a bit at a time, or
220 @ we will be in danger of overflowing.
221 cmp divisor, work
222 bcs Loop3
223 cmp divisor, dividend
224 bcs Loop3
225 lsl divisor, #1
226 lsl curbit, #1
227 b Loop2
229 Loop3:
230 @ Test for possible subtractions. On the final pass, this may
231 @ subtract too much from the dividend, so keep track of which
232 @ subtractions are done, we can fix them up afterwards...
233 mov overdone, #0
234 cmp dividend, divisor
235 bcc Over2
236 sub dividend, dividend, divisor
237 Over2:
238 lsr work, divisor, #1
239 cmp dividend, work
240 bcc Over3
241 sub dividend, dividend, work
242 mov ip, curbit
243 mov work, #1
244 ror curbit, work
245 orr overdone, curbit
246 mov curbit, ip
247 Over3:
248 lsr work, divisor, #2
249 cmp dividend, work
250 bcc Over4
251 sub dividend, dividend, work
252 mov ip, curbit
253 mov work, #2
254 ror curbit, work
255 orr overdone, curbit
256 mov curbit, ip
257 Over4:
258 lsr work, divisor, #3
259 cmp dividend, work
260 bcc Over5
261 sub dividend, dividend, work
262 mov ip, curbit
263 mov work, #3
264 ror curbit, work
265 orr overdone, curbit
266 mov curbit, ip
267 Over5:
268 mov ip, curbit
269 cmp dividend, #0 @ Early termination?
270 beq Over6
271 lsr curbit, #4 @ No, any more bits to do?
272 beq Over6
273 lsr divisor, #4
274 b Loop3
276 Over6:
277 @ Any subtractions that we should not have done will be recorded in
278 @ the top three bits of "overdone". Exactly which were not needed
279 @ are governed by the position of the bit, stored in ip.
280 @ If we terminated early, because dividend became zero,
281 @ then none of the below will match, since the bit in ip will not be
282 @ in the bottom nibble.
284 mov work, #0xe
285 lsl work, #28
286 and overdone, work
287 bne Over7
288 pop { work }
289 RET @ No fixups needed
290 Over7:
291 mov curbit, ip
292 mov work, #3
293 ror curbit, work
294 tst overdone, curbit
295 beq Over8
296 lsr work, divisor, #3
297 add dividend, dividend, work
298 Over8:
299 mov curbit, ip
300 mov work, #2
301 ror curbit, work
302 tst overdone, curbit
303 beq Over9
304 lsr work, divisor, #2
305 add dividend, dividend, work
306 Over9:
307 mov curbit, ip
308 mov work, #1
309 ror curbit, work
310 tst overdone, curbit
311 beq Over10
312 lsr work, divisor, #1
313 add dividend, dividend, work
314 Over10:
315 pop { work }
316 RET
318 Ldiv0:
319 push { lr }
320 bl SYM (__div0) __PLT__
321 mov r0, #0 @ about as wrong as it could be
322 pop { pc }
324 SIZE (__umodsi3)
326 #endif /* L_umodsi3 */
328 #ifdef L_divsi3
330 dividend .req r0
331 divisor .req r1
332 result .req r2
333 curbit .req r3
334 ip .req r12
335 sp .req r13
336 lr .req r14
337 pc .req r15
339 .text
340 .globl SYM (__divsi3)
341 TYPE (__divsi3)
342 .align 0
343 .thumb_func
344 SYM (__divsi3):
345 cmp divisor, #0
346 beq Ldiv0
348 push { work }
349 mov work, dividend
350 eor work, divisor @ Save the sign of the result.
351 mov ip, work
352 mov curbit, #1
353 mov result, #0
354 cmp divisor, #0
355 bpl Over1
356 neg divisor, divisor @ Loops below use unsigned.
357 Over1:
358 cmp dividend, #0
359 bpl Over2
360 neg dividend, dividend
361 Over2:
362 cmp dividend, divisor
363 bcc Lgot_result
365 mov work, #1
366 lsl work, #28
367 Loop1:
368 @ Unless the divisor is very big, shift it up in multiples of
369 @ four bits, since this is the amount of unwinding in the main
370 @ division loop. Continue shifting until the divisor is
371 @ larger than the dividend.
372 cmp divisor, work
373 Bcs Lbignum
374 cmp divisor, dividend
375 Bcs Lbignum
376 lsl divisor, #4
377 lsl curbit, #4
378 b Loop1
380 Lbignum:
381 @ For very big divisors, we must shift it a bit at a time, or
382 @ we will be in danger of overflowing.
383 lsl work, #3
384 Loop2:
385 cmp divisor, work
386 Bcs Loop3
387 cmp divisor, dividend
388 Bcs Loop3
389 lsl divisor, #1
390 lsl curbit, #1
391 b Loop2
393 Loop3:
394 @ Test for possible subtractions, and note which bits
395 @ are done in the result. On the final pass, this may subtract
396 @ too much from the dividend, but the result will be ok, since the
397 @ "bit" will have been shifted out at the bottom.
398 cmp dividend, divisor
399 Bcc Over3
400 sub dividend, dividend, divisor
401 orr result, result, curbit
402 Over3:
403 lsr work, divisor, #1
404 cmp dividend, work
405 Bcc Over4
406 sub dividend, dividend, work
407 lsr work, curbit, #1
408 orr result, work
409 Over4:
410 lsr work, divisor, #2
411 cmp dividend, work
412 Bcc Over5
413 sub dividend, dividend, work
414 lsr work, curbit, #2
415 orr result, result, work
416 Over5:
417 lsr work, divisor, #3
418 cmp dividend, work
419 Bcc Over6
420 sub dividend, dividend, work
421 lsr work, curbit, #3
422 orr result, result, work
423 Over6:
424 cmp dividend, #0 @ Early termination?
425 Beq Lgot_result
426 lsr curbit, #4 @ No, any more bits to do?
427 Beq Lgot_result
428 lsr divisor, #4
429 b Loop3
431 Lgot_result:
432 mov r0, result
433 mov work, ip
434 cmp work, #0
435 Bpl Over7
436 neg r0, r0
437 Over7:
438 pop { work }
439 RET
441 Ldiv0:
442 push { lr }
443 bl SYM (__div0) __PLT__
444 mov r0, #0 @ about as wrong as it could be
445 pop { pc }
447 SIZE (__divsi3)
449 #endif /* L_divsi3 */
451 #ifdef L_modsi3
453 dividend .req r0
454 divisor .req r1
455 overdone .req r2
456 curbit .req r3
457 ip .req r12
458 sp .req r13
459 lr .req r14
460 pc .req r15
462 .text
463 .globl SYM (__modsi3)
464 TYPE (__modsi3)
465 .align 0
466 .thumb_func
467 SYM (__modsi3):
468 mov curbit, #1
469 cmp divisor, #0
470 beq Ldiv0
471 Bpl Over1
472 neg divisor, divisor @ Loops below use unsigned.
473 Over1:
474 push { work }
475 @ Need to save the sign of the dividend, unfortunately, we need
476 @ ip later on. Must do this after saving the original value of
477 @ the work register, because we will pop this value off first.
478 push { dividend }
479 cmp dividend, #0
480 Bpl Over2
481 neg dividend, dividend
482 Over2:
483 cmp dividend, divisor
484 bcc Lgot_result
485 mov work, #1
486 lsl work, #28
487 Loop1:
488 @ Unless the divisor is very big, shift it up in multiples of
489 @ four bits, since this is the amount of unwinding in the main
490 @ division loop. Continue shifting until the divisor is
491 @ larger than the dividend.
492 cmp divisor, work
493 bcs Lbignum
494 cmp divisor, dividend
495 bcs Lbignum
496 lsl divisor, #4
497 lsl curbit, #4
498 b Loop1
500 Lbignum:
501 @ Set work to 0x80000000
502 lsl work, #3
503 Loop2:
504 @ For very big divisors, we must shift it a bit at a time, or
505 @ we will be in danger of overflowing.
506 cmp divisor, work
507 bcs Loop3
508 cmp divisor, dividend
509 bcs Loop3
510 lsl divisor, #1
511 lsl curbit, #1
512 b Loop2
514 Loop3:
515 @ Test for possible subtractions. On the final pass, this may
516 @ subtract too much from the dividend, so keep track of which
517 @ subtractions are done, we can fix them up afterwards...
518 mov overdone, #0
519 cmp dividend, divisor
520 bcc Over3
521 sub dividend, dividend, divisor
522 Over3:
523 lsr work, divisor, #1
524 cmp dividend, work
525 bcc Over4
526 sub dividend, dividend, work
527 mov ip, curbit
528 mov work, #1
529 ror curbit, work
530 orr overdone, curbit
531 mov curbit, ip
532 Over4:
533 lsr work, divisor, #2
534 cmp dividend, work
535 bcc Over5
536 sub dividend, dividend, work
537 mov ip, curbit
538 mov work, #2
539 ror curbit, work
540 orr overdone, curbit
541 mov curbit, ip
542 Over5:
543 lsr work, divisor, #3
544 cmp dividend, work
545 bcc Over6
546 sub dividend, dividend, work
547 mov ip, curbit
548 mov work, #3
549 ror curbit, work
550 orr overdone, curbit
551 mov curbit, ip
552 Over6:
553 mov ip, curbit
554 cmp dividend, #0 @ Early termination?
555 beq Over7
556 lsr curbit, #4 @ No, any more bits to do?
557 beq Over7
558 lsr divisor, #4
559 b Loop3
561 Over7:
562 @ Any subtractions that we should not have done will be recorded in
563 @ the top three bits of "overdone". Exactly which were not needed
564 @ are governed by the position of the bit, stored in ip.
565 @ If we terminated early, because dividend became zero,
566 @ then none of the below will match, since the bit in ip will not be
567 @ in the bottom nibble.
568 mov work, #0xe
569 lsl work, #28
570 and overdone, work
571 beq Lgot_result
573 mov curbit, ip
574 mov work, #3
575 ror curbit, work
576 tst overdone, curbit
577 beq Over8
578 lsr work, divisor, #3
579 add dividend, dividend, work
580 Over8:
581 mov curbit, ip
582 mov work, #2
583 ror curbit, work
584 tst overdone, curbit
585 beq Over9
586 lsr work, divisor, #2
587 add dividend, dividend, work
588 Over9:
589 mov curbit, ip
590 mov work, #1
591 ror curbit, work
592 tst overdone, curbit
593 beq Lgot_result
594 lsr work, divisor, #1
595 add dividend, dividend, work
596 Lgot_result:
597 pop { work }
598 cmp work, #0
599 bpl Over10
600 neg dividend, dividend
601 Over10:
602 pop { work }
603 RET
605 Ldiv0:
606 push { lr }
607 bl SYM (__div0) __PLT__
608 mov r0, #0 @ about as wrong as it could be
609 pop { pc }
611 SIZE (__modsi3)
613 #endif /* L_modsi3 */
615 #ifdef L_dvmd_tls
617 .globl SYM (__div0)
618 TYPE (__div0)
619 .align 0
620 .thumb_func
621 SYM (__div0):
622 RET
624 SIZE (__div0)
626 #endif /* L_divmodsi_tools */
629 #ifdef L_call_via_rX
631 /* These labels & instructions are used by the Arm/Thumb interworking code.
632 The address of function to be called is loaded into a register and then
633 one of these labels is called via a BL instruction. This puts the
634 return address into the link register with the bottom bit set, and the
635 code here switches to the correct mode before executing the function. */
637 .text
638 .align 0
640 .macro call_via register
641 .globl SYM (_call_via_\register)
642 TYPE (_call_via_\register)
643 .thumb_func
644 SYM (_call_via_\register):
645 bx \register
648 SIZE (_call_via_\register)
649 .endm
651 call_via r0
652 call_via r1
653 call_via r2
654 call_via r3
655 call_via r4
656 call_via r5
657 call_via r6
658 call_via r7
659 call_via r8
660 call_via r9
661 call_via sl
662 call_via fp
663 call_via ip
664 call_via sp
665 call_via lr
667 #endif /* L_call_via_rX */
669 #ifdef L_interwork_call_via_rX
671 /* These labels & instructions are used by the Arm/Thumb interworking code,
672 when the target address is in an unknown instruction set. The address
673 of function to be called is loaded into a register and then one of these
674 labels is called via a BL instruction. This puts the return address
675 into the link register with the bottom bit set, and the code here
676 switches to the correct mode before executing the function. Unfortunately
677 the target code cannot be relied upon to return via a BX instruction, so
678 instead we have to store the resturn address on the stack and allow the
679 called function to return here instead. Upon return we recover the real
680 return address and use a BX to get back to Thumb mode. */
682 .text
683 .align 0
685 .code 32
686 .globl _arm_return
687 _arm_return:
688 ldmia r13!, {r12}
689 bx r12
691 .macro interwork register
692 .code 16
694 .globl SYM (_interwork_call_via_\register)
695 TYPE (_interwork_call_via_\register)
696 .thumb_func
697 SYM (_interwork_call_via_\register):
698 bx pc
701 .code 32
702 .globl .Lchange_\register
703 .Lchange_\register:
704 tst \register, #1
705 stmeqdb r13!, {lr}
706 adreq lr, _arm_return
707 bx \register
709 SIZE (_interwork_call_via_\register)
710 .endm
712 interwork r0
713 interwork r1
714 interwork r2
715 interwork r3
716 interwork r4
717 interwork r5
718 interwork r6
719 interwork r7
720 interwork r8
721 interwork r9
722 interwork sl
723 interwork fp
724 interwork ip
725 interwork sp
727 /* The lr case has to be handled a little differently...*/
728 .code 16
729 .globl SYM (_interwork_call_via_lr)
730 TYPE (_interwork_call_via_lr)
731 .thumb_func
732 SYM (_interwork_call_via_lr):
733 bx pc
736 .code 32
737 .globl .Lchange_lr
738 .Lchange_lr:
739 tst lr, #1
740 stmeqdb r13!, {lr}
741 mov ip, lr
742 adreq lr, _arm_return
743 bx ip
745 SIZE (_interwork_call_via_lr)
747 #endif /* L_interwork_call_via_rX */