Introduce H8SX support.
[official-gcc.git] / gcc / config / h8300 / lib1funcs.asm
bloba638d8d8e6d3ca4122846371395d6d8ca59315e5
1 ;; libgcc routines for the Renesas H8/300 CPU.
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
5 /* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004
6 Free Software Foundation, Inc.
8 This file is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 In addition to the permissions in the GNU General Public License, the
14 Free Software Foundation gives you unlimited permission to link the
15 compiled version of this file into combinations with other programs,
16 and to distribute those combinations without any restriction coming
17 from the use of this file. (The General Public License restrictions
18 do apply in other respects; for example, they cover modification of
19 the file, and distribution when not linked into a combine
20 executable.)
22 This file is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 General Public License for more details.
27 You should have received a copy of the GNU General Public License
28 along with this program; see the file COPYING. If not, write to
29 the Free Software Foundation, 59 Temple Place - Suite 330,
30 Boston, MA 02111-1307, USA. */
/* Assembler register definitions.

   A0-A3 name r0-r3 and S0-S2 name r4-r6.  A trailing L or H selects the
   low or high byte of the 16-bit register, a trailing P the widest view
   of the register the selected CPU offers (rN on the H8/300, erN on the
   later cores) and a trailing E the eN half of erN.  */

/* 16-bit views.  */
#define A0	r0
#define A1	r1
#define A2	r2
#define A3	r3
#define S0	r4
#define S1	r5
#define S2	r6

/* Low bytes.  */
#define A0L	r0l
#define A1L	r1l
#define A2L	r2l
#define A3L	r3l
#define S0L	r4l
#define S1L	r5l
#define S2L	r6l

/* High bytes.  */
#define A0H	r0h
#define A1H	r1h
#define A2H	r2h
#define A3H	r3h
#define S0H	r4h
#define S1H	r5h
#define S2H	r6h

/* H8/300: the widest register is 16 bits, pushed with plain push/pop.  */
#ifdef __H8300__
#define A0P	r0
#define A1P	r1
#define A2P	r2
#define A3P	r3
#define S0P	r4
#define S1P	r5
#define S2P	r6

#define PUSHP	push
#define POPP	pop
#endif

/* H8/300H, H8S and H8SX: 32-bit erN registers, pushed with push.l/pop.l.  */
#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
#define A0P	er0
#define A1P	er1
#define A2P	er2
#define A3P	er3
#define S0P	er4
#define S1P	er5
#define S2P	er6

#define A0E	e0
#define A1E	e1
#define A2E	e2
#define A3E	e3

#define PUSHP	push.l
#define POPP	pop.l
#endif
/* Emit the assembler directive matching the CPU variant being compiled
   for; the directives ending in "n" are the ones used when
   __NORMAL_MODE__ is defined.  */

#if defined (__H8300H__) && defined (__NORMAL_MODE__)
	.h8300hn
#elif defined (__H8300H__)
	.h8300h
#endif

#if defined (__H8300S__) && defined (__NORMAL_MODE__)
	.h8300sn
#elif defined (__H8300S__)
	.h8300s
#endif

#if defined (__H8300SX__) && defined (__NORMAL_MODE__)
	.h8300sxn
#elif defined (__H8300SX__)
	.h8300sx
#endif
#ifdef L_cmpsi2
#ifdef __H8300__
;; ___cmpsi2 -- signed three-way compare of two 32-bit integers.
;;
;;   in:   A0:A1 = first operand  (A0 is the high word)
;;         A2:A3 = second operand (A2 is the high word)
;;   out:  A0 = 0 if first < second
;;              1 if first = second
;;              2 if first > second
;;
;; Each of the three exits needs its own rts: without them the equal
;; case runs on into .L2 and no path ever returns to the caller.
	.section .text
	.align 2
	.global ___cmpsi2
___cmpsi2:
	cmp.w	A0,A2		; flags from second.hi - first.hi
	bne	.L2		; high words differ: they decide (signed)
	cmp.w	A1,A3		; flags from second.lo - first.lo
	bne	.L4		; low words differ: they decide (unsigned)
	mov.w	#1,A0		; all 32 bits equal
	rts
.L2:
	bgt	.L5		; second.hi > first.hi, so first < second
.L3:
	mov.w	#2,A0		; first > second
	rts
.L4:
	bls	.L3		; second.lo below first.lo, so first > second
.L5:
	sub.w	A0,A0		; first < second: return 0
	rts
	.end
#endif
#endif /* L_cmpsi2 */
#ifdef L_ucmpsi2
#ifdef __H8300__
;; ___ucmpsi2 -- unsigned three-way compare of two 32-bit integers.
;;
;;   in:   A0:A1 = first operand  (A0 is the high word)
;;         A2:A3 = second operand (A2 is the high word)
;;   out:  A0 = 0 if first < second
;;              1 if first = second
;;              2 if first > second
;;
;; Each of the three exits needs its own rts: without them the equal
;; case runs on into .L2 and no path ever returns to the caller.
	.section .text
	.align 2
	.global ___ucmpsi2
___ucmpsi2:
	cmp.w	A0,A2		; flags from second.hi - first.hi
	bne	.L2		; high words differ: they decide
	cmp.w	A1,A3		; flags from second.lo - first.lo
	bne	.L4		; low words differ: they decide
	mov.w	#1,A0		; all 32 bits equal
	rts
.L2:
	bhi	.L5		; second.hi above first.hi, so first < second
.L3:
	mov.w	#2,A0		; first > second
	rts
.L4:
	bls	.L3		; second.lo below first.lo, so first > second
.L5:
	sub.w	A0,A0		; first < second: return 0
	rts
	.end
#endif
#endif /* L_ucmpsi2 */
168 #ifdef L_divhi3
170 ;; HImode divides for the H8/300.
171 ;; We bunch all of this into one object file since there are several
172 ;; "supporting routines".
; divnorm -- prepare two HImode operands for an unsigned divide.
;
;   in:   A0 = numerator, A1 = denominator (as passed to ___divhi3)
;   out:  A0, A1 each negated if it was negative
;         A2L  = copy of CCR whose bit 3 (N) says whether the quotient
;                must be negated afterwards: it starts as the sign of
;                the numerator and is flipped for a negative denominator

#ifdef __H8300__
	.section .text
	.align 2
divnorm:
	or	A0H,A0H		; N = sign bit of the numerator
	stc	ccr,A2L		; remember it in bit 3 of A2L
	bge	.Ldivnorm_den
	not	A0H		; numerator is negative: negate it
	not	A0L
	adds	#1,A0
.Ldivnorm_den:
	or	A1H,A1H		; N = sign bit of the denominator
	bge	.Ldivnorm_ret
	not	A1H		; denominator is negative: negate it
	not	A1L
	adds	#1,A1
	xor.b	#0x8,A2L	; and flip the recorded result sign
.Ldivnorm_ret:
	rts
;; modnorm -- same job as divnorm, but for the remainder: the sign
;; recorded in bit 3 of A2L is that of the numerator (A0) alone and is
;; not flipped when the denominator (A1) is negative.
modnorm:
	or	A0H,A0H		; N = sign bit of the numerator
	stc	ccr,A2L		; remember it in bit 3 of A2L
	bge	.Lmodnorm_den
	not	A0H		; numerator is negative: negate it
	not	A0L
	adds	#1,A0
.Lmodnorm_den:
	or	A1H,A1H		; N = sign bit of the denominator
	bge	.Lmodnorm_ret
	not	A1H		; denominator is negative: negate it
	not	A1L
	adds	#1,A1
.Lmodnorm_ret:
	rts
; ___divhi3:  A0 = A0 / A1, signed.
; Strip the signs with divnorm, divide unsigned, then fall into negans
; to put the sign back.

	.global	___divhi3
___divhi3:
	bsr	divnorm
	bsr	___udivhi3

; negans: negate A0 when bit 3 of A2L is set, then return.
; Shared tail, also reached from ___modhi3.
negans:
	btst	#3,A2L		; was a negative result requested?
	beq	.Lnegans_ret
	not	A0H		; yes: A0 = -A0
	not	A0L
	adds	#1,A0
.Lnegans_ret:
	rts

; ___modhi3:  A0 = A0 % A1, signed.
; ___udivhi3 leaves the remainder in A3; move it into A0 and let negans
; apply the sign recorded by modnorm.

	.global	___modhi3
___modhi3:
	bsr	modnorm
	bsr	___udivhi3
	mov.w	A3,A0
	bra	negans
; ___umodhi3:  A0 = A0 % A1, unsigned.
; ___udivhi3 leaves the remainder in A3; copy it into A0 and return.
; The rts is required: without it control runs on into ___udivhi3 and
; divides a second time using the remainder as the numerator.

	.global	___umodhi3
___umodhi3:
	bsr	___udivhi3
	mov	A3,A0
	rts
; A0=A0/A1 unsigned
; A3=A0%A1 unsigned
; A2H trashed
; D high 8 bits of denom
; d low 8 bits of denom
; N high 8 bits of num
; n low 8 bits of num
; M high 8 bits of mod
; m low 8 bits of mod
; Q high 8 bits of quot
; q low 8 bits of quot
; P preserve

; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
; see how to partition up the expression.
;
; Each of the three exits below ends in rts.  Without them the short
; path would run on into divlongway and the loop exits would run off
; the end of the routine.

	.global	___udivhi3
___udivhi3:
				; A0 A1 A2 A3
				; Nn Dd P
	sub.w	A3,A3		; Nn Dd xP 00
	or	A1H,A1H
	bne	divlongway	; denominator needs more than 8 bits
	or	A0H,A0H
	beq	_lab6		; N is zero: only the low byte to divide

; we know that D == 0 and N is != 0
	mov.b	A0H,A3L		; Nn Dd xP 0N
	divxu	A1L,A3		; MQ
	mov.b	A3L,A0H		; Q
; dealt with N, do n
_lab6:	mov.b	A0L,A3L		; n  (A3H still holds the remainder M)
	divxu	A1L,A3		; mq
	mov.b	A3L,A0L		; Qq
	mov.b	A3H,A3L		; m
	mov.b	#0x0,A3H	; Qq 0m
	rts

; D != 0 - the denominator is at least 256, so the quotient fits in
; eight bits; loop around to get the result one bit at a time.

divlongway:
	mov.b	A0H,A3L		; Nn Dd xP 0N
	mov.b	#0x0,A0H	; high byte of answer has to be zero
	mov.b	#0x8,A2H	; 8 iterations
div8:	add.b	A0L,A0L		; n*=2, top bit goes to carry
	rotxl	A3L		; shift it into the running remainder
	rotxl	A3H
	sub.w	A1,A3		; try remainder -= denominator
	bhs	setbit		; no borrow: this quotient bit is 1
	add.w	A1,A3		; borrow: too far, undo the subtraction

	dec	A2H
	bne	div8		; next bit
	rts

setbit: inc	A0L		; do insert bit
	dec	A2H
	bne	div8		; next bit
	rts
305 #endif /* __H8300__ */
306 #endif /* L_divhi3 */
308 #ifdef L_divsi3
310 ;; 4 byte integer divides for the H8/300.
312 ;; We have one routine which does all the work and lots of
313 ;; little ones which prepare the args and massage the sign.
314 ;; We bunch all of this into one object file since there are several
315 ;; "supporting routines".
317 .section .text
318 .align 2
320 ; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest.
321 ; This function is here to keep branch displacements small.
323 #ifdef __H8300__
; divnorm (H8/300) -- prepare two SImode operands for an unsigned divide.
;   in:   A0:A1 = numerator, A2:A3 = denominator
;   out:  each operand negated if it was negative
;         S2L  = copy of CCR; bit 3 is set when the quotient must be
;                negated afterwards
; The final rts is required: without it control runs on into modnorm.
divnorm:
	mov.b	A0H,A0H		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	postive

	; negate arg: complement all four bytes, then add one with carry
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add.b	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
postive:
	mov.b	A2H,A2H		; is the denominator -ve
	bge	postive2
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
	xor.b	#0x08,S2L	; toggle the result sign
postive2:
	rts
;; modnorm (H8/300) -- same job as divnorm, but for the remainder: the
;; sign kept in bit 3 of S2L is that of the numerator (A0:A1) alone and
;; is not toggled when the denominator (A2:A3) is negative.
;; The final rts is required: without it control runs on into ___modsi3.
modnorm:
	mov.b	A0H,A0H		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	mpostive

	; negate arg: complement all four bytes, then add one with carry
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add.b	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
mpostive:
	mov.b	A2H,A2H		; is the denominator -ve
	bge	mpostive2
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
mpostive2:
	rts
386 #else /* __H8300H__ */
; divnorm (H8/300H and later) -- prepare two SImode operands for an
; unsigned divide.
;   in:   A0P = numerator, A1P = denominator
;   out:  each operand negated if it was negative
;         S2L = copy of CCR; bit 3 is set when the quotient must be
;               negated afterwards
; The final rts is required: without it control runs on into modnorm.
divnorm:
	mov.l	A0P,A0P		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	postive

	neg.l	A0P		; negate arg

postive:
	mov.l	A1P,A1P		; is the denominator -ve
	bge	postive2

	neg.l	A1P		; negate arg
	xor.b	#0x08,S2L	; toggle the result sign

postive2:
	rts
;; modnorm (H8/300H and later) -- same job as divnorm, but for the
;; remainder: the sign kept in bit 3 of S2L is that of the numerator
;; (A0P) alone and is not toggled when the denominator (A1P) is negative.
;; The final rts is required: without it control runs on into ___modsi3.
modnorm:
	mov.l	A0P,A0P		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	mpostive

	neg.l	A0P		; negate arg

mpostive:
	mov.l	A1P,A1P		; is the denominator -ve
	bge	mpostive2

	neg.l	A1P		; negate arg

mpostive2:
	rts
423 #endif
; ___modsi3 -- signed SImode remainder.
;   H8/300:  numerator in A0/A1, denominator in A2/A3; the remainder
;            that divmodsi4 leaves in S0/S1 is copied to A0/A1.
;   others:  the remainder that ___udivsi3 leaves in er3 is copied to er0.
; Either way the routine finishes through exitdiv, which applies the
; sign recorded by modnorm and pops the registers pushed here.
	.global	___modsi3
___modsi3:
	PUSHP	S2P		; S2L carries the sign flag
#ifdef __H8300__
	PUSHP	S0P		; divmodsi4 returns the remainder in S0/S1
	PUSHP	S1P
	bsr	modnorm
	bsr	divmodsi4
	mov	S0,A0
	mov	S1,A1
#else
	bsr	modnorm
	bsr	___udivsi3
	mov.l	er3,er0
#endif
	bra	exitdiv
;; ___udivsi3 for the H8/300 -- unsigned SImode divide.
;; Pushes S2, S0 and S1, lets divmodsi4 do the work and leaves through
;; reti, which pops them again.
;; (The H8/300H and H8S version of ___udivsi3 is defined later in the
;; file.)
#ifdef __H8300__
	.global	___udivsi3
___udivsi3:
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4	; quotient comes back in A0/A1
	bra	reti
#endif
; ___umodsi3 -- unsigned SImode remainder.
;   H8/300:  copies the remainder divmodsi4 leaves in S0/S1 to A0/A1
;            and leaves through reti, which pops the pushed registers.
;   others:  copies the remainder ___udivsi3 leaves in er3 to er0 and
;            returns directly.  That rts is required: nothing was
;            pushed, and without it control runs on into ___divsi3.
	.global	___umodsi3
___umodsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4
	mov	S0,A0
	mov	S1,A1
	bra	reti
#else
	bsr	___udivsi3
	mov.l	er3,er0
	rts
#endif
; ___divsi3 -- signed SImode divide.
; Strips the signs with divnorm, divides unsigned, then falls into
; exitdiv/reti.  Those two labels are shared: ___modsi3 enters at
; exitdiv, and the H8/300 ___udivsi3 and ___umodsi3 enter at reti.
	.global	___divsi3
___divsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	jsr	divnorm
	jsr	divmodsi4
#else
	PUSHP	S2P
	jsr	divnorm
	bsr	___udivsi3
#endif

	; examine what the sign should be
exitdiv:
	btst	#3,S2L
	beq	reti

	; should be -ve
#ifdef __H8300__
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
#else /* __H8300H__ */
	neg.l	A0P
#endif

	; pop in the reverse of the push order, then return; the rts is
	; required, otherwise control runs on into the divide routine
	; that follows
reti:
#ifdef __H8300__
	POPP	S1P
	POPP	S0P
#endif
	POPP	S2P
	rts
516 ; takes A0/A1 numerator (A0P for H8/300H)
517 ; A2/A3 denominator (A1P for H8/300H)
518 ; returns A0/A1 quotient (A0P for H8/300H)
519 ; S0/S1 remainder (S0P for H8/300H)
520 ; trashes S2H
522 #ifdef __H8300__
; divmodsi4 (H8/300) -- unsigned 32-bit divide with remainder.
;   in:   A0/A1 = numerator, A2/A3 = denominator
;   out:  A0/A1 = quotient,  S0/S1 = remainder
;   S2H is used as scratch and as the loop counter.
;
; A denominator that fits in one byte is handled with up to four
; 16/8-bit divxu steps, one per numerator byte; anything larger uses a
; 24-step shift-and-subtract loop.  Each of the three exits ends in rts:
; without them the short path would run on into DenHighNonZero and the
; loop exits would run off the end of the routine.
divmodsi4:
	sub.w	S0,S0		; zero play area
	mov.w	S0,S1
	mov.b	A2H,S2H
	or	A2L,S2H
	or	A3H,S2H
	bne	DenHighNonZero	; denominator has bits above the low byte
	; skip leading zero bytes of the numerator; each label below is
	; entered when the named byte is the first non-zero one
	mov.b	A0H,A0H
	bne	NumByte0Zero
	mov.b	A0L,A0L
	bne	NumByte1Zero
	mov.b	A1H,A1H
	bne	NumByte2Zero
	bra	NumByte3Zero
NumByte0Zero:
	mov.b	A0H,S1L
	divxu	A3L,S1		; quotient byte in S1L, remainder in S1H
	mov.b	S1L,A0H
NumByte1Zero:
	mov.b	A0L,S1L
	divxu	A3L,S1
	mov.b	S1L,A0L
NumByte2Zero:
	mov.b	A1H,S1L
	divxu	A3L,S1
	mov.b	S1L,A1H
NumByte3Zero:
	mov.b	A1L,S1L
	divxu	A3L,S1
	mov.b	S1L,A1L

	mov.b	S1H,S1L		; final remainder goes to the low byte of S1
	mov.b	#0x0,S1H
	rts

; have to do the divide by shift and test
DenHighNonZero:
	mov.b	A0H,S1L		; top numerator byte starts the remainder
	mov.b	A0L,A0H		; shift the rest of the numerator up a byte
	mov.b	A1H,A0L
	mov.b	A1L,A1H

	mov.b	#0,A1L
	mov.b	#24,S2H		; only do 24 iterations

nextbit:
	add.w	A1,A1		; double the answer guess
	rotxl	A0L
	rotxl	A0H

	rotxl	S1L		; double remainder
	rotxl	S1H
	rotxl	S0L
	rotxl	S0H
	sub.w	A3,S1		; does it all fit
	subx	A2L,S0L
	subx	A2H,S0H
	bhs	setone

	add.w	A3,S1		; no, restore mistake
	addx	A2L,S0L
	addx	A2H,S0H

	dec	S2H
	bne	nextbit
	rts

setone:
	inc	A1L		; record a 1 bit in the quotient
	dec	S2H
	bne	nextbit
	rts
597 #else /* __H8300H__ */
;; ___udivsi3 (H8/300H and later) -- unsigned SImode divide.
;;   in:   er0 = numerator, er1 = denominator
;;   out:  er0 = quotient,  er3 = remainder
;;   trashes er2 (and, per the note below, er1)
;; This function also computes the remainder and stores it in er3.
;;
;; Both exits end in rts.  Without the first one the short path would
;; run on into DenHighNonZero; without the second the routine would run
;; off its end.
	.global	___udivsi3
___udivsi3:
	mov.w	A1E,A1E		; denominator top word 0?
	bne	DenHighNonZero

	; do it the easy way, see page 107 in manual
	mov.w	A0E,A2
	extu.l	A2P
	divxu.w	A1,A2P
	mov.w	A2E,A0E
	divxu.w	A1,A0P
	mov.w	A0E,A3
	mov.w	A2,A0E
	extu.l	A3P
	rts

; er0 = er0 / er1
; er3 = er0 % er1
; trashes er1 er2
; expects er1 >= 2^16
DenHighNonZero:
	mov.l	er0,er3
	mov.l	er1,er2
#ifdef __H8300H__
divmod_L21:
	shlr.l	er0
	shlr.l	er2		; make divisor < 2^16
	mov.w	e2,e2
	bne	divmod_L21
#else
	shlr.l	#2,er2		; make divisor < 2^16
	mov.w	e2,e2
	beq	divmod_L22A
divmod_L21:
	shlr.l	#2,er0
divmod_L22:
	shlr.l	#2,er2		; make divisor < 2^16
	mov.w	e2,e2
	bne	divmod_L21
divmod_L22A:
	rotxl.w	r2
	bcs	divmod_L23
	shlr.l	er0
	bra	divmod_L24
divmod_L23:
	rotxr.w	r2
	shlr.l	#2,er0
divmod_L24:
#endif
	;; At this point,
	;;  er0 contains shifted dividend
	;;  er1 contains divisor
	;;  er2 contains shifted divisor
	;;  er3 contains dividend, later remainder
	divxu.w	r2,er0		; r0 now contains the approximate quotient (AQ)
	extu.l	er0
	beq	divmod_L25
	subs	#1,er0		; er0 = AQ - 1
	mov.w	e1,r2
	mulxu.w	r0,er2		; er2 = upper (AQ - 1) * divisor
	sub.w	r2,e3		; dividend - 65536 * er2
	mov.w	r1,r2
	mulxu.w	r0,er2		; compute er3 = remainder (tentative)
	sub.l	er2,er3		; er3 = dividend - (AQ - 1) * divisor
divmod_L25:
	cmp.l	er1,er3		; is the remainder still >= the divisor?
	blo	divmod_L26	; no: quotient and remainder are final
	adds	#1,er0
	sub.l	er1,er3		; correct the remainder
divmod_L26:
	rts
672 #endif
673 #endif /* L_divsi3 */
675 #ifdef L_mulhi3
;; HImode multiply.
; The H8/300 only has an 8*8->16 multiply.
; The answer is the same as:
;
; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
; (we can ignore srca.h * srcb.h because all of it falls off the top)
; A0 in
; A1 in
; A0 answer
; A2 and A3 are used as scratch.
;
; The closing rts is required; without it the routine runs off its end.

#ifdef __H8300__
	.section .text
	.align 2
	.global	___mulhi3
___mulhi3:
	mov.b	A1L,A2L		; A2l gets srcb.l
	mulxu	A0L,A2		; A2 gets first sub product

	mov.b	A0H,A3L		; prepare for
	mulxu	A1L,A3		; second sub product

	add.b	A3L,A2H		; sum first two terms

	mov.b	A1H,A3L		; third sub product
	mulxu	A0L,A3

	add.b	A3L,A2H		; almost there
	mov.w	A2,A0		; that is
	rts

#endif
708 #endif /* L_mulhi3 */
710 #ifdef L_mulsi3
712 ;; SImode multiply.
714 ;; I think that shift and add may be sufficient for this. Using the
715 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
716 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
717 ;; quickly on small args.
719 ;; A0/A1 src_a
720 ;; A2/A3 src_b
722 ;; while (a)
723 ;; {
724 ;; if (a & 1)
725 ;; r += b;
726 ;; a >>= 1;
727 ;; b <<= 1;
728 ;; }
730 .section .text
731 .align 2
733 #ifdef __H8300__
; ___mulsi3 (H8/300) -- SImode multiply by shift and add.
;   in:   A0/A1 = a, A2/A3 = b
;   out:  A0/A1 = a * b
;   S0/S1 accumulate the result and are saved and restored here.
; The closing rts is required; without it the routine runs off its end
; after restoring S0/S1.
	.global	___mulsi3
___mulsi3:
	PUSHP	S0P
	PUSHP	S1P

	sub.w	S0,S0		; r = 0
	sub.w	S1,S1

	; while (a)
_top:	mov.w	A0,A0
	bne	_more
	mov.w	A1,A1
	beq	_done
_more:	; if (a & 1)
	bld	#0,A1L
	bcc	_nobit
	; r += b
	add.w	A3,S1
	addx	A2L,S0L
	addx	A2H,S0H
_nobit:
	; a >>= 1
	shlr	A0H
	rotxr	A0L
	rotxr	A1H
	rotxr	A1L

	; b <<= 1
	add.w	A3,A3
	addx	A2L,A2L
	addx	A2H,A2H
	bra	_top

_done:
	mov.w	S0,A0		; return r
	mov.w	S1,A1
	POPP	S1P
	POPP	S0P
	rts
775 #else /* __H8300H__ */
;
; mulsi3 for H8/300H - based on Renesas SH implementation
;
; by Toshiyasu Morita
;
; Timing quoted by the author:
;
;   old code:  16b * 16b = 372 states (worst case)
;              32b * 32b = 724 states (worst case)
;
;   new code:  16b * 16b = 48 states
;              16b * 32b = 72 states
;              32b * 32b = 92 states
;
; in:   er0 = a:b (e0 = a, r0 = b),  er1 = c:d (e1 = c, r1 = d)
; out:  er0 = low 32 bits of the product
; er2 and er3 are used as scratch.
;

	.global	___mulsi3
___mulsi3:
	mov.w	r1,r2		; ( 2 states) b * d
	mulxu.w	r0,er2		; (22 states)

	mov.w	e0,r3		; ( 2 states) a * d
	beq	.Lmulsi3_cb	; ( 4 states) a is zero: nothing to add
	mulxu.w	r1,er3		; (22 states)
	add.w	r3,e2		; ( 2 states) only the low word survives

.Lmulsi3_cb:
	mov.w	e1,r3		; ( 2 states) c * b
	beq	.Lmulsi3_out	; ( 4 states) c is zero: nothing to add
	mulxu.w	r0,er3		; (22 states)
	add.w	r3,e2		; ( 2 states) only the low word survives

.Lmulsi3_out:
	mov.l	er2,er0		; ( 2 states)
	rts			; (10 states)
814 #endif
815 #endif /* L_mulsi3 */
816 #ifdef L_fixunssfsi_asm
817 /* For the h8300 we use asm to save some bytes, to
818 allow more programs to fit into the tiny address
819 space. For the H8/300H and H8S, the C version is good enough. */
820 #ifdef __H8300__
/* We still treat NANs different than libgcc2.c, but then, the
   behavior is undefined anyways.

   ___fixunssfsi: the float argument arrives in A0/A1 and the 32-bit
   result is returned there.  Arguments whose top byte is below 0x4f
   (as a signed compare) are handed to ___fixsfsi.  A top byte above
   0x4f, or of 0x4f with the top bit of the next byte set, yields
   0xffffffff.  Otherwise that bit is set and the low three bytes are
   moved up by one byte to form the result.

   Both tails end in rts.  Without them the saturating path would run
   on into L_shift8 and the shift path would run off the end of the
   routine.  */
	.global	___fixunssfsi
___fixunssfsi:
	cmp.b	#0x4f,r0h
	bge	Large_num
	jmp	@___fixsfsi	; tail call: the signed conversion returns for us
Large_num:
	bhi	L_huge_num	; top byte above 0x4f
	xor.b	#0x80,A0L	; flip the top bit of the second byte
	bmi	L_shift8	; it was clear and is now set: shift path
L_huge_num:
	mov.w	#65535,A0	; saturate to 0xffffffff
	mov.w	A0,A1
	rts
L_shift8:
	mov.b	A0L,A0H		; move the low three bytes up by one byte
	mov.b	A1H,A0L
	mov.b	A1L,A1H
	mov.b	#0,A1L
	rts
842 #endif
843 #endif /* L_fixunssfsi_asm */