2003-12-26 Guilhem Lavaux <guilhem@kaffe.org>
[official-gcc.git] / gcc / config / h8300 / lib1funcs.asm
blob3793a4bf52282bb0042281888c1ee5e421ef400c
1 ;; libgcc routines for the Hitachi H8/300 CPU.
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@hsa.hitachi.com>
5 /* Copyright (C) 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
7 This file is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
10 later version.
12 In addition to the permissions in the GNU General Public License, the
13 Free Software Foundation gives you unlimited permission to link the
14 compiled version of this file into combinations with other programs,
15 and to distribute those combinations without any restriction coming
16 from the use of this file. (The General Public License restrictions
17 do apply in other respects; for example, they cover modification of
18 the file, and distribution when not linked into a combine
19 executable.)
21 This file is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; see the file COPYING. If not, write to
28 the Free Software Foundation, 59 Temple Place - Suite 330,
29 Boston, MA 02111-1307, USA. */
31 /* Assembler register definitions.  A0-A3 name r0-r3 and S0-S2 name r4-r6; the L and H suffixes name the low and high byte halves, the P suffix the pointer-sized register and the E suffix the eN upper-word registers (H8/300H and H8S only). */
33 #define A0 r0
34 #define A0L r0l
35 #define A0H r0h
37 #define A1 r1
38 #define A1L r1l
39 #define A1H r1h
41 #define A2 r2
42 #define A2L r2l
43 #define A2H r2h
45 #define A3 r3
46 #define A3L r3l
47 #define A3H r3h
49 #define S0 r4
50 #define S0L r4l
51 #define S0H r4h
53 #define S1 r5
54 #define S1L r5l
55 #define S1H r5h
57 #define S2 r6
58 #define S2L r6l
59 #define S2H r6h
61 #ifdef __H8300__ /* plain H8/300: pointer-sized registers are the word registers rN */
62 #define PUSHP push
63 #define POPP pop
65 #define A0P r0
66 #define A1P r1
67 #define A2P r2
68 #define A3P r3
69 #define S0P r4
70 #define S1P r5
71 #define S2P r6
72 #endif /* __H8300__ */
74 #if defined (__H8300H__) || defined (__H8300S__) /* pointer-sized registers are the long registers erN */
75 #define PUSHP push.l
76 #define POPP pop.l
78 #define A0P er0
79 #define A1P er1
80 #define A2P er2
81 #define A3P er3
82 #define S0P er4
83 #define S1P er5
84 #define S2P er6
86 #define A0E e0
87 #define A1E e1
88 #define A2E e2
89 #define A3E e3
90 #endif /* __H8300H__ || __H8300S__ */
92 #ifdef __H8300H__ /* emit the assembler mode directive matching the target (normal or advanced mode) */
93 #ifdef __NORMAL_MODE__
94 .h8300hn
95 #else
96 .h8300h
97 #endif
98 #endif /* __H8300H__ */
100 #ifdef __H8300S__ /* same choice for the H8S */
101 #ifdef __NORMAL_MODE__
102 .h8300sn
103 #else
104 .h8300s
105 #endif
106 #endif /* __H8300S__ */
108 #ifdef L_cmpsi2
109 #ifdef __H8300__
110 .section .text
111 .align 2
112 .global ___cmpsi2
113 ___cmpsi2: ; signed compare of a (A0:A1) with b (A2:A3); A0 = 0 if a < b, 1 if a == b, 2 if a > b
114 cmp.w A0,A2 ; high words: flags reflect b.hi - a.hi
115 bne .L2 ; high words differ, so they decide (signed)
116 cmp.w A1,A3 ; high words equal: flags reflect b.lo - a.lo
117 bne .L4 ; low words differ, so they decide (unsigned)
118 mov.w #1,A0 ; a == b
119 rts ; each result must return here; without it the value is overwritten below
120 .L2:
121 bgt .L5 ; b.hi > a.hi (signed), so a < b
122 .L3:
123 mov.w #2,A0 ; a > b
124 rts
125 .L4:
126 bls .L3 ; b.lo < a.lo (unsigned, equality already excluded), so a > b
127 .L5:
128 sub.w A0,A0 ; a < b
129 rts
130 .end
131 #endif
132 #endif /* L_cmpsi2 */
134 #ifdef L_ucmpsi2
135 #ifdef __H8300__
136 .section .text
137 .align 2
138 .global ___ucmpsi2
139 ___ucmpsi2: ; unsigned compare of a (A0:A1) with b (A2:A3); A0 = 0 if a < b, 1 if a == b, 2 if a > b
140 cmp.w A0,A2 ; high words: flags reflect b.hi - a.hi
141 bne .L2 ; high words differ, so they decide
142 cmp.w A1,A3 ; high words equal: flags reflect b.lo - a.lo
143 bne .L4 ; low words differ, so they decide
144 mov.w #1,A0 ; a == b
145 rts ; each result must return here; without it the value is overwritten below
146 .L2:
147 bhi .L5 ; b.hi > a.hi (unsigned), so a < b
148 .L3:
149 mov.w #2,A0 ; a > b
150 rts
151 .L4:
152 bls .L3 ; b.lo < a.lo (unsigned, equality already excluded), so a > b
153 .L5:
154 sub.w A0,A0 ; a < b
155 rts
156 .end
157 #endif
158 #endif /* L_ucmpsi2 */
160 #ifdef L_divhi3
162 ;; HImode divides for the H8/300.
163 ;; We bunch all of this into one object file since there are several
164 ;; "supporting routines".
166 ; general purpose normalize routine
168 ; dividend (numerator) in A0 - the callers compute A0/A1
169 ; divisor (denominator) in A1
170 ; turns both into +ve numbers, and leaves what the answer sign
171 ; should be in A2L (1 = negative, 0 = positive)
173 #ifdef __H8300__
174 .section .text
175 .align 2
176 divnorm:
177 mov.b #0x0,A2L ; start by assuming a positive result
178 or A0H,A0H ; is dividend >= 0 (test the sign via the high byte)
179 bge _lab1
180 not A0H ; no - then make it +ve
181 not A0L
182 adds #1,A0 ; invert then add one = negate
183 xor #0x1,A2L ; and remember that in A2L
184 _lab1: or A1H,A1H ; look at divisor
185 bge _lab2
186 not A1H ; it is -ve, make it positive
187 not A1L
188 adds #1,A1
189 xor #0x1,A2L; and toggle sign of result
190 _lab2: rts
191 ;; Basically the same, except that only the sign of the dividend (A0)
192 ;; is recorded in A2L, so the remainder takes the sign of the dividend.
193 modnorm:
194 mov.b #0x0,A2L ; start by assuming a positive result
195 or A0H,A0H ; is dividend >= 0
196 bge _lab7
197 not A0H ; no - then make it +ve
198 not A0L
199 adds #1,A0
200 xor #0x1,A2L ; and remember that in A2L
201 _lab7: or A1H,A1H ; look at divisor
202 bge _lab8
203 not A1H ; it is -ve, make it positive
204 not A1L
205 adds #1,A1 ; A2L is deliberately left alone here
206 _lab8: rts
208 ; A0=A0/A1 signed (the negans tail is also the exit path of ___modhi3)
210 .global ___divhi3
211 ___divhi3:
212 bsr divnorm ; make both operands positive, result sign in A2L
213 bsr ___udivhi3 ; unsigned divide: quotient in A0, remainder in A3
214 negans: or A2L,A2L ; should answer be negative ?
215 beq _lab4
216 not A0H ; yes, so make it so
217 not A0L
218 adds #1,A0 ; invert then add one = negate
219 _lab4: rts
221 ; A0=A0%A1 signed
223 .global ___modhi3
224 ___modhi3:
225 bsr modnorm ; make both operands positive, dividend sign in A2L
226 bsr ___udivhi3 ; unsigned divide leaves the remainder in A3
227 mov A3,A0 ; return the remainder
228 bra negans ; negate it when A2L says so, then return
230 ; A0=A0%A1 unsigned
232 .global ___umodhi3
233 ___umodhi3:
234 bsr ___udivhi3 ; unsigned divide leaves the remainder in A3
235 mov A3,A0 ; return the remainder
236 rts ; return here rather than falling through into ___udivhi3
238 ; A0=A0/A1 unsigned
239 ; A3=A0%A1 unsigned
240 ; A2H trashed
241 ; D high 8 bits of denom
242 ; d low 8 bits of denom
243 ; N high 8 bits of num
244 ; n low 8 bits of num
245 ; M high 8 bits of mod
246 ; m low 8 bits of mod
247 ; Q high 8 bits of quot
248 ; q low 8 bits of quot
249 ; P preserve
251 ; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
252 ; see how to partition up the expression.
254 .global ___udivhi3
255 ___udivhi3:
256 ; A0 A1 A2 A3
257 ; Nn Dd P
258 sub.w A3,A3 ; Nn Dd xP 00
259 or A1H,A1H ; does the denominator fit in 8 bits ?
260 bne divlongway ; no - use the shift and subtract loop
261 or A0H,A0H ; is the high byte of the numerator zero ?
262 beq _lab6 ; yes - a single divxu on the low byte is enough
264 ; we know that D == 0 and N is != 0
265 mov.b A0H,A3L ; Nn Dd xP 0N
266 divxu A1L,A3 ; MQ
267 mov.b A3L,A0H ; Q
268 ; dealt with N, do n
269 _lab6: mov.b A0L,A3L ; n
270 divxu A1L,A3 ; mq
271 mov.b A3L,A0L ; Qq
272 mov.b A3H,A3L ; m
273 mov.b #0x0,A3H ; Qq 0m
274 rts ; done - must not fall into the long division below
276 ; D != 0 - which means the denominator does not fit in 8 bits and
277 ; divxu cannot be used, so loop around (shift and subtract) to get the result.
279 divlongway:
280 mov.b A0H,A3L ; Nn Dd xP 0N
281 mov.b #0x0,A0H ; high byte of answer has to be zero
282 mov.b #0x8,A2H ; 8 quotient bits to produce
283 div8: add.b A0L,A0L ; n*=2, top bit goes to carry
284 rotxl A3L ; Make remainder bigger, shifting the carry in
285 rotxl A3H
286 sub.w A1,A3 ; try remainder -= denominator
287 bhs setbit ; set a bit ?
288 add.w A1,A3 ; no : too far , remainder += denominator
290 dec A2H
291 bne div8 ; next bit
292 rts ; all 8 bits done
294 setbit: inc A0L ; do insert bit
295 dec A2H
296 bne div8 ; next bit
297 rts ; all 8 bits done
299 #endif /* __H8300__ */
300 #endif /* L_divhi3 */
302 #ifdef L_divsi3
304 ;; 4 byte integer divides for the H8/300.
306 ;; We have one routine which does all the work and lots of
307 ;; little ones which prepare the args and massage the sign.
308 ;; We bunch all of this into one object file since there are several
309 ;; "supporting routines".
311 .section .text
312 .align 2
314 ; Put abs SIs into r0/r1 and r2/r3, and leave the sign of the result in bit 3 of r6l.
315 ; This function is here to keep branch displacements small.
317 #ifdef __H8300__
319 divnorm:
320 mov.b A0H,A0H ; is the numerator -ve
321 stc ccr,S2L ; keep the sign in bit 3 of S2L
322 bge postive
324 ; negate arg
325 not A0H
326 not A1H
327 not A0L
328 not A1L
330 add.b #1,A1L ; add one to the inverted value, rippling the carry upwards
331 addx #0,A1H
332 addx #0,A0L
333 addx #0,A0H
334 postive:
335 mov.b A2H,A2H ; is the denominator -ve
336 bge postive2
337 not A2L
338 not A2H
339 not A3L
340 not A3H
341 add.b #1,A3L
342 addx #0,A3H
343 addx #0,A2L
344 addx #0,A2H
345 xor.b #0x08,S2L ; toggle the result sign
346 postive2:
347 rts ; return to the caller instead of running on into modnorm
349 ;; Basically the same, except that only the sign of the numerator is
350 ;; kept in S2L, so the remainder takes the sign of the numerator.
351 modnorm:
352 mov.b A0H,A0H ; is the numerator -ve
353 stc ccr,S2L ; keep the sign in bit 3 of S2L
354 bge mpostive
356 ; negate arg
357 not A0H
358 not A1H
359 not A0L
360 not A1L
362 add.b #1,A1L ; add one to the inverted value, rippling the carry upwards
363 addx #0,A1H
364 addx #0,A0L
365 addx #0,A0H
366 mpostive:
367 mov.b A2H,A2H ; is the denominator -ve
368 bge mpostive2
369 not A2L
370 not A2H
371 not A3L
372 not A3H
373 add.b #1,A3L
374 addx #0,A3H
375 addx #0,A2L
376 addx #0,A2H ; no sign toggle here, unlike divnorm
377 mpostive2:
378 rts ; return to the caller instead of running on into the next routine
380 #else /* __H8300H__ */
382 divnorm: ; make er0 and er1 positive, result sign in bit 3 of S2L
383 mov.l A0P,A0P ; is the numerator -ve
384 stc ccr,S2L ; keep the sign in bit 3 of S2L
385 bge postive
387 neg.l A0P ; negate arg
389 postive:
390 mov.l A1P,A1P ; is the denominator -ve
391 bge postive2
393 neg.l A1P ; negate arg
394 xor.b #0x08,S2L ; toggle the result sign
396 postive2:
397 rts ; return to the caller instead of running on into modnorm
399 ;; Basically the same, except that only the sign of the numerator is
400 ;; kept in S2L, so the remainder takes the sign of the numerator.
401 modnorm:
402 mov.l A0P,A0P ; is the numerator -ve
403 stc ccr,S2L ; keep the sign in bit 3 of S2L
404 bge mpostive
406 neg.l A0P ; negate arg
408 mpostive:
409 mov.l A1P,A1P ; is the denominator -ve
410 bge mpostive2
412 neg.l A1P ; negate arg, no sign toggle here unlike divnorm
414 mpostive2:
415 rts ; return to the caller instead of running on into ___modsi3
417 #endif
419 ; signed remainder: numerator in A0/A1 (the H8/300H path works on er0)
420 ; denominator in A2/A3 (the H8/300H path works on er1)
421 .global ___modsi3
422 ___modsi3:
423 #ifdef __H8300__
424 PUSHP S2P ; S2L is used for the sign flag
425 PUSHP S0P ; S0/S1 receive the remainder from divmodsi4
426 PUSHP S1P
427 bsr modnorm ; make operands positive, numerator sign in S2L
428 bsr divmodsi4 ; quotient in A0/A1, remainder in S0/S1
429 mov S0,A0 ; return the remainder
430 mov S1,A1
431 bra exitdiv ; fix the sign, restore the saved registers and return
432 #else
433 PUSHP S2P ; S2L is used for the sign flag
434 bsr modnorm ; make operands positive, numerator sign in S2L
435 bsr ___udivsi3 ; also leaves the remainder in er3
436 mov.l er3,er0 ; return the remainder
437 bra exitdiv ; fix the sign, restore S2P and return
438 #endif
440 ;; H8/300H and H8S version of ___udivsi3 is defined later in
441 ;; the file.
442 #ifdef __H8300__
443 .global ___udivsi3
444 ___udivsi3:
445 PUSHP S2P ; pushed in the order that reti pops them back
446 PUSHP S0P
447 PUSHP S1P
448 bsr divmodsi4 ; quotient in A0/A1
449 bra reti ; unsigned, so skip the sign fix-up in exitdiv
450 #endif
452 .global ___umodsi3
453 ___umodsi3: ; unsigned remainder of A0/A1 by A2/A3 (er0 by er1 on H8/300H and H8S)
454 #ifdef __H8300__
455 PUSHP S2P ; pushed in the order that reti pops them back
456 PUSHP S0P
457 PUSHP S1P
458 bsr divmodsi4 ; remainder in S0/S1
459 mov S0,A0 ; return the remainder
460 mov S1,A1
461 bra reti ; restore the saved registers and return
462 #else
463 bsr ___udivsi3 ; also leaves the remainder in er3
464 mov.l er3,er0 ; return the remainder
465 rts ; return here rather than falling through into ___divsi3
466 #endif
468 .global ___divsi3
469 ___divsi3: ; signed quotient of A0/A1 by A2/A3 (er0 by er1 on H8/300H and H8S)
470 #ifdef __H8300__
471 PUSHP S2P ; pushed in the order that reti pops them back
472 PUSHP S0P
473 PUSHP S1P
474 jsr divnorm ; make operands positive, result sign in bit 3 of S2L
475 jsr divmodsi4 ; quotient in A0/A1
476 #else
477 PUSHP S2P
478 jsr divnorm ; make operands positive, result sign in bit 3 of S2L
479 bsr ___udivsi3 ; quotient in er0
480 #endif
482 ; examine what the sign should be (shared exit, also reached from ___modsi3)
483 exitdiv:
484 btst #3,S2L
485 beq reti
487 ; should be -ve
488 #ifdef __H8300__
489 not A0H
490 not A1H
491 not A0L
492 not A1L
494 add.b #1,A1L ; add one to the inverted value, rippling the carry upwards
495 addx #0,A1H
496 addx #0,A0L
497 addx #0,A0H
498 #else /* __H8300H__ */
499 neg.l A0P
500 #endif
502 reti: ; restore what the entry points pushed, then return
503 #ifdef __H8300__
504 POPP S1P
505 POPP S0P
506 #endif
507 POPP S2P
508 rts ; return to the caller instead of running on into the divide helper
510 ; takes A0/A1 numerator (A0P for H8/300H)
511 ; A2/A3 denominator (A1P for H8/300H)
512 ; returns A0/A1 quotient (A0P for H8/300H)
513 ; S0/S1 remainder (S0P for H8/300H)
514 ; trashes S2H
516 #ifdef __H8300__
518 divmodsi4:
519 sub.w S0,S0 ; zero play area
520 mov.w S0,S1
521 mov.b A2H,S2H ; are the top three bytes of the denominator zero ?
522 or A2L,S2H
523 or A3H,S2H
524 bne DenHighNonZero ; no - shift and subtract instead
525 mov.b A0H,A0H ; find the first non-zero numerator byte; note that each
526 bne NumByte0Zero ; NumByteNZero label is entered when byte N is NOT zero
527 mov.b A0L,A0L
528 bne NumByte1Zero
529 mov.b A1H,A1H
530 bne NumByte2Zero
531 bra NumByte3Zero
532 NumByte0Zero:
533 mov.b A0H,S1L ; byte-wise long division by the 8 bit denominator:
534 divxu A3L,S1 ; S1H keeps the running remainder, S1L gets the quotient byte
535 mov.b S1L,A0H
536 NumByte1Zero:
537 mov.b A0L,S1L
538 divxu A3L,S1
539 mov.b S1L,A0L
540 NumByte2Zero:
541 mov.b A1H,S1L
542 divxu A3L,S1
543 mov.b S1L,A1H
544 NumByte3Zero:
545 mov.b A1L,S1L
546 divxu A3L,S1
547 mov.b S1L,A1L
549 mov.b S1H,S1L ; final remainder into the low byte of S1
550 mov.b #0x0,S1H
551 rts ; done - must not fall into the shift and test code
553 ; have to do the divide by shift and test
554 DenHighNonZero:
555 mov.b A0H,S1L ; pre-shift the numerator by one byte into the remainder
556 mov.b A0L,A0H
557 mov.b A1H,A0L
558 mov.b A1L,A1H
560 mov.b #0,A1L
561 mov.b #24,S2H ; only do 24 iterations
563 nextbit:
564 add.w A1,A1 ; double the answer guess
565 rotxl A0L
566 rotxl A0H
568 rotxl S1L ; double remainder
569 rotxl S1H
570 rotxl S0L
571 rotxl S0H
572 sub.w A3,S1 ; does it all fit
573 subx A2L,S0L
574 subx A2H,S0H
575 bhs setone
577 add.w A3,S1 ; no, restore mistake
578 addx A2L,S0L
579 addx A2H,S0H
581 dec S2H
582 bne nextbit
583 rts ; all 24 bits done
585 setone:
586 inc A1L ; record a one bit in the quotient
587 dec S2H
588 bne nextbit
589 rts ; all 24 bits done
591 #else /* __H8300H__ */
593 ;; This function also computes the remainder and stores it in er3.
594 .global ___udivsi3
595 ___udivsi3:
596 mov.w A1E,A1E ; denominator top word 0?
597 bne DenHighNonZero
599 ; do it the easy way, see page 107 in manual
600 mov.w A0E,A2 ; divide the upper word of the numerator first
601 extu.l A2P
602 divxu.w A1,A2P ; r2 = upper quotient word, e2 = remainder
603 mov.w A2E,A0E ; carry that remainder into the lower divide
604 divxu.w A1,A0P ; r0 = lower quotient word, e0 = final remainder
605 mov.w A0E,A3
606 mov.w A2,A0E ; er0 = full quotient
607 extu.l A3P ; er3 = remainder
608 rts ; done - must not fall into the large divisor path
610 ; er0 = er0 / er1
611 ; er3 = er0 % er1
612 ; trashes er1 er2
613 ; expects er1 >= 2^16
614 DenHighNonZero:
615 mov.l er0,er3
616 mov.l er1,er2
617 #ifdef __H8300H__
618 divmod_L21:
619 shlr.l er0
620 shlr.l er2 ; make divisor < 2^16
621 mov.w e2,e2
622 bne divmod_L21
623 #else
624 shlr.l #2,er2 ; make divisor < 2^16
625 mov.w e2,e2
626 beq divmod_L22A
627 divmod_L21:
628 shlr.l #2,er0
629 divmod_L22:
630 shlr.l #2,er2 ; make divisor < 2^16
631 mov.w e2,e2
632 bne divmod_L21
633 divmod_L22A:
634 rotxl.w r2
635 bcs divmod_L23
636 shlr.l er0
637 bra divmod_L24
638 divmod_L23:
639 rotxr.w r2
640 shlr.l #2,er0
641 divmod_L24:
642 #endif
643 ;; At this point,
644 ;; er0 contains shifted dividend
645 ;; er1 contains divisor
646 ;; er2 contains shifted divisor
647 ;; er3 contains dividend, later remainder
648 divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ)
649 extu.l er0
650 beq divmod_L25
651 subs #1,er0 ; er0 = AQ - 1
652 mov.w e1,r2
653 mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor
654 sub.w r2,e3 ; dividend - 65536 * er2
655 mov.w r1,r2
656 mulxu.w r0,er2 ; compute er3 = remainder (tentative)
657 sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor
658 divmod_L25:
659 cmp.l er1,er3 ; is the remainder still >= the divisor?
660 blo divmod_L26
661 adds #1,er0 ; yes - the quotient was one too small
662 sub.l er1,er3 ; correct the remainder
663 divmod_L26:
664 rts ; quotient in er0, remainder in er3
666 #endif
667 #endif /* L_divsi3 */
669 #ifdef L_mulhi3
671 ;; HImode multiply.
672 ; The H8/300 only has an 8*8->16 multiply.
673 ; The answer is the same as:
675 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
676 ; (we can ignore A1.h * A0.h cause that will all fall off the top)
677 ; A0 in
678 ; A1 in
679 ; A0 answer
681 #ifdef __H8300__
682 .section .text
683 .align 2
684 .global ___mulhi3
685 ___mulhi3:
686 mov.b A1L,A2L ; A2l gets srcb.l
687 mulxu A0L,A2 ; A2 gets first sub product
689 mov.b A0H,A3L ; prepare for
690 mulxu A1L,A3 ; second sub product
692 add.b A3L,A2H ; sum first two terms (only the low byte survives the * 256)
694 mov.b A1H,A3L ; third sub product
695 mulxu A0L,A3
697 add.b A3L,A2H ; almost there
698 mov.w A2,A0 ; that is
699 rts ; return the product in A0
701 #endif
702 #endif /* L_mulhi3 */
704 #ifdef L_mulsi3
706 ;; SImode multiply.
708 ;; I think that shift and add may be sufficient for this. Using the
709 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
710 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
711 ;; quickly on small args.
713 ;; A0/A1 src_a
714 ;; A2/A3 src_b
716 ;; while (a)
717 ;; {
718 ;; if (a & 1)
719 ;; r += b;
720 ;; a >>= 1;
721 ;; b <<= 1;
722 ;; }
724 .section .text
725 .align 2
727 #ifdef __H8300__
729 .global ___mulsi3
730 ___mulsi3:
731 PUSHP S0P ; S0/S1 accumulate the result r
732 PUSHP S1P
734 sub.w S0,S0 ; r = 0
735 sub.w S1,S1
737 ; while (a)
738 _top: mov.w A0,A0
739 bne _more
740 mov.w A1,A1
741 beq _done
742 _more: ; if (a & 1)
743 bld #0,A1L ; lowest bit of a into carry
744 bcc _nobit
745 ; r += b
746 add.w A3,S1
747 addx A2L,S0L
748 addx A2H,S0H
749 _nobit:
750 ; a >>= 1
751 shlr A0H
752 rotxr A0L
753 rotxr A1H
754 rotxr A1L
756 ; b <<= 1
757 add.w A3,A3
758 addx A2L,A2L
759 addx A2H,A2H
760 bra _top
762 _done:
763 mov.w S0,A0 ; return r in A0/A1
764 mov.w S1,A1
765 POPP S1P ; restore in reverse push order
766 POPP S0P
767 rts ; return instead of running off the end of the routine
769 #else /* __H8300H__ */
772 ; mulsi3 for H8/300H - based on Hitachi SH implementation
774 ; by Toshiyasu Morita
776 ; Old code:
778 ; 16b * 16b = 372 states (worst case)
779 ; 32b * 32b = 724 states (worst case)
781 ; New code:
783 ; 16b * 16b = 48 states
784 ; 16b * 32b = 72 states
785 ; 32b * 32b = 92 states
788 .global ___mulsi3
789 ___mulsi3: ; er0 = er0 * er1, with er0 = a:b and er1 = c:d as 16 bit halves; uses er2 and er3
790 mov.w r1,r2 ; ( 2 states) b * d
791 mulxu r0,er2 ; (22 states) er2 = full 32 bit product of the low halves
793 mov.w e0,r3 ; ( 2 states) a * d
794 beq L_skip1 ; ( 4 states) a == 0, nothing to add
795 mulxu r1,er3 ; (22 states)
796 add.w r3,e2 ; ( 2 states) only the low word matters, it lands in the upper half
798 L_skip1:
799 mov.w e1,r3 ; ( 2 states) c * b
800 beq L_skip2 ; ( 4 states) c == 0, nothing to add
801 mulxu r0,er3 ; (22 states)
802 add.w r3,e2 ; ( 2 states) only the low word matters, it lands in the upper half
804 L_skip2:
805 mov.l er2,er0 ; ( 2 states) return the product in er0
806 rts ; (10 states)
808 #endif
809 #endif /* L_mulsi3 */
810 #ifdef L_fixunssfsi_asm
811 /* For the h8300 we use asm to save some bytes, to
812 allow more programs to fit into the tiny address
813 space. For the H8/300H and H8S, the C version is good enough. */
814 #ifdef __H8300__
815 /* We still treat NANs different than libgcc2.c, but then, the
816 behavior is undefined anyways. */
817 .global ___fixunssfsi
818 ___fixunssfsi: ; float bits in A0:A1 (A0 is the high word), unsigned result in A0:A1
819 cmp.b #0x4f,r0h ; 0x4f000000 is 2^31: anything smaller, or negative, is left to the signed conversion
820 bge Large_num
821 jmp @___fixsfsi
822 Large_num:
823 bhi L_huge_num ; top byte above 0x4f: too big for 32 bits, saturate
824 xor.b #0x80,A0L ; low exponent bit clear means 2^31 <= value < 2^32; this also sets the implicit mantissa bit
825 bmi L_shift8
826 L_huge_num:
827 mov.w #65535,A0 ; saturate to 0xffffffff
828 mov.w A0,A1
829 rts ; return instead of falling into the shift code
830 L_shift8:
831 mov.b A0L,A0H ; result = 24 bit mantissa shifted left by 8
832 mov.b A1H,A0L
833 mov.b A1L,A1H
834 mov.b #0,A1L
835 rts
836 #endif
837 #endif /* L_fixunssfsi_asm */