* ChangeLog: Follow spelling conventions.
[official-gcc.git] / gcc / config / h8300 / lib1funcs.asm
blobb86e9efe5e24ff42c44968587cebf05a48359d42
1 ;; libgcc routines for the Hitachi H8/300 CPU.
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@hsa.hitachi.com>
5 /* Copyright (C) 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
7 This file is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
10 later version.
12 In addition to the permissions in the GNU General Public License, the
13 Free Software Foundation gives you unlimited permission to link the
14 compiled version of this file into combinations with other programs,
15 and to distribute those combinations without any restriction coming
16 from the use of this file. (The General Public License restrictions
17 do apply in other respects; for example, they cover modification of
18 the file, and distribution when not linked into a combine
19 executable.)
21 This file is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; see the file COPYING. If not, write to
28 the Free Software Foundation, 59 Temple Place - Suite 330,
29 Boston, MA 02111-1307, USA. */
31 /* Assembler register definitions: An and Sn name the 16-bit registers r0-r6; an L or H suffix selects the low or high byte. */
33 #define A0 r0
34 #define A0L r0l
35 #define A0H r0h
37 #define A1 r1
38 #define A1L r1l
39 #define A1H r1h
41 #define A2 r2
42 #define A2L r2l
43 #define A2H r2h
45 #define A3 r3
46 #define A3L r3l
47 #define A3H r3h
49 #define S0 r4 /* S0-S2 are pushed and popped around use by the SImode routines below */
50 #define S0L r4l
51 #define S0H r4h
53 #define S1 r5
54 #define S1L r5l
55 #define S1H r5h
57 #define S2 r6
58 #define S2L r6l
59 #define S2H r6h
61 #ifdef __H8300__ /* pointer-sized operations and registers for the plain H8/300 */
62 #define MOVP mov.w /* pointers are 16 bits */
63 #define ADDP add.w
64 #define CMPP cmp.w
65 #define PUSHP push
66 #define POPP pop
68 #define A0P r0 /* the ...P names are the pointer-sized view of each register */
69 #define A1P r1
70 #define A2P r2
71 #define A3P r3
72 #define S0P r4
73 #define S1P r5
74 #define S2P r6
75 #endif
77 #if defined (__H8300H__) || defined (__H8300S__) /* same names, 32-bit variants */
78 #define MOVP mov.l /* pointers are 32 bits */
79 #define ADDP add.l
80 #define CMPP cmp.l
81 #define PUSHP push.l
82 #define POPP pop.l
84 #define A0P er0 /* pointer-sized view is the full 32-bit register */
85 #define A1P er1
86 #define A2P er2
87 #define A3P er3
88 #define S0P er4
89 #define S1P er5
90 #define S2P er6
92 #define A0E e0 /* the ...E names are the e registers paired with A0-A3 */
93 #define A1E e1
94 #define A2E e2
95 #define A3E e3
96 #endif
98 #ifdef __H8300H__
99 .h8300h
100 #endif
102 #ifdef __H8300S__
103 .h8300s
104 #endif
106 #ifdef L_cmpsi2
107 #ifdef __H8300__
108 .section .text
109 .align 2
110 .global ___cmpsi2
111 ___cmpsi2: ; signed compare of A0:A1 with A2:A3; A0 = 0 (less), 1 (equal) or 2 (greater)
112 cmp.w A0,A2 ; high words: flags reflect A2 - A0
113 bne .L2 ; high words differ: decide on them (signed)
114 cmp.w A1,A3 ; high words equal: flags reflect A3 - A1
115 bne .L4 ; low words differ: decide on them (unsigned)
116 mov.w #1,A0 ; operands are equal
117 rts
118 .L2:
119 bgt .L5 ; A2 > A0 signed: first operand is the smaller one
120 .L3:
121 mov.w #2,A0 ; first operand is greater
122 rts
123 .L4:
124 bls .L3 ; A3 <= A1 unsigned: first operand is greater
125 .L5:
126 sub.w A0,A0 ; first operand is smaller
127 rts
128 .end
129 #endif
130 #endif /* L_cmpsi2 */
132 #ifdef L_ucmpsi2
133 #ifdef __H8300__
134 .section .text
135 .align 2
136 .global ___ucmpsi2
137 ___ucmpsi2: ; unsigned compare of A0:A1 with A2:A3; A0 = 0 (lower), 1 (equal) or 2 (higher)
138 cmp.w A0,A2 ; high words: flags reflect A2 - A0
139 bne .L2 ; high words differ: decide on them
140 cmp.w A1,A3 ; high words equal: flags reflect A3 - A1
141 bne .L4 ; low words differ: decide on them
142 mov.w #1,A0 ; operands are equal
143 rts
144 .L2:
145 bhi .L5 ; A2 > A0 unsigned: first operand is the lower one
146 .L3:
147 mov.w #2,A0 ; first operand is higher
148 rts
149 .L4:
150 bls .L3 ; A3 <= A1 unsigned: first operand is higher
151 .L5:
152 sub.w A0,A0 ; first operand is lower
153 rts
154 .end
155 #endif
156 #endif /* L_ucmpsi2 */
158 #ifdef L_divhi3
160 ;; HImode divides for the H8/300.
161 ;; We bunch all of this into one object file since there are several
162 ;; "supporting routines".
164 ; general purpose normalize routine for the signed HImode divide
166 ; dividend in A0 (the callers compute A0/A1)
167 ; divisor in A1
168 ; turns both into +ve numbers, and leaves what the answer sign
169 ; should be in A2L (1 = negate the result, 0 = leave it alone)
171 #ifdef __H8300__
172 .section .text
173 .align 2
174 divnorm:
175 mov.b #0x0,A2L ; start by assuming a +ve result
176 or A0H,A0H ; is dividend >= 0 ?
177 bge _lab1
178 not A0H ; no - then make it +ve
179 not A0L
180 adds #1,A0 ; complement plus one negates A0
181 xor #0x1,A2L ; and remember that in A2L
182 _lab1: or A1H,A1H ; look at divisor
183 bge _lab2
184 not A1H ; it is -ve, make it positive
185 not A1L
186 adds #1,A1
187 xor #0x1,A2L; and toggle sign of result
188 _lab2: rts
189 ;; Basically the same as divnorm, except that only the sign of the
190 ;; dividend (A0) is recorded in A2L; a -ve divisor (A1) does not toggle it.
191 modnorm:
192 mov.b #0x0,A2L ; start by assuming a +ve result
193 or A0H,A0H ; is dividend >= 0 ?
194 bge _lab7
195 not A0H ; no - then make it +ve
196 not A0L
197 adds #1,A0
198 xor #0x1,A2L ; and remember that in A2L
199 _lab7: or A1H,A1H ; look at divisor
200 bge _lab8
201 not A1H ; it is -ve, make it positive
202 not A1L
203 adds #1,A1 ; A2L is deliberately left untouched here
204 _lab8: rts
206 ; A0=A0/A1 signed
208 .global ___divhi3
209 ___divhi3:
210 bsr divnorm ; both operands made +ve, result sign flag left in A2L
211 bsr ___udivhi3 ; unsigned divide of the magnitudes, quotient in A0
212 negans: or A2L,A2L ; should answer be negative ? (___modhi3 also branches here)
213 beq _lab4
214 not A0H ; yes, so make it so
215 not A0L
216 adds #1,A0 ; complement plus one negates A0
217 _lab4: rts
219 ; A0=A0%A1 signed
221 .global ___modhi3
222 ___modhi3:
223 bsr modnorm ; both operands made +ve, sign of the dividend left in A2L
224 bsr ___udivhi3 ; unsigned divide of the magnitudes, remainder in A3
225 mov A3,A0 ; the remainder is the value to return
226 bra negans ; negate it if A2L says so, then return
228 ; A0=A0%A1 unsigned
230 .global ___umodhi3
231 ___umodhi3:
232 bsr ___udivhi3 ; quotient in A0, remainder in A3
233 mov A3,A0 ; return the remainder
234 rts
236 ; A0=A0/A1 unsigned
237 ; A3=A0%A1 unsigned
238 ; A2H trashed
239 ; D high 8 bits of denom
240 ; d low 8 bits of denom
241 ; N high 8 bits of num
242 ; n low 8 bits of num
243 ; M high 8 bits of mod
244 ; m low 8 bits of mod
245 ; Q high 8 bits of quot
246 ; q low 8 bits of quot
247 ; P preserve
249 ; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
250 ; see how to partition up the expression.
252 .global ___udivhi3
253 ___udivhi3:
254 ; A0 A1 A2 A3
255 ; Nn Dd P
256 sub.w A3,A3 ; Nn Dd xP 00
257 or A1H,A1H
258 bne divlongway ; D != 0: divxu cannot be used, go bit by bit
259 or A0H,A0H
260 beq _lab6 ; N == 0: only the low byte needs dividing
262 ; we know that D == 0 and N is != 0
263 mov.b A0H,A3L ; Nn Dd xP 0N
264 divxu A1L,A3 ; MQ
265 mov.b A3L,A0H ; Q
266 ; dealt with N, do n
267 _lab6: mov.b A0L,A3L ; n
268 divxu A1L,A3 ; mq
269 mov.b A3L,A0L ; Qq
270 mov.b A3H,A3L ; m
271 mov.b #0x0,A3H ; Qq 0m
272 rts
274 ; D != 0 - which means the denominator is at least 256, so the quotient
275 ; fits in 8 bits; loop around 8 times to get the result.
277 divlongway:
278 mov.b A0H,A3L ; Nn Dd xP 0N
279 mov.b #0x0,A0H ; high byte of answer has to be zero
280 mov.b #0x8,A2H ; 8 iterations, one per quotient bit
281 div8: add.b A0L,A0L ; n*=2, top bit goes to carry
282 rotxl A3L ; Make remainder bigger (carry shifts in)
283 rotxl A3H
284 sub.w A1,A3 ; remainder -= denominator
285 bhs setbit ; no borrow: set a bit
286 add.w A1,A3 ; no : too far , add the denominator back
288 dec A2H
289 bne div8 ; next bit
290 rts
292 setbit: inc A0L ; do insert bit
293 dec A2H
294 bne div8 ; next bit
295 rts
297 #endif /* __H8300__ */
298 #endif /* L_divhi3 */
300 #ifdef L_divsi3
302 ;; 4 byte integer divides for the H8/300.
304 ;; We have one routine which does all the work and lots of
305 ;; little ones which prepare the args and massage the sign.
306 ;; We bunch all of this into one object file since there are several
307 ;; "supporting routines".
309 .section .text
310 .align 2
312 ; Put the absolute values of both SImode operands into r0/r1 and r2/r3, and leave a 1 in r6l if the result must be negated.
313 ; This function is here to keep branch displacements small.
315 #ifdef __H8300__
317 divnorm: ; make A0:A1 and A2:A3 +ve; S2L = 1 if the quotient must be negated
318 mov.b #0,S2L ; keep the sign in S2
319 mov.b A0H,A0H ; is the numerator -ve
320 bge postive
322 ; negate arg
323 not A0H
324 not A1H
325 not A0L
326 not A1L
328 add #1,A1L ; complement plus one, carry rippled up through all four bytes
329 addx #0,A1H
330 addx #0,A0L
331 addx #0,A0H
333 mov.b #1,S2L ; the sign will be -ve
334 postive:
335 mov.b A2H,A2H ; is the denominator -ve
336 bge postive2
337 not A2L
338 not A2H
339 not A3L
340 not A3H
341 add.b #1,A3L
342 addx #0,A3H
343 addx #0,A2L
344 addx #0,A2H
345 xor #1,S2L ; toggle result sign
346 postive2:
347 rts
349 ;; Basically the same as divnorm, except that only the sign of the
350 ;; numerator is recorded in S2L; a -ve denominator does not toggle it.
351 modnorm:
352 mov.b #0,S2L ; keep the sign in S2
353 mov.b A0H,A0H ; is the numerator -ve
354 bge mpostive
356 ; negate arg
357 not A0H
358 not A1H
359 not A0L
360 not A1L
362 add #1,A1L ; complement plus one, carry rippled up through all four bytes
363 addx #0,A1H
364 addx #0,A0L
365 addx #0,A0H
367 mov.b #1,S2L ; the sign will be -ve
368 mpostive:
369 mov.b A2H,A2H ; is the denominator -ve
370 bge mpostive2
371 not A2L
372 not A2H
373 not A3L
374 not A3H
375 add.b #1,A3L
376 addx #0,A3H
377 addx #0,A2L
378 addx #0,A2H
379 mpostive2:
380 rts
382 #else /* __H8300H__ */
384 divnorm: ; make A0P and A1P +ve; S2L = 1 if the quotient must be negated
385 mov.b #0,S2L ; keep the sign in S2
386 mov.l A0P,A0P ; is the numerator -ve
387 bge postive
389 neg.l A0P ; negate arg
390 mov.b #1,S2L ; the sign will be -ve
392 postive:
393 mov.l A1P,A1P ; is the denominator -ve
394 bge postive2
396 neg.l A1P ; negate arg
397 xor.b #1,S2L ; toggle result sign
399 postive2:
400 rts
402 ;; Basically the same as divnorm, except that only the sign of the
403 ;; numerator is recorded in S2L; a -ve denominator does not toggle it.
404 modnorm:
405 mov.b #0,S2L ; keep the sign in S2
406 mov.l A0P,A0P ; is the numerator -ve
407 bge mpostive
409 neg.l A0P ; negate arg
410 mov.b #1,S2L ; the sign will be -ve
412 mpostive:
413 mov.l A1P,A1P ; is the denominator -ve
414 bge mpostive2
416 neg.l A1P ; negate arg
418 mpostive2:
419 rts
421 #endif
423 ; numerator in A0/A1
424 ; denominator in A2/A3
425 .global ___modsi3
426 ___modsi3: ; signed SImode remainder
427 PUSHP S2P ; pushed first so that reti can pop it last, after its sign flag is tested
428 PUSHP S0P
429 PUSHP S1P
431 bsr modnorm ; operands made +ve, S2L = 1 if the numerator was -ve
432 bsr divmodsi4 ; remainder comes back in S0/S1 (S0P for 300H)
433 #ifdef __H8300__
434 mov S0,A0 ; move the remainder into the return registers
435 mov S1,A1
436 #else
437 mov.l S0P,A0P ; move the remainder into the return register
438 #endif
439 bra exitdiv ; restore S0/S1, apply the sign, restore S2
441 .global ___udivsi3
442 ___udivsi3: ; unsigned SImode quotient
443 PUSHP S2P ; same push order as the signed entry points, so exitdiv can be shared
444 PUSHP S0P
445 PUSHP S1P
446 mov.b #0,S2L ; keep sign low
447 bsr divmodsi4 ; quotient is left in the return registers
448 bra exitdiv ; S2L is tested there before S2 is restored
450 .global ___umodsi3
451 ___umodsi3: ; unsigned SImode remainder
452 PUSHP S2P ; same push order as the signed entry points, so exitdiv can be shared
453 PUSHP S0P
454 PUSHP S1P
455 mov.b #0,S2L ; keep sign low
456 bsr divmodsi4 ; remainder comes back in S0/S1 (S0P for 300H)
457 #ifdef __H8300__
458 mov S0,A0 ; move the remainder into the return registers
459 mov S1,A1
460 #else
461 mov.l S0P,A0P ; move the remainder into the return register
462 #endif
463 bra exitdiv ; S2L is tested there before S2 is restored
465 .global ___divsi3
466 ___divsi3: ; signed SImode quotient; falls through into the shared exit path
467 PUSHP S2P
468 PUSHP S0P
469 PUSHP S1P
470 jsr divnorm ; operands made +ve, S2L = 1 if the quotient must be negated
471 jsr divmodsi4 ; quotient is left in the return registers
473 ; examine what the sign should be
474 exitdiv: ; common exit for ___modsi3, ___udivsi3, ___umodsi3 and ___divsi3
475 POPP S1P
476 POPP S0P
478 or S2L,S2L ; test the sign flag while S2 still holds it
479 beq reti
481 ; should be -ve
482 #ifdef __H8300__
483 not A0H
484 not A1H
485 not A0L
486 not A1L
488 add #1,A1L ; complement plus one, carry rippled up through all four bytes
489 addx #0,A1H
490 addx #0,A0L
491 addx #0,A0H
492 #else /* __H8300H__ */
493 neg.l A0P
494 #endif
496 reti:
497 POPP S2P
498 rts
500 ; takes A0/A1 numerator (A0P for 300H)
501 ; A2/A3 denominator (A1P for 300H)
502 ; returns A0/A1 quotient (A0P for 300H)
503 ; S0/S1 remainder (S0P for 300H)
504 ; trashes S2
506 #ifdef __H8300__
508 divmodsi4:
509 sub.w S0,S0 ; zero play area
510 mov.w S0,S1
511 mov.b A2H,S2H ; OR the upper three denominator bytes together
512 or A2L,S2H
513 or A3H,S2H
514 bne DenHighZero ; some upper byte is set (despite the label name): use the loop
515 mov.b A0H,A0H ; denominator fits in 8 bits: start at the first non-zero
516 bne NumByte0Zero ; numerator byte; each label is entered when that byte is non-zero
517 mov.b A0L,A0L
518 bne NumByte1Zero
519 mov.b A1H,A1H
520 bne NumByte2Zero
521 bra NumByte3Zero
522 NumByte0Zero:
523 mov.b A0H,S1L ; S1H carries the running remainder into each divxu
524 divxu A3L,S1
525 mov.b S1L,A0H ; quotient byte replaces the numerator byte
526 NumByte1Zero:
527 mov.b A0L,S1L
528 divxu A3L,S1
529 mov.b S1L,A0L
530 NumByte2Zero:
531 mov.b A1H,S1L
532 divxu A3L,S1
533 mov.b S1L,A1H
534 NumByte3Zero:
535 mov.b A1L,S1L
536 divxu A3L,S1
537 mov.b S1L,A1L
539 mov.b S1H,S1L ; final remainder into the low byte of S1
540 mov.b #0x0,S1H
541 rts
543 ; have to do the divide by shift and test
544 DenHighZero:
545 mov.b A0H,S1L ; shift the numerator up one byte into the remainder
546 mov.b A0L,A0H
547 mov.b A1H,A0L
548 mov.b A1L,A1H
550 mov.b #0,A1L
551 mov.b #24,S2H ; only do 24 iterations
553 nextbit:
554 add.w A1,A1 ; double the answer guess
555 rotxl A0L
556 rotxl A0H
558 rotxl S1L ; double remainder
559 rotxl S1H
560 rotxl S0L
561 rotxl S0H
562 sub.w A3,S1 ; does it all fit
563 subx A2L,S0L
564 subx A2H,S0H
565 bhs setone ; no borrow: the denominator fitted
567 add.w A3,S1 ; no, restore mistake
568 addx A2L,S0L
569 addx A2H,S0H
571 dec S2H
572 bne nextbit
573 rts
575 setone:
576 inc A1L ; record a 1 bit in the quotient
577 dec S2H
578 bne nextbit
579 rts
581 #else /* __H8300H__ */
583 divmodsi4: ; A0P / A1P: quotient in A0P, remainder in S0P; A2 and S2H are used as scratch
584 sub.l S0P,S0P ; zero play area
585 mov.w A1E,A1E ; denominator top word 0?
586 bne DenHighZero ; no (despite the label name): use the shift-and-test loop
588 ; do it the easy way, see page 107 in manual
589 mov.w A0E,A2 ; divide the upper numerator word first
590 extu.l A2P
591 divxu.w A1,A2P
592 mov.w A2E,A0E ; its remainder becomes the top of the second dividend
593 divxu.w A1,A0P
594 mov.w A0E,S0 ; final remainder
595 mov.w A2,A0E ; upper quotient word
596 extu.l S0P
597 rts
599 DenHighZero:
600 mov.w A0E,A2 ; shift the numerator up one byte, top byte into the remainder
601 mov.b A2H,S0L
602 mov.b A2L,A2H
603 mov.b A0H,A2L
604 mov.w A2,A0E
605 mov.b A0L,A0H
606 mov.b #0,A0L
607 mov.b #24,S2H ; only do 24 iterations
609 nextbit:
610 shll.l A0P ; double the answer guess
611 rotxl.l S0P ; double remainder
612 sub.l A1P,S0P ; does it all fit?
613 bhs setone ; no borrow: the denominator fitted
615 add.l A1P,S0P ; no, restore mistake
616 dec S2H
617 bne nextbit
618 rts
620 setone:
621 inc A0L ; record a 1 bit in the quotient
622 dec S2H
623 bne nextbit
624 rts
626 #endif
627 #endif /* L_divsi3 */
629 #ifdef L_mulhi3
631 ;; HImode multiply.
632 ; The H8/300 only has an 8*8->16 multiply.
633 ; The answer is the same as:
635 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
636 ; (we can ignore A1.h * A0.h because that will all fall off the top)
637 ; A0 in
638 ; A1 in
639 ; A0 answer
641 #ifdef __H8300__
642 .section .text
643 .align 2
644 .global ___mulhi3
645 ___mulhi3:
646 mov.b A1L,A2L ; A2l gets srcb.l
647 mulxu A0L,A2 ; A2 gets first sub product
649 mov.b A0H,A3L ; prepare for
650 mulxu A1L,A3 ; second sub product
652 add.b A3L,A2H ; sum first two terms (only the low byte of the cross product survives)
654 mov.b A1H,A3L ; third sub product
655 mulxu A0L,A3
657 add.b A3L,A2H ; almost there
658 mov.w A2,A0 ; that is
659 rts
661 #endif
662 #endif /* L_mulhi3 */
664 #ifdef L_mulsi3
666 ;; SImode multiply.
668 ;; I think that shift and add may be sufficient for this. Using the
669 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
670 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
671 ;; quickly on small args.
673 ;; A0/A1 src_a
674 ;; A2/A3 src_b
676 ;; while (a)
677 ;; {
678 ;; if (a & 1)
679 ;; r += b;
680 ;; a >>= 1;
681 ;; b <<= 1;
682 ;; }
684 .section .text
685 .align 2
687 #ifdef __H8300__
689 .global ___mulsi3
690 ___mulsi3: ; A0:A1 = A0:A1 * A2:A3 by shift and add; the sum is built in S0:S1
691 PUSHP S0P
692 PUSHP S1P
693 PUSHP S2P
695 sub.w S0,S0 ; r = 0
696 sub.w S1,S1
698 ; while (a)
699 _top: mov.w A0,A0
700 bne _more
701 mov.w A1,A1
702 beq _done
703 _more: ; if (a & 1)
704 bld #0,A1L ; lowest bit of a into carry
705 bcc _nobit
706 ; r += b
707 add.w A3,S1
708 addx A2L,S0L
709 addx A2H,S0H
710 _nobit:
711 ; a >>= 1
712 shlr A0H
713 rotxr A0L
714 rotxr A1H
715 rotxr A1L
717 ; b <<= 1
718 add.w A3,A3
719 addx A2L,A2L
720 addx A2H,A2H
721 bra _top
723 _done:
724 mov.w S0,A0 ; move the product into the return registers
725 mov.w S1,A1
726 POPP S2P ; restore in the reverse of the push order
727 POPP S1P
728 POPP S0P
729 rts
731 #else /* __H8300H__ */
734 ; mulsi3 for H8/300H - based on Hitachi SH implementation
736 ; by Toshiyasu Morita
738 ; Old code:
740 ; 16b * 16b = 372 states (worst case)
741 ; 32b * 32b = 724 states (worst case)
743 ; New code:
745 ; 16b * 16b = 48 states
746 ; 16b * 32b = 72 states
747 ; 32b * 32b = 92 states
750 .global ___mulsi3
751 ___mulsi3: ; er0 = er0 * er1; in the notes below e0 = a, r0 = b, e1 = c, r1 = d
752 mov.w r1,r2 ; ( 2 states) b * d
753 mulxu r0,er2 ; (22 states) full 32-bit partial product in er2
755 mov.w e0,r3 ; ( 2 states) a * d
756 beq L_skip1 ; ( 4 states) a is zero: this cross term contributes nothing
757 mulxu r1,er3 ; (22 states)
758 add.w r3,e2 ; ( 2 states) only its low word reaches the upper result word
760 L_skip1:
761 mov.w e1,r3 ; ( 2 states) c * b
762 beq L_skip2 ; ( 4 states) c is zero: this cross term contributes nothing
763 mulxu r0,er3 ; (22 states)
764 add.w r3,e2 ; ( 2 states) only its low word reaches the upper result word
766 L_skip2:
767 mov.l er2,er0 ; ( 2 states) move the product into the return register
768 rts ; (10 states)
770 #endif
771 #endif /* L_mulsi3 */
772 #ifdef L_fixunssfsi_asm
773 /* For the h8300 we use asm to save some bytes, to
774 allow more programs to fit into the tiny address
775 space. For the H8/300H and H8S, the C version is good enough. */
776 #ifdef __H8300__
777 /* We still treat NANs different than libgcc2.c, but then, the
778 behaviour is undefined anyways. */
779 .global ___fixunssfsi
780 ___fixunssfsi: ; single float in A0:A1 -> unsigned 32-bit integer in A0:A1
781 cmp.b #0x4f,r0h ; top byte 0x4f holds biased exponent 0x9e/0x9f, i.e. 2^31 and up
782 bge Large_num
783 jmp @___fixsfsi ; below 2^31, or sign bit set: let the signed conversion handle it
784 Large_num:
785 bhi L_huge_num ; top byte above 0x4f: too big for 32 bits
786 xor.b #0x80,A0L ; flip the exponent LSB; if it was 0 this also sets the implicit 1
787 bmi L_shift8 ; it was 0: value is in [2^31, 2^32)
788 L_huge_num:
789 mov.w #65535,A0 ; saturate to 0xffffffff
790 mov.w A0,A1
791 rts
792 L_shift8:
793 mov.b A0L,A0H ; significand (with its leading 1) shifted left by 8 bits
794 mov.b A1H,A0L
795 mov.b A1L,A1H
796 mov.b #0,A1L
797 rts
798 #endif
799 #endif /* L_fixunssfsi_asm */