1 ;; libgcc routines for the Renesas H8/300 CPU.
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
5 /* Copyright
(C
) 1994, 2000, 2001, 2002, 2003, 2004, 2009
6 Free Software Foundation
, Inc.
8 This file is free software
; you can redistribute it and/or modify it
9 under the terms of the GNU General
Public License as published by the
10 Free Software Foundation
; either version 3, or (at your option) any
13 This file is distributed
in the hope that it will be useful
, but
14 WITHOUT ANY WARRANTY
; without even the implied warranty of
15 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General
Public License for more details.
18 Under
Section 7 of GPL version
3, you are granted additional
19 permissions described
in the GCC Runtime Library Exception
, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General
Public License
and
23 a copy of the GCC Runtime Library Exception along with
this program
;
24 see the files COPYING3
and COPYING.RUNTIME respectively. If
not, see
25 <http://www.gnu.
org/licenses
/>.
*/
27 /* Assembler register definitions.
*/
70 #if defined
(__H8300H__
) || defined
(__H8300S__
) || defined
(__H8300SX__
)
89 #ifdef __NORMAL_MODE__
97 #ifdef __NORMAL_MODE__
104 #ifdef __NORMAL_MODE__
135 #endif
/* L_cmpsi2
*/
161 #endif
/* L_ucmpsi2
*/
165 ;; HImode divides for the H8/300.
166 ;; We bunch all of this into one object file since there are several
167 ;; "supporting routines".
169 ; general purpose normalize routine
173 ; turns both into +ve numbers, and leaves what the answer sign
180 or A0H
,A0H
; is divisor > 0
183 not A0H
; no - then make it +ve
186 _lab1: or A1H
,A1H
; look at dividend
188 not A1H
; it is -ve, make it positive
191 xor #
0x8,A2L
; and toggle sign of result
193 ;; Basically the same, except that the sign of the divisor determines
196 or A0H
,A0H
; is divisor > 0
199 not A0H
; no - then make it +ve
202 _lab7: or A1H
,A1H
; look at dividend
204 not A1H
; it is -ve, make it positive
215 negans: btst #
3,A2L
; should answer be negative ?
217 not A0H
; yes, so make it so
242 ; D high 8 bits of denom
243 ; d low 8 bits of denom
244 ; N high 8 bits of num
245 ; n low 8 bits of num
246 ; M high 8 bits of mod
247 ; m low 8 bits of mod
248 ; Q high 8 bits of quot
249 ; q low 8 bits of quot
252 ; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
253 ; see how to partition up the expression.
259 sub.w A3
,A3
; Nn Dd xP 00
265 ; we know that D == 0 and N is != 0
266 mov.b A0H
,A3L
; Nn Dd xP 0N
270 _lab6: mov.b A0L
,A3L
; n
274 mov.b #
0x0,A3H
; Qq 0m
277 ; D != 0 - which means the denominator does not fit in 8 bits, so the
278 ; 16/8 bit divide cannot be used directly; loop around to get the result.
281 mov.b A0H
,A3L
; Nn Dd xP 0N
282 mov.b #
0x0,A0H
; high byte of answer has to be zero
284 div8: add.b A0L
,A0L
; n*=2
285 rotxl A3L
; Make remainder bigger
288 bhs setbit
; set a bit ?
289 add.w A1
,A3
; no : too far , Q+=N
295 setbit: inc A0L
; do insert bit
300 #endif
/* __H8300__
*/
301 #endif
/* L_divhi3
*/
305 ;; 4 byte integer divides for the H8/300.
307 ;; We have one routine which does all the work and lots of
308 ;; little ones which prepare the args and massage the sign.
309 ;; We bunch all of this into one object file since there are several
310 ;; "supporting routines".
315 ; Put abs SIs into r0/r1 and r2/r3, and leave the sign of the result in r6l.
316 ; This function is here to keep branch displacements small.
321 mov.b A0H
,A0H
; is the numerator -ve
322 stc ccr
,S2L
; keep the sign in bit 3 of S2L
336 mov.b A2H
,A2H
; is the denominator -ve
346 xor.b #
0x08,S2L
; toggle the result sign
350 ;; Basically the same, except that the sign of the divisor determines
353 mov.b A0H
,A0H
; is the numerator -ve
354 stc ccr
,S2L
; keep the sign in bit 3 of S2L
368 mov.b A2H
,A2H
; is the denominator -ve
381 #else
/* __H8300H__
*/
384 mov.l A0P
,A0P
; is the numerator -ve
385 stc ccr
,S2L
; keep the sign in bit 3 of S2L
388 neg.l A0P
; negate arg
391 mov.l A1P
,A1P
; is the denominator -ve
394 neg.l A1P
; negate arg
395 xor.b #
0x08,S2L
; toggle the result sign
400 ;; Basically the same, except that the sign of the divisor determines
403 mov.l A0P
,A0P
; is the numerator -ve
404 stc ccr
,S2L
; keep the sign in bit 3 of S2L
407 neg.l A0P
; negate arg
410 mov.l A1P
,A1P
; is the denominator -ve
413 neg.l A1P
; negate arg
421 ; denominator in A2/A3
441 ;; H8/300H and H8S version of ___udivsi3 is defined later in
483 ; examine what the sign should be
499 #else
/* __H8300H__
*/
511 ; takes A0/A1 numerator (A0P for H8/300H)
512 ; A2/A3 denominator (A1P for H8/300H)
513 ; returns A0/A1 quotient (A0P for H8/300H)
514 ; S0/S1 remainder (S0P for H8/300H)
520 sub.w S0
,S0
; zero play area
554 ; have to do the divide by shift and test
562 mov.b #
24,S2H
; only do 24 iterations
565 add.w A1
,A1
; double the answer guess
569 rotxl S1L
; double remainder
573 sub.w A3
,S1
; does it all fit
578 add.w A3
,S1
; no, restore mistake
592 #else
/* __H8300H__
*/
594 ;; This function also computes the remainder and stores it in er3.
597 mov.w A1E
,A1E
; denominator top word 0?
600 ; do it the easy way, see page 107 in manual
614 ; expects er1 >= 2^16
621 shlr.l er2
; make divisor < 2^16
625 shlr.l #
2,er2
; make divisor < 2^16
631 shlr.l #
2,er2
; make divisor < 2^16
645 ;; er0 contains shifted dividend
646 ;; er1 contains divisor
647 ;; er2 contains shifted divisor
648 ;; er3 contains dividend, later remainder
649 divxu.w r2
,er0
; r0 now contains the approximate quotient (AQ)
652 subs #
1,er0
; er0 = AQ - 1
654 mulxu.w r0
,er2
; er2 = upper (AQ - 1) * divisor
655 sub.w r2
,e3
; dividend - 65536 * er2
657 mulxu.w r0
,er2
; compute er3 = remainder (tentative)
658 sub.l er2
,er3
; er3 = dividend - (AQ - 1) * divisor
660 cmp.l er1
,er3
; is divisor < remainder?
663 sub.l er1
,er3
; correct the remainder
668 #endif
/* L_divsi3
*/
673 ; The H8/300 only has an 8*8->16 multiply.
674 ; The answer is the same as:
676 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
677 ; (we can ignore srca.h * srcb.h because that product falls entirely off the top)
687 mov.b A1L
,A2L
; A2l gets srcb.l
688 mulxu A0L
,A2
; A2 gets first sub product
690 mov.b A0H
,A3L
; prepare for
691 mulxu A1L
,A3
; second sub product
693 add.b A3L
,A2H
; sum first two terms
695 mov.b A1H
,A3L
; third sub product
698 add.b A3L
,A2H
; almost there
699 mov.w A2
,A0
; that is the product - move it to A0
703 #endif
/* L_mulhi3
*/
709 ;; I think that shift and add may be sufficient for this. Using the
710 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
711 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
712 ;; quickly on small args.
770 #else
/* __H8300H__
*/
773 ; mulsi3 for H8/300H - based on Renesas SH implementation
775 ; by Toshiyasu Morita
779 ; 16b * 16b = 372 states (worst case)
780 ; 32b * 32b = 724 states (worst case)
784 ; 16b * 16b = 48 states
785 ; 16b * 32b = 72 states
786 ; 32b * 32b = 92 states
791 mov.w r1
,r2
; ( 2 states) b * d
792 mulxu r0
,er2
; (22 states)
794 mov.w e0
,r3
; ( 2 states) a * d
795 beq L_skip1
; ( 4 states)
796 mulxu r1
,er3
; (22 states)
797 add.w r3
,e2
; ( 2 states)
800 mov.w e1
,r3
; ( 2 states) c * b
801 beq L_skip2
; ( 4 states)
802 mulxu r0
,er3
; (22 states)
803 add.w r3
,e2
; ( 2 states)
806 mov.l er2
,er0
; ( 2 states)
810 #endif
/* L_mulsi3
*/
811 #ifdef L_fixunssfsi_asm
812 /* For the h8300 we use asm to save some bytes
, to
813 allow more programs to fit
into the tiny address
814 space. For the H8
/300H and H8S
, the C version is good enough.
*/
816 /* We still treat NaNs differently from libgcc2.c
, but then
, the
817 behavior is undefined anyway.
*/
818 .
global ___fixunssfsi
838 #endif
/* L_fixunssfsi_asm
*/