1 ;; libgcc routines for the Renesas H8/300 CPU.
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
5 /* Copyright
(C
) 1994, 2000, 2001, 2002, 2003, 2004
6 Free Software Foundation
, Inc.
8 This file is free software
; you can redistribute it and/or modify it
9 under the terms of the GNU General
Public License as published by the
10 Free Software Foundation
; either version 2, or (at your option) any
13 In addition to the permissions
in the GNU General
Public License
, the
14 Free Software Foundation gives you unlimited permission to link the
15 compiled version of
this file
into combinations with other programs
,
16 and to distribute those combinations without any restriction coming
17 from the use of
this file.
(The General
Public License restrictions
18 do apply
in other respects
; for example, they cover modification of
19 the file
, and distribution when
not linked
into a combine
22 This file is distributed
in the hope that it will be useful
, but
23 WITHOUT ANY WARRANTY
; without even the implied warranty of
24 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 General
Public License for more details.
27 You should have received a copy of the GNU General
Public License
28 along with
this program
; see the file COPYING. If not, write to
29 the Free Software Foundation
, 59 Temple Place
- Suite
330,
30 Boston
, MA
02111-1307, USA.
*/
32 /* Assembler register definitions.
*/
75 #if defined
(__H8300H__
) || defined
(__H8300S__
) || defined
(__H8300SX__
)
94 #ifdef __NORMAL_MODE__
102 #ifdef __NORMAL_MODE__
109 #ifdef __NORMAL_MODE__
140 #endif
/* L_cmpsi2
*/
166 #endif
/* L_ucmpsi2
*/
170 ;; HImode divides for the H8/300.
171 ;; We bunch all of this into one object file since there are several
172 ;; "supporting routines".
174 ; general purpose normalize routine
178 ; turns both into +ve numbers, and leaves what the answer sign
185 or A0H
,A0H
; is divisor > 0
188 not A0H
; no - then make it +ve
191 _lab1: or A1H
,A1H
; look at dividend
193 not A1H
; it is -ve, make it positive
196 xor #
0x8,A2L
; and toggle sign of result
198 ;; Basically the same, except that the sign of the divisor determines
201 or A0H
,A0H
; is divisor > 0
204 not A0H
; no - then make it +ve
207 _lab7: or A1H
,A1H
; look at dividend
209 not A1H
; it is -ve, make it positive
220 negans: btst #
3,A2L
; should answer be negative ?
222 not A0H
; yes, so make it so
247 ; D high 8 bits of denom
248 ; d low 8 bits of denom
249 ; N high 8 bits of num
250 ; n low 8 bits of num
251 ; M high 8 bits of mod
252 ; m low 8 bits of mod
253 ; Q high 8 bits of quot
254 ; q low 8 bits of quot
257 ; The H8/300 only has a 16/8 bit divide, so we look at the incoming operands and
258 ; see how to partition up the expression.
264 sub.w A3
,A3
; Nn Dd xP 00
270 ; we know that D == 0 and N is != 0
271 mov.b A0H
,A3L
; Nn Dd xP 0N
275 _lab6: mov.b A0L
,A3L
; n
279 mov.b #
0x0,A3H
; Qq 0m
282 ; D != 0 - which means the denominator is >= 256, so the quotient fits in 8 bits;
283 ; loop around to get the result.
286 mov.b A0H
,A3L
; Nn Dd xP 0N
287 mov.b #
0x0,A0H
; high byte of answer has to be zero
289 div8: add.b A0L
,A0L
; n*=2
290 rotxl A3L
; Make remainder bigger
293 bhs setbit
; set a bit ?
294 add.w A1
,A3
; no : too far , Q+=N
300 setbit: inc A0L
; do insert bit
305 #endif
/* __H8300__
*/
306 #endif
/* L_divhi3
*/
310 ;; 4 byte integer divides for the H8/300.
312 ;; We have one routine which does all the work and lots of
313 ;; little ones which prepare the args and massage the sign.
314 ;; We bunch all of this into one object file since there are several
315 ;; "supporting routines".
320 ; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with the sign of the result.
321 ; This function is here to keep branch displacements small.
326 mov.b A0H
,A0H
; is the numerator -ve
327 stc ccr
,S2L
; keep the sign in bit 3 of S2L
341 mov.b A2H
,A2H
; is the denominator -ve
351 xor.b #
0x08,S2L
; toggle the result sign
355 ;; Basically the same, except that the sign of the divisor determines
358 mov.b A0H
,A0H
; is the numerator -ve
359 stc ccr
,S2L
; keep the sign in bit 3 of S2L
373 mov.b A2H
,A2H
; is the denominator -ve
386 #else
/* __H8300H__
*/
389 mov.l A0P
,A0P
; is the numerator -ve
390 stc ccr
,S2L
; keep the sign in bit 3 of S2L
393 neg.l A0P
; negate arg
396 mov.l A1P
,A1P
; is the denominator -ve
399 neg.l A1P
; negate arg
400 xor.b #
0x08,S2L
; toggle the result sign
405 ;; Basically the same, except that the sign of the divisor determines
408 mov.l A0P
,A0P
; is the numerator -ve
409 stc ccr
,S2L
; keep the sign in bit 3 of S2L
412 neg.l A0P
; negate arg
415 mov.l A1P
,A1P
; is the denominator -ve
418 neg.l A1P
; negate arg
426 ; denominator in A2/A3
446 ;; H8/300H and H8S version of ___udivsi3 is defined later in
488 ; examine what the sign should be
504 #else
/* __H8300H__
*/
516 ; takes A0/A1 numerator (A0P for H8/300H)
517 ; A2/A3 denominator (A1P for H8/300H)
518 ; returns A0/A1 quotient (A0P for H8/300H)
519 ; S0/S1 remainder (S0P for H8/300H)
525 sub.w S0
,S0
; zero play area
559 ; have to do the divide by shift and test
567 mov.b #
24,S2H
; only do 24 iterations
570 add.w A1
,A1
; double the answer guess
574 rotxl S1L
; double remainder
578 sub.w A3
,S1
; does it all fit
583 add.w A3
,S1
; no, restore mistake
597 #else
/* __H8300H__
*/
599 ;; This function also computes the remainder and stores it in er3.
602 mov.w A1E
,A1E
; denominator top word 0?
605 ; do it the easy way, see page 107 in manual
619 ; expects er1 >= 2^16
626 shlr.l er2
; make divisor < 2^16
630 shlr.l #
2,er2
; make divisor < 2^16
636 shlr.l #
2,er2
; make divisor < 2^16
650 ;; er0 contains shifted dividend
651 ;; er1 contains divisor
652 ;; er2 contains shifted divisor
653 ;; er3 contains dividend, later remainder
654 divxu.w r2
,er0
; r0 now contains the approximate quotient (AQ)
657 subs #
1,er0
; er0 = AQ - 1
659 mulxu.w r0
,er2
; er2 = upper (AQ - 1) * divisor
660 sub.w r2
,e3
; dividend - 65536 * er2
662 mulxu.w r0
,er2
; compute er3 = remainder (tentative)
663 sub.l er2
,er3
; er3 = dividend - (AQ - 1) * divisor
665 cmp.l er1
,er3
; is divisor < remainder?
668 sub.l er1
,er3
; correct the remainder
673 #endif
/* L_divsi3
*/
678 ; The H8/300 only has an 8*8->16 multiply.
679 ; The answer is the same as:
681 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
682 ; (we can ignore srca.h * srcb.h because that product falls entirely off the top)
692 mov.b A1L
,A2L
; A2l gets srcb.l
693 mulxu A0L
,A2
; A2 gets first sub product
695 mov.b A0H
,A3L
; prepare for
696 mulxu A1L
,A3
; second sub product
698 add.b A3L
,A2H
; sum first two terms
700 mov.b A1H
,A3L
; third sub product
703 add.b A3L
,A2H
; almost there
704 mov.w A2
,A0
; that is
708 #endif
/* L_mulhi3
*/
714 ;; I think that shift and add may be sufficient for this. Using the
715 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
716 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
717 ;; quickly on small args.
775 #else
/* __H8300H__
*/
778 ; mulsi3 for H8/300H - based on Renesas SH implementation
780 ; by Toshiyasu Morita
784 ; 16b * 16b = 372 states (worst case)
785 ; 32b * 32b = 724 states (worst case)
789 ; 16b * 16b = 48 states
790 ; 16b * 32b = 72 states
791 ; 32b * 32b = 92 states
796 mov.w r1
,r2
; ( 2 states) b * d
797 mulxu r0
,er2
; (22 states)
799 mov.w e0
,r3
; ( 2 states) a * d
800 beq L_skip1
; ( 4 states)
801 mulxu r1
,er3
; (22 states)
802 add.w r3
,e2
; ( 2 states)
805 mov.w e1
,r3
; ( 2 states) c * b
806 beq L_skip2
; ( 4 states)
807 mulxu r0
,er3
; (22 states)
808 add.w r3
,e2
; ( 2 states)
811 mov.l er2
,er0
; ( 2 states)
815 #endif
/* L_mulsi3
*/
816 #ifdef L_fixunssfsi_asm
817 /* For the h8300 we use asm to save some bytes
, to
818 allow more programs to fit
into the tiny address
819 space. For the H8
/300H and H8S
, the C version is good enough.
*/
821 /* We still treat NaNs differently from libgcc2.c
, but then
, the
822 behavior is undefined anyway.
*/
823 .
global ___fixunssfsi
843 #endif
/* L_fixunssfsi_asm
*/