1 ;; libgcc routines for the Hitachi H8/300 CPU.
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@hsa.hitachi.com>
5 /* Copyright
(C
) 1994, 2000, 2001, 2002 Free Software Foundation
, Inc.
7 This file is free software
; you can redistribute it and/or modify it
8 under the terms of the GNU General
Public License as published by the
9 Free Software Foundation
; either version 2, or (at your option) any
12 In addition to the permissions
in the GNU General
Public License
, the
13 Free Software Foundation gives you unlimited permission to link the
14 compiled version of
this file
into combinations with other programs
,
15 and to distribute those combinations without any restriction coming
16 from the use of
this file.
(The General
Public License restrictions
17 do apply
in other respects
; for example, they cover modification of
18 the file
, and distribution when
not linked
into a combine
21 This file is distributed
in the hope that it will be useful
, but
22 WITHOUT ANY WARRANTY
; without even the implied warranty of
23 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General
Public License for more details.
26 You should have received a copy of the GNU General
Public License
27 along with
this program
; see the file COPYING. If not, write to
28 the Free Software Foundation
, 59 Temple Place
- Suite
330,
29 Boston
, MA
02111-1307, USA.
*/
31 /* Assembler register definitions.
*/
62 #define MOVP
mov.w
/* pointers are
16 bits
*/
77 #if defined
(__H8300H__
) || defined
(__H8300S__
)
78 #define MOVP
mov.l
/* pointers are
32 bits
*/
130 #endif
/* L_cmpsi2
*/
156 #endif
/* L_ucmpsi2
*/
160 ;; HImode divides for the H8/300.
161 ;; We bunch all of this into one object file since there are several
162 ;; "supporting routines".
164 ; general purpose normalize routine
168 ; turns both into +ve numbers, and leaves what the answer sign
176 or A0H
,A0H
; is divisor > 0
178 not A0H
; no - then make it +ve
181 xor #
0x1,A2L
; and remember that in A2L
182 _lab1: or A1H
,A1H
; look at dividend
184 not A1H
; it is -ve, make it positive
187 xor #
0x1,A2L
; and toggle sign of result
189 ;; Basically the same, except that the sign of the divisor determines
193 or A0H
,A0H
; is divisor > 0
195 not A0H
; no - then make it +ve
198 xor #
0x1,A2L
; and remember that in A2L
199 _lab7: or A1H
,A1H
; look at dividend
201 not A1H
; it is -ve, make it positive
212 negans: or A2L
,A2L
; should answer be negative ?
214 not A0H
; yes, so make it so
239 ; D high 8 bits of denom
240 ; d low 8 bits of denom
241 ; N high 8 bits of num
242 ; n low 8 bits of num
243 ; M high 8 bits of mod
244 ; m low 8 bits of mod
245 ; Q high 8 bits of quot
246 ; q low 8 bits of quot
249 ; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
250 ; see how to partition up the expression.
256 sub.w A3
,A3
; Nn Dd xP 00
262 ; we know that D == 0 and N is != 0
263 mov.b A0H
,A3L
; Nn Dd xP 0N
267 _lab6: mov.b A0L
,A3L
; n
271 mov.b #
0x0,A3H
; Qq 0m
274 ; D != 0 - which means the denominator does not fit in 8 bits, so
275 ; loop around to get the result.
278 mov.b A0H
,A3L
; Nn Dd xP 0N
279 mov.b #
0x0,A0H
; high byte of answer has to be zero
281 div8: add.b A0L
,A0L
; n*=2
282 rotxl A3L
; Make remainder bigger
285 bhs setbit
; set a bit ?
286 add.w A1
,A3
; no : too far , Q+=N
292 setbit: inc A0L
; do insert bit
297 #endif
/* __H8300__
*/
298 #endif
/* L_divhi3
*/
302 ;; 4 byte integer divides for the H8/300.
304 ;; We have one routine which does all the work and lots of
305 ;; little ones which prepare the args and massage the sign.
306 ;; We bunch all of this into one object file since there are several
307 ;; "supporting routines".
312 ; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l if the result should be negative.
313 ; This function is here to keep branch displacements small.
318 mov.b #
0,S2L
; keep the sign in S2
319 mov.b A0H
,A0H
; is the numerator -ve
333 mov.b #
1,S2L
; the sign will be -ve
335 mov.b A2H
,A2H
; is the denominator -ve
345 xor #
1,S2L
; toggle result sign
349 ;; Basically the same, except that the sign of the divisor determines
352 mov.b #
0,S2L
; keep the sign in S2
353 mov.b A0H
,A0H
; is the numerator -ve
367 mov.b #
1,S2L
; the sign will be -ve
369 mov.b A2H
,A2H
; is the denominator -ve
382 #else
/* __H8300H__
*/
385 mov.b #
0,S2L
; keep the sign in S2
386 mov.l A0P
,A0P
; is the numerator -ve
389 neg.l A0P
; negate arg
390 mov.b #
1,S2L
; the sign will be -ve
393 mov.l A1P
,A1P
; is the denominator -ve
396 neg.l A1P
; negate arg
397 xor.b #
1,S2L
; toggle result sign
402 ;; Basically the same, except that the sign of the divisor determines
405 mov.b #
0,S2L
; keep the sign in S2
406 mov.l A0P
,A0P
; is the numerator -ve
409 neg.l A0P
; negate arg
410 mov.b #
1,S2L
; the sign will be -ve
413 mov.l A1P
,A1P
; is the denominator -ve
416 neg.l A1P
; negate arg
424 ; denominator in A2/A3
446 mov.b #
0,S2L
; keep sign low
455 mov.b #
0,S2L
; keep sign low
473 ; examine what the sign should be
492 #else
/* __H8300H__
*/
500 ; takes A0/A1 numerator (A0P for 300H)
501 ; A2/A3 denominator (A1P for 300H)
502 ; returns A0/A1 quotient (A0P for 300H)
503 ; S0/S1 remainder (S0P for 300H)
509 sub.w S0
,S0
; zero play area
543 ; have to do the divide by shift and test
551 mov.b #
24,S2H
; only do 24 iterations
554 add.w A1
,A1
; double the answer guess
558 rotxl S1L
; double remainder
562 sub.w A3
,S1
; does it all fit
567 add.w A3
,S1
; no, restore mistake
581 #else
/* __H8300H__
*/
584 sub.l S0P
,S0P
; zero play area
585 mov.w A1E
,A1E
; denominator top word 0?
588 ; do it the easy way, see page 107 in manual
607 mov.b #
24,S2H
; only do 24 iterations
610 shll.l A0P
; double the answer guess
611 rotxl.l S0P
; double remainder
612 sub.l A1P
,S0P
; does it all fit?
615 add.l A1P
,S0P
; no, restore mistake
627 #endif
/* L_divsi3
*/
632 ; The H8/300 only has an 8*8->16 multiply.
633 ; The answer is the same as:
635 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
636 ; (we can ignore A1.h * A0.h because that term falls entirely off the top)
646 mov.b A1L
,A2L
; A2l gets srcb.l
647 mulxu A0L
,A2
; A2 gets first sub product
649 mov.b A0H
,A3L
; prepare for
650 mulxu A1L
,A3
; second sub product
652 add.b A3L
,A2H
; sum first two terms
654 mov.b A1H
,A3L
; third sub product
657 add.b A3L
,A2H
; almost there
658 mov.w A2
,A0
; that is
662 #endif
/* L_mulhi3
*/
668 ;; I think that shift and add may be sufficient for this. Using the
669 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
670 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
671 ;; quickly on small args.
731 #else
/* __H8300H__
*/
734 ; mulsi3 for H8/300H - based on Hitachi SH implementation
736 ; by Toshiyasu Morita
740 ; 16b * 16b = 372 states (worst case)
741 ; 32b * 32b = 724 states (worst case)
745 ; 16b * 16b = 48 states
746 ; 16b * 32b = 72 states
747 ; 32b * 32b = 92 states
752 mov.w r1
,r2
; ( 2 states) b * d
753 mulxu r0
,er2
; (22 states)
755 mov.w e0
,r3
; ( 2 states) a * d
756 beq L_skip1
; ( 4 states)
757 mulxu r1
,er3
; (22 states)
758 add.w r3
,e2
; ( 2 states)
761 mov.w e1
,r3
; ( 2 states) c * b
762 beq L_skip2
; ( 4 states)
763 mulxu r0
,er3
; (22 states)
764 add.w r3
,e2
; ( 2 states)
767 mov.l er2
,er0
; ( 2 states)
771 #endif
/* L_mulsi3
*/
772 #ifdef L_fixunssfsi_asm
773 /* For the h8300 we use asm to save some bytes
, to
774 allow more programs to fit
into the tiny address
775 space. For the H8
/300H and H8S
, the C version is good enough.
*/
777 /* We still treat NaNs differently from libgcc2.c, but then the
778 behaviour is undefined anyway.
*/
779 .
global ___fixunssfsi
799 #endif
/* L_fixunssfsi_asm
*/