* tree-ssa-phiopt.c, config/arm/arm.c, config/fr30/fr30.md,
[official-gcc.git] / gcc / config / sh / lib1funcs.asm
blob30f10a992cda50db48b6961d7b90aca670cfe671
1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
2 Free Software Foundation, Inc.
4 This file is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
7 later version.
9 In addition to the permissions in the GNU General Public License, the
10 Free Software Foundation gives you unlimited permission to link the
11 compiled version of this file into combinations with other programs,
12 and to distribute those combinations without any restriction coming
13 from the use of this file. (The General Public License restrictions
14 do apply in other respects; for example, they cover modification of
15 the file, and distribution when not linked into a combine
16 executable.)
18 This file is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; see the file COPYING. If not, write to
25 the Free Software Foundation, 59 Temple Place - Suite 330,
26 Boston, MA 02111-1307, USA. */
28 !! libgcc routines for the Renesas / SuperH SH CPUs.
29 !! Contributed by Steve Chamberlain.
30 !! sac@cygnus.com
32 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
33 !! recoded in assembly by Toshiyasu Morita
34 !! tm@netcom.com
36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
38 amylaar@cygnus.com */
/* Helper macros shared by every routine in this file.
   With __ELF__ defined, LOCAL(X) names a ".L_"-prefixed label, FUNC(X)
   emits ".type X,@function", and ENDFUNC(X) places a ".Lfe_"-prefixed end
   label and emits the matching ".size" directive.  */
40 #ifdef __ELF__
41 #define LOCAL(X) .L_##X
42 #define FUNC(X) .type X,@function
43 #define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
/* The extra macro level lets an argument such as GLOBAL(foo) be expanded
   before ENDFUNC0 pastes it onto ".Lfe_".  */
44 #define ENDFUNC(X) ENDFUNC0(X)
45 #else
/* Without __ELF__: labels use a plain "L_" prefix and FUNC / ENDFUNC
   expand to nothing.  */
46 #define LOCAL(X) L_##X
47 #define FUNC(X)
48 #define ENDFUNC(X)
49 #endif
/* GLOBAL(X) builds an exported symbol name: __USER_LABEL_PREFIX__
   followed by "__" and X.  GLOBAL0 / CONCAT exist so that
   __USER_LABEL_PREFIX__ is expanded before the token paste.  */
51 #define CONCAT(A,B) A##B
52 #define GLOBAL0(U,X) CONCAT(U,__##X)
53 #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
/* ALIAS(X,Y) exports GLOBAL(X) as a second name for GLOBAL(Y).  */
55 #define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y)
/* FMOVD_WORKS selects the single "fmov.d" load of a double constant in the
   udivsi3_i4 routines instead of two "fmov.s" loads.  It is defined for
   big-endian __SH5__ with an FPU ...  */
57 #if defined __SH5__ && ! defined __SH4_NOFPU__ && ! defined (__LITTLE_ENDIAN__)
58 #define FMOVD_WORKS
59 #endif
/* ... and unconditionally for __SH2A__.  */
61 #ifdef __SH2A__
62 #undef FMOVD_WORKS
63 #define FMOVD_WORKS
64 #endif
/* Everything from here to the "#endif" marked "! __SH5__" (after mulsi3)
   is assembled only when __SH5__ is undefined or zero.  */
66 #if ! __SH5__
67 #ifdef L_ashiftrt
! GLOBAL(ashiftrt_r4_<n>), <n> = 0..32
!
! One entry point per constant shift count; each shifts r4 right
! arithmetically (sign-propagating) by <n> bits, in place.
!
! Entry:
!   r4: Value to shift
!
! Exit:
!   r4: Shifted value
!
! Destroys:
!   T bit (rotcl, subc and shar all write it)
!
! Entering at label <n> falls through a chain of one-bit shifts until it
! reaches a return.  "rts" has a delay slot, so the instruction written
! directly after each "rts" still executes before control returns.
!
! NOTE(review): the "rts" / "nop" lines (embedded numbers 140, 158, 177,
! 209, 213, 214) were absent from this listing and have been restored;
! without them every entry point ran off the end of the code.  Confirm
! against the upstream blob.
68 .global GLOBAL(ashiftrt_r4_0)
69 .global GLOBAL(ashiftrt_r4_1)
70 .global GLOBAL(ashiftrt_r4_2)
71 .global GLOBAL(ashiftrt_r4_3)
72 .global GLOBAL(ashiftrt_r4_4)
73 .global GLOBAL(ashiftrt_r4_5)
74 .global GLOBAL(ashiftrt_r4_6)
75 .global GLOBAL(ashiftrt_r4_7)
76 .global GLOBAL(ashiftrt_r4_8)
77 .global GLOBAL(ashiftrt_r4_9)
78 .global GLOBAL(ashiftrt_r4_10)
79 .global GLOBAL(ashiftrt_r4_11)
80 .global GLOBAL(ashiftrt_r4_12)
81 .global GLOBAL(ashiftrt_r4_13)
82 .global GLOBAL(ashiftrt_r4_14)
83 .global GLOBAL(ashiftrt_r4_15)
84 .global GLOBAL(ashiftrt_r4_16)
85 .global GLOBAL(ashiftrt_r4_17)
86 .global GLOBAL(ashiftrt_r4_18)
87 .global GLOBAL(ashiftrt_r4_19)
88 .global GLOBAL(ashiftrt_r4_20)
89 .global GLOBAL(ashiftrt_r4_21)
90 .global GLOBAL(ashiftrt_r4_22)
91 .global GLOBAL(ashiftrt_r4_23)
92 .global GLOBAL(ashiftrt_r4_24)
93 .global GLOBAL(ashiftrt_r4_25)
94 .global GLOBAL(ashiftrt_r4_26)
95 .global GLOBAL(ashiftrt_r4_27)
96 .global GLOBAL(ashiftrt_r4_28)
97 .global GLOBAL(ashiftrt_r4_29)
98 .global GLOBAL(ashiftrt_r4_30)
99 .global GLOBAL(ashiftrt_r4_31)
100 .global GLOBAL(ashiftrt_r4_32)
102 FUNC(GLOBAL(ashiftrt_r4_0))
103 FUNC(GLOBAL(ashiftrt_r4_1))
104 FUNC(GLOBAL(ashiftrt_r4_2))
105 FUNC(GLOBAL(ashiftrt_r4_3))
106 FUNC(GLOBAL(ashiftrt_r4_4))
107 FUNC(GLOBAL(ashiftrt_r4_5))
108 FUNC(GLOBAL(ashiftrt_r4_6))
109 FUNC(GLOBAL(ashiftrt_r4_7))
110 FUNC(GLOBAL(ashiftrt_r4_8))
111 FUNC(GLOBAL(ashiftrt_r4_9))
112 FUNC(GLOBAL(ashiftrt_r4_10))
113 FUNC(GLOBAL(ashiftrt_r4_11))
114 FUNC(GLOBAL(ashiftrt_r4_12))
115 FUNC(GLOBAL(ashiftrt_r4_13))
116 FUNC(GLOBAL(ashiftrt_r4_14))
117 FUNC(GLOBAL(ashiftrt_r4_15))
118 FUNC(GLOBAL(ashiftrt_r4_16))
119 FUNC(GLOBAL(ashiftrt_r4_17))
120 FUNC(GLOBAL(ashiftrt_r4_18))
121 FUNC(GLOBAL(ashiftrt_r4_19))
122 FUNC(GLOBAL(ashiftrt_r4_20))
123 FUNC(GLOBAL(ashiftrt_r4_21))
124 FUNC(GLOBAL(ashiftrt_r4_22))
125 FUNC(GLOBAL(ashiftrt_r4_23))
126 FUNC(GLOBAL(ashiftrt_r4_24))
127 FUNC(GLOBAL(ashiftrt_r4_25))
128 FUNC(GLOBAL(ashiftrt_r4_26))
129 FUNC(GLOBAL(ashiftrt_r4_27))
130 FUNC(GLOBAL(ashiftrt_r4_28))
131 FUNC(GLOBAL(ashiftrt_r4_29))
132 FUNC(GLOBAL(ashiftrt_r4_30))
133 FUNC(GLOBAL(ashiftrt_r4_31))
134 FUNC(GLOBAL(ashiftrt_r4_32))
136 .align 1
! Shift by 31 or 32: the result is just the sign bit replicated.
137 GLOBAL(ashiftrt_r4_32):
138 GLOBAL(ashiftrt_r4_31):
139 rotcl r4 ! T = sign bit of r4
140 rts
141 subc r4,r4 ! delay slot: r4 = -T, i.e. 0 or -1
! Shifts 25..30: one-bit steps, then fall into the 24-bit shortcut.
143 GLOBAL(ashiftrt_r4_30):
144 shar r4
145 GLOBAL(ashiftrt_r4_29):
146 shar r4
147 GLOBAL(ashiftrt_r4_28):
148 shar r4
149 GLOBAL(ashiftrt_r4_27):
150 shar r4
151 GLOBAL(ashiftrt_r4_26):
152 shar r4
153 GLOBAL(ashiftrt_r4_25):
154 shar r4
! Shift by 24: logical shift by 16 + 8, then sign-extend the low byte.
155 GLOBAL(ashiftrt_r4_24):
156 shlr16 r4
157 shlr8 r4
158 rts
159 exts.b r4,r4 ! delay slot
! Shifts 17..23: one-bit steps, then fall into the 16-bit shortcut.
161 GLOBAL(ashiftrt_r4_23):
162 shar r4
163 GLOBAL(ashiftrt_r4_22):
164 shar r4
165 GLOBAL(ashiftrt_r4_21):
166 shar r4
167 GLOBAL(ashiftrt_r4_20):
168 shar r4
169 GLOBAL(ashiftrt_r4_19):
170 shar r4
171 GLOBAL(ashiftrt_r4_18):
172 shar r4
173 GLOBAL(ashiftrt_r4_17):
174 shar r4
! Shift by 16: logical shift by 16, then sign-extend the low word.
175 GLOBAL(ashiftrt_r4_16):
176 shlr16 r4
177 rts
178 exts.w r4,r4 ! delay slot
! Shifts 1..15: plain chain of one-bit arithmetic shifts.
180 GLOBAL(ashiftrt_r4_15):
181 shar r4
182 GLOBAL(ashiftrt_r4_14):
183 shar r4
184 GLOBAL(ashiftrt_r4_13):
185 shar r4
186 GLOBAL(ashiftrt_r4_12):
187 shar r4
188 GLOBAL(ashiftrt_r4_11):
189 shar r4
190 GLOBAL(ashiftrt_r4_10):
191 shar r4
192 GLOBAL(ashiftrt_r4_9):
193 shar r4
194 GLOBAL(ashiftrt_r4_8):
195 shar r4
196 GLOBAL(ashiftrt_r4_7):
197 shar r4
198 GLOBAL(ashiftrt_r4_6):
199 shar r4
200 GLOBAL(ashiftrt_r4_5):
201 shar r4
202 GLOBAL(ashiftrt_r4_4):
203 shar r4
204 GLOBAL(ashiftrt_r4_3):
205 shar r4
206 GLOBAL(ashiftrt_r4_2):
207 shar r4
208 GLOBAL(ashiftrt_r4_1):
209 rts
210 shar r4 ! delay slot: the last one-bit shift
! Shift by 0: nothing to do.
212 GLOBAL(ashiftrt_r4_0):
213 rts
214 nop
216 ENDFUNC(GLOBAL(ashiftrt_r4_0))
217 ENDFUNC(GLOBAL(ashiftrt_r4_1))
218 ENDFUNC(GLOBAL(ashiftrt_r4_2))
219 ENDFUNC(GLOBAL(ashiftrt_r4_3))
220 ENDFUNC(GLOBAL(ashiftrt_r4_4))
221 ENDFUNC(GLOBAL(ashiftrt_r4_5))
222 ENDFUNC(GLOBAL(ashiftrt_r4_6))
223 ENDFUNC(GLOBAL(ashiftrt_r4_7))
224 ENDFUNC(GLOBAL(ashiftrt_r4_8))
225 ENDFUNC(GLOBAL(ashiftrt_r4_9))
226 ENDFUNC(GLOBAL(ashiftrt_r4_10))
227 ENDFUNC(GLOBAL(ashiftrt_r4_11))
228 ENDFUNC(GLOBAL(ashiftrt_r4_12))
229 ENDFUNC(GLOBAL(ashiftrt_r4_13))
230 ENDFUNC(GLOBAL(ashiftrt_r4_14))
231 ENDFUNC(GLOBAL(ashiftrt_r4_15))
232 ENDFUNC(GLOBAL(ashiftrt_r4_16))
233 ENDFUNC(GLOBAL(ashiftrt_r4_17))
234 ENDFUNC(GLOBAL(ashiftrt_r4_18))
235 ENDFUNC(GLOBAL(ashiftrt_r4_19))
236 ENDFUNC(GLOBAL(ashiftrt_r4_20))
237 ENDFUNC(GLOBAL(ashiftrt_r4_21))
238 ENDFUNC(GLOBAL(ashiftrt_r4_22))
239 ENDFUNC(GLOBAL(ashiftrt_r4_23))
240 ENDFUNC(GLOBAL(ashiftrt_r4_24))
241 ENDFUNC(GLOBAL(ashiftrt_r4_25))
242 ENDFUNC(GLOBAL(ashiftrt_r4_26))
243 ENDFUNC(GLOBAL(ashiftrt_r4_27))
244 ENDFUNC(GLOBAL(ashiftrt_r4_28))
245 ENDFUNC(GLOBAL(ashiftrt_r4_29))
246 ENDFUNC(GLOBAL(ashiftrt_r4_30))
247 ENDFUNC(GLOBAL(ashiftrt_r4_31))
248 ENDFUNC(GLOBAL(ashiftrt_r4_32))
249 #endif
251 #ifdef L_ashiftrt_n
254 ! GLOBAL(ashrsi3)
!
! Arithmetic right shift by a run-time count.  The count is reduced to
! 0..31, used to index a table of byte offsets, and control jumps into a
! fall-through chain of shift instructions.
!
256 ! Entry:
258 ! r4: Value to shift
259 ! r5: Shifts (only the low five bits are used)
261 ! Exit:
263 ! r0: Result
265 ! Destroys:
267 ! r5 (masked, then overwritten with the table offset), T bit
!
! NOTE(review): the "rts" / "nop" lines (embedded numbers 323, 341, 360,
! 392, 396, 397) were absent from this listing and have been restored;
! confirm against the upstream blob.
270 .global GLOBAL(ashrsi3)
271 FUNC(GLOBAL(ashrsi3))
272 .align 2
273 GLOBAL(ashrsi3):
274 mov #31,r0
275 and r0,r5 ! r5 = shift count mod 32
276 mova LOCAL(ashrsi3_table),r0
277 mov.b @(r0,r5),r5 ! r5 = offset of the handler from the table
278 #ifdef __sh1__
279 add r5,r0 ! r0 = absolute address of the handler
280 jmp @r0
281 #else
282 braf r5 ! PC-relative jump by the table offset
283 #endif
284 mov r4,r0 ! delay slot: working copy of the value
286 .align 2
287 LOCAL(ashrsi3_table):
288 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
302 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
303 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
304 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
305 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
306 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
307 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
308 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
309 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
310 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
311 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
312 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
313 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
314 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
315 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
316 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
317 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
318 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
319 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
! Shift by 31: the result is the sign bit replicated.
321 LOCAL(ashrsi3_31):
322 rotcl r0 ! T = sign bit
323 rts
324 subc r0,r0 ! delay slot: r0 = -T, i.e. 0 or -1
! Shifts 25..30: one-bit steps, then fall into the 24-bit shortcut.
326 LOCAL(ashrsi3_30):
327 shar r0
328 LOCAL(ashrsi3_29):
329 shar r0
330 LOCAL(ashrsi3_28):
331 shar r0
332 LOCAL(ashrsi3_27):
333 shar r0
334 LOCAL(ashrsi3_26):
335 shar r0
336 LOCAL(ashrsi3_25):
337 shar r0
338 LOCAL(ashrsi3_24):
339 shlr16 r0
340 shlr8 r0
341 rts
342 exts.b r0,r0 ! delay slot: sign-extend the remaining byte
! Shifts 17..23: one-bit steps, then fall into the 16-bit shortcut.
344 LOCAL(ashrsi3_23):
345 shar r0
346 LOCAL(ashrsi3_22):
347 shar r0
348 LOCAL(ashrsi3_21):
349 shar r0
350 LOCAL(ashrsi3_20):
351 shar r0
352 LOCAL(ashrsi3_19):
353 shar r0
354 LOCAL(ashrsi3_18):
355 shar r0
356 LOCAL(ashrsi3_17):
357 shar r0
358 LOCAL(ashrsi3_16):
359 shlr16 r0
360 rts
361 exts.w r0,r0 ! delay slot: sign-extend the remaining word
! Shifts 1..15: plain chain of one-bit arithmetic shifts.
363 LOCAL(ashrsi3_15):
364 shar r0
365 LOCAL(ashrsi3_14):
366 shar r0
367 LOCAL(ashrsi3_13):
368 shar r0
369 LOCAL(ashrsi3_12):
370 shar r0
371 LOCAL(ashrsi3_11):
372 shar r0
373 LOCAL(ashrsi3_10):
374 shar r0
375 LOCAL(ashrsi3_9):
376 shar r0
377 LOCAL(ashrsi3_8):
378 shar r0
379 LOCAL(ashrsi3_7):
380 shar r0
381 LOCAL(ashrsi3_6):
382 shar r0
383 LOCAL(ashrsi3_5):
384 shar r0
385 LOCAL(ashrsi3_4):
386 shar r0
387 LOCAL(ashrsi3_3):
388 shar r0
389 LOCAL(ashrsi3_2):
390 shar r0
391 LOCAL(ashrsi3_1):
392 rts
393 shar r0 ! delay slot: the last one-bit shift
! Shift by 0: r0 already holds the value.
395 LOCAL(ashrsi3_0):
396 rts
397 nop
399 ENDFUNC(GLOBAL(ashrsi3))
400 #endif
402 #ifdef L_ashiftlt
405 ! GLOBAL(ashlsi3)
!
! Left shift by a run-time count.  The count is reduced to 0..31, used
! to index a table of byte offsets, and control jumps into one of
! several short chains built from shll2 / shll8 / shll16 / shll.
!
407 ! Entry:
409 ! r4: Value to shift
410 ! r5: Shifts (only the low five bits are used)
412 ! Exit:
414 ! r0: Result
416 ! Destroys:
418 ! r5 (masked, then overwritten with the table offset), T bit (shll)
!
! NOTE(review): the "rts" / "nop" lines (embedded numbers 476, 486, 496,
! 507, 517, 528, 539, 551, 555, 556) were absent from this listing and
! have been restored; without them each group fell into the next one.
! Confirm against the upstream blob.
420 .global GLOBAL(ashlsi3)
421 FUNC(GLOBAL(ashlsi3))
422 .align 2
423 GLOBAL(ashlsi3):
424 mov #31,r0
425 and r0,r5 ! r5 = shift count mod 32
426 mova LOCAL(ashlsi3_table),r0
427 mov.b @(r0,r5),r5 ! r5 = offset of the handler from the table
428 #ifdef __sh1__
429 add r5,r0 ! r0 = absolute address of the handler
430 jmp @r0
431 #else
432 braf r5 ! PC-relative jump by the table offset
433 #endif
434 mov r4,r0 ! delay slot: working copy of the value
436 .align 2
437 LOCAL(ashlsi3_table):
438 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
450 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
451 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
452 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
453 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
454 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
455 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
456 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
457 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
458 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
459 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
460 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
461 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
462 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
463 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
464 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
465 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
466 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
467 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
468 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
469 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
! Even counts 2, 4, 6: two-bit steps.
471 LOCAL(ashlsi3_6):
472 shll2 r0
473 LOCAL(ashlsi3_4):
474 shll2 r0
475 LOCAL(ashlsi3_2):
476 rts
477 shll2 r0 ! delay slot
! Odd counts 1, 3, 5, 7: two-bit steps, then one single-bit step.
479 LOCAL(ashlsi3_7):
480 shll2 r0
481 LOCAL(ashlsi3_5):
482 shll2 r0
483 LOCAL(ashlsi3_3):
484 shll2 r0
485 LOCAL(ashlsi3_1):
486 rts
487 shll r0 ! delay slot
! Even counts 8..14: two-bit steps, then shift by 8.
489 LOCAL(ashlsi3_14):
490 shll2 r0
491 LOCAL(ashlsi3_12):
492 shll2 r0
493 LOCAL(ashlsi3_10):
494 shll2 r0
495 LOCAL(ashlsi3_8):
496 rts
497 shll8 r0 ! delay slot
! Odd counts 9..15: two-bit steps, shift by 8, then one more bit.
499 LOCAL(ashlsi3_15):
500 shll2 r0
501 LOCAL(ashlsi3_13):
502 shll2 r0
503 LOCAL(ashlsi3_11):
504 shll2 r0
505 LOCAL(ashlsi3_9):
506 shll8 r0
507 rts
508 shll r0 ! delay slot
! Even counts 16..22: two-bit steps, then shift by 16.
510 LOCAL(ashlsi3_22):
511 shll2 r0
512 LOCAL(ashlsi3_20):
513 shll2 r0
514 LOCAL(ashlsi3_18):
515 shll2 r0
516 LOCAL(ashlsi3_16):
517 rts
518 shll16 r0 ! delay slot
! Odd counts 17..23: two-bit steps, shift by 16, then one more bit.
520 LOCAL(ashlsi3_23):
521 shll2 r0
522 LOCAL(ashlsi3_21):
523 shll2 r0
524 LOCAL(ashlsi3_19):
525 shll2 r0
526 LOCAL(ashlsi3_17):
527 shll16 r0
528 rts
529 shll r0 ! delay slot
! Even counts 24..30: two-bit steps, then shift by 16 + 8.
531 LOCAL(ashlsi3_30):
532 shll2 r0
533 LOCAL(ashlsi3_28):
534 shll2 r0
535 LOCAL(ashlsi3_26):
536 shll2 r0
537 LOCAL(ashlsi3_24):
538 shll16 r0
539 rts
540 shll8 r0 ! delay slot
! Odd counts 25..31: two-bit steps, shift by 16 + 8, then one more bit.
542 LOCAL(ashlsi3_31):
543 shll2 r0
544 LOCAL(ashlsi3_29):
545 shll2 r0
546 LOCAL(ashlsi3_27):
547 shll2 r0
548 LOCAL(ashlsi3_25):
549 shll16 r0
550 shll8 r0
551 rts
552 shll r0 ! delay slot
! Shift by 0: r0 already holds the value.
554 LOCAL(ashlsi3_0):
555 rts
556 nop
558 ENDFUNC(GLOBAL(ashlsi3))
559 #endif
561 #ifdef L_lshiftrt
564 ! GLOBAL(lshrsi3)
!
! Logical (zero-filling) right shift by a run-time count.  The count is
! reduced to 0..31, used to index a table of byte offsets, and control
! jumps into one of several short chains built from shlr2 / shlr8 /
! shlr16 / shlr.
!
566 ! Entry:
568 ! r4: Value to shift
569 ! r5: Shifts (only the low five bits are used)
571 ! Exit:
573 ! r0: Result
575 ! Destroys:
577 ! r5 (masked, then overwritten with the table offset), T bit (shlr)
!
! NOTE(review): the "rts" / "nop" lines (embedded numbers 635, 645, 655,
! 666, 676, 687, 698, 710, 714, 715) were absent from this listing and
! have been restored; without them each group fell into the next one.
! Confirm against the upstream blob.
579 .global GLOBAL(lshrsi3)
580 FUNC(GLOBAL(lshrsi3))
581 .align 2
582 GLOBAL(lshrsi3):
583 mov #31,r0
584 and r0,r5 ! r5 = shift count mod 32
585 mova LOCAL(lshrsi3_table),r0
586 mov.b @(r0,r5),r5 ! r5 = offset of the handler from the table
587 #ifdef __sh1__
588 add r5,r0 ! r0 = absolute address of the handler
589 jmp @r0
590 #else
591 braf r5 ! PC-relative jump by the table offset
592 #endif
593 mov r4,r0 ! delay slot: working copy of the value
595 .align 2
596 LOCAL(lshrsi3_table):
597 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
609 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
610 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
611 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
612 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
613 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
614 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
615 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
616 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
617 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
618 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
619 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
620 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
621 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
622 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
623 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
624 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
625 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
626 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
627 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
628 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
! Even counts 2, 4, 6: two-bit steps.
630 LOCAL(lshrsi3_6):
631 shlr2 r0
632 LOCAL(lshrsi3_4):
633 shlr2 r0
634 LOCAL(lshrsi3_2):
635 rts
636 shlr2 r0 ! delay slot
! Odd counts 1, 3, 5, 7: two-bit steps, then one single-bit step.
638 LOCAL(lshrsi3_7):
639 shlr2 r0
640 LOCAL(lshrsi3_5):
641 shlr2 r0
642 LOCAL(lshrsi3_3):
643 shlr2 r0
644 LOCAL(lshrsi3_1):
645 rts
646 shlr r0 ! delay slot
! Even counts 8..14: two-bit steps, then shift by 8.
648 LOCAL(lshrsi3_14):
649 shlr2 r0
650 LOCAL(lshrsi3_12):
651 shlr2 r0
652 LOCAL(lshrsi3_10):
653 shlr2 r0
654 LOCAL(lshrsi3_8):
655 rts
656 shlr8 r0 ! delay slot
! Odd counts 9..15: two-bit steps, shift by 8, then one more bit.
658 LOCAL(lshrsi3_15):
659 shlr2 r0
660 LOCAL(lshrsi3_13):
661 shlr2 r0
662 LOCAL(lshrsi3_11):
663 shlr2 r0
664 LOCAL(lshrsi3_9):
665 shlr8 r0
666 rts
667 shlr r0 ! delay slot
! Even counts 16..22: two-bit steps, then shift by 16.
669 LOCAL(lshrsi3_22):
670 shlr2 r0
671 LOCAL(lshrsi3_20):
672 shlr2 r0
673 LOCAL(lshrsi3_18):
674 shlr2 r0
675 LOCAL(lshrsi3_16):
676 rts
677 shlr16 r0 ! delay slot
! Odd counts 17..23: two-bit steps, shift by 16, then one more bit.
679 LOCAL(lshrsi3_23):
680 shlr2 r0
681 LOCAL(lshrsi3_21):
682 shlr2 r0
683 LOCAL(lshrsi3_19):
684 shlr2 r0
685 LOCAL(lshrsi3_17):
686 shlr16 r0
687 rts
688 shlr r0 ! delay slot
! Even counts 24..30: two-bit steps, then shift by 16 + 8.
690 LOCAL(lshrsi3_30):
691 shlr2 r0
692 LOCAL(lshrsi3_28):
693 shlr2 r0
694 LOCAL(lshrsi3_26):
695 shlr2 r0
696 LOCAL(lshrsi3_24):
697 shlr16 r0
698 rts
699 shlr8 r0 ! delay slot
! Odd counts 25..31: two-bit steps, shift by 16 + 8, then one more bit.
701 LOCAL(lshrsi3_31):
702 shlr2 r0
703 LOCAL(lshrsi3_29):
704 shlr2 r0
705 LOCAL(lshrsi3_27):
706 shlr2 r0
707 LOCAL(lshrsi3_25):
708 shlr16 r0
709 shlr8 r0
710 rts
711 shlr r0 ! delay slot
! Shift by 0: r0 already holds the value.
713 LOCAL(lshrsi3_0):
714 rts
715 nop
717 ENDFUNC(GLOBAL(lshrsi3))
718 #endif
720 #ifdef L_movmem
721 .text
722 ! done all the large groups, do the remainder
!
! "done" is reached from GLOBAL(movmem) once its r6 counter is no longer
! positive.  It advances both pointers past the 64-byte group just copied
! and jumps 4*r6 bytes relative to GLOBAL(movmemSI0).  Each movmemSI<n>
! step below is two 16-bit instructions (4 bytes), so a negative r6
! selects how many trailing longword copies still run before the return.
! NOTE(review): the exact meaning of the incoming r6 count is set by the
! compiler's call sequence, which is not visible here - confirm.
724 ! jump into the movmemSI<n> chain
725 done:
726 add #64,r5
727 mova GLOBAL(movmemSI0),r0
728 shll2 r6 ! r6 *= 4: bytes of code per longword copy
729 add r6,r0
730 jmp @r0
731 add #64,r4 ! delay slot
732 .align 4
!
! GLOBAL(movmemSI<n>), <n> = 64, 60, ..., 4, 0
!
! Straight-line copy of <n> bytes as longwords, highest offset first.
! Each entry point copies one longword and falls through to the next
! smaller size; movmemSI0 just returns.
!
! Entry:
!   r4: Destination address
!   r5: Source address
!
! Destroys:
!   r0
!
! NOTE(review): the "rts" / "nop" after movmemSI0 (embedded numbers 835,
! 836) were absent from this listing and have been restored; confirm
! against the upstream blob.
!
733 ! ??? We need aliases movstr* for movmem* for the older libraries. These
734 ! aliases will be removed at some point in the future.
735 .global GLOBAL(movmemSI64)
736 FUNC(GLOBAL(movmemSI64))
737 ALIAS(movstrSI64,movmemSI64)
738 GLOBAL(movmemSI64):
739 mov.l @(60,r5),r0
740 mov.l r0,@(60,r4)
741 .global GLOBAL(movmemSI60)
742 FUNC(GLOBAL(movmemSI60))
743 ALIAS(movstrSI60,movmemSI60)
744 GLOBAL(movmemSI60):
745 mov.l @(56,r5),r0
746 mov.l r0,@(56,r4)
747 .global GLOBAL(movmemSI56)
748 FUNC(GLOBAL(movmemSI56))
749 ALIAS(movstrSI56,movmemSI56)
750 GLOBAL(movmemSI56):
751 mov.l @(52,r5),r0
752 mov.l r0,@(52,r4)
753 .global GLOBAL(movmemSI52)
754 FUNC(GLOBAL(movmemSI52))
755 ALIAS(movstrSI52,movmemSI52)
756 GLOBAL(movmemSI52):
757 mov.l @(48,r5),r0
758 mov.l r0,@(48,r4)
759 .global GLOBAL(movmemSI48)
760 FUNC(GLOBAL(movmemSI48))
761 ALIAS(movstrSI48,movmemSI48)
762 GLOBAL(movmemSI48):
763 mov.l @(44,r5),r0
764 mov.l r0,@(44,r4)
765 .global GLOBAL(movmemSI44)
766 FUNC(GLOBAL(movmemSI44))
767 ALIAS(movstrSI44,movmemSI44)
768 GLOBAL(movmemSI44):
769 mov.l @(40,r5),r0
770 mov.l r0,@(40,r4)
771 .global GLOBAL(movmemSI40)
772 FUNC(GLOBAL(movmemSI40))
773 ALIAS(movstrSI40,movmemSI40)
774 GLOBAL(movmemSI40):
775 mov.l @(36,r5),r0
776 mov.l r0,@(36,r4)
777 .global GLOBAL(movmemSI36)
778 FUNC(GLOBAL(movmemSI36))
779 ALIAS(movstrSI36,movmemSI36)
780 GLOBAL(movmemSI36):
781 mov.l @(32,r5),r0
782 mov.l r0,@(32,r4)
783 .global GLOBAL(movmemSI32)
784 FUNC(GLOBAL(movmemSI32))
785 ALIAS(movstrSI32,movmemSI32)
786 GLOBAL(movmemSI32):
787 mov.l @(28,r5),r0
788 mov.l r0,@(28,r4)
789 .global GLOBAL(movmemSI28)
790 FUNC(GLOBAL(movmemSI28))
791 ALIAS(movstrSI28,movmemSI28)
792 GLOBAL(movmemSI28):
793 mov.l @(24,r5),r0
794 mov.l r0,@(24,r4)
795 .global GLOBAL(movmemSI24)
796 FUNC(GLOBAL(movmemSI24))
797 ALIAS(movstrSI24,movmemSI24)
798 GLOBAL(movmemSI24):
799 mov.l @(20,r5),r0
800 mov.l r0,@(20,r4)
801 .global GLOBAL(movmemSI20)
802 FUNC(GLOBAL(movmemSI20))
803 ALIAS(movstrSI20,movmemSI20)
804 GLOBAL(movmemSI20):
805 mov.l @(16,r5),r0
806 mov.l r0,@(16,r4)
807 .global GLOBAL(movmemSI16)
808 FUNC(GLOBAL(movmemSI16))
809 ALIAS(movstrSI16,movmemSI16)
810 GLOBAL(movmemSI16):
811 mov.l @(12,r5),r0
812 mov.l r0,@(12,r4)
813 .global GLOBAL(movmemSI12)
814 FUNC(GLOBAL(movmemSI12))
815 ALIAS(movstrSI12,movmemSI12)
816 GLOBAL(movmemSI12):
817 mov.l @(8,r5),r0
818 mov.l r0,@(8,r4)
819 .global GLOBAL(movmemSI8)
820 FUNC(GLOBAL(movmemSI8))
821 ALIAS(movstrSI8,movmemSI8)
822 GLOBAL(movmemSI8):
823 mov.l @(4,r5),r0
824 mov.l r0,@(4,r4)
825 .global GLOBAL(movmemSI4)
826 FUNC(GLOBAL(movmemSI4))
827 ALIAS(movstrSI4,movmemSI4)
828 GLOBAL(movmemSI4):
829 mov.l @(0,r5),r0
830 mov.l r0,@(0,r4)
831 .global GLOBAL(movmemSI0)
832 FUNC(GLOBAL(movmemSI0))
833 ALIAS(movstrSI0,movmemSI0)
834 GLOBAL(movmemSI0):
835 rts
836 nop
838 ENDFUNC(GLOBAL(movmemSI64))
839 ENDFUNC(GLOBAL(movmemSI60))
840 ENDFUNC(GLOBAL(movmemSI56))
841 ENDFUNC(GLOBAL(movmemSI52))
842 ENDFUNC(GLOBAL(movmemSI48))
843 ENDFUNC(GLOBAL(movmemSI44))
844 ENDFUNC(GLOBAL(movmemSI40))
845 ENDFUNC(GLOBAL(movmemSI36))
846 ENDFUNC(GLOBAL(movmemSI32))
847 ENDFUNC(GLOBAL(movmemSI28))
848 ENDFUNC(GLOBAL(movmemSI24))
849 ENDFUNC(GLOBAL(movmemSI20))
850 ENDFUNC(GLOBAL(movmemSI16))
851 ENDFUNC(GLOBAL(movmemSI12))
852 ENDFUNC(GLOBAL(movmemSI8))
853 ENDFUNC(GLOBAL(movmemSI4))
854 ENDFUNC(GLOBAL(movmemSI0))
856 .align 4
!
! GLOBAL(movmem)
!
! Block copy in 64-byte groups.  Each pass copies sixteen longwords from
! @r5 to @r4 (highest offset first) and subtracts 16 from r6.  While r6
! stays positive both pointers advance by 64 and the loop repeats;
! otherwise control leaves through "done", which finishes the remainder
! via the movmemSI<n> chain and returns from there.
!
! Entry:
!   r4: Destination address
!   r5: Source address
!   r6: Loop counter, decremented by 16 per 64-byte group
!
! Destroys:
!   r0, r4, r5, r6, T bit
!
858 .global GLOBAL(movmem)
859 FUNC(GLOBAL(movmem))
860 ALIAS(movstr,movmem)
861 GLOBAL(movmem):
862 mov.l @(60,r5),r0
863 mov.l r0,@(60,r4)
865 mov.l @(56,r5),r0
866 mov.l r0,@(56,r4)
868 mov.l @(52,r5),r0
869 mov.l r0,@(52,r4)
871 mov.l @(48,r5),r0
872 mov.l r0,@(48,r4)
874 mov.l @(44,r5),r0
875 mov.l r0,@(44,r4)
877 mov.l @(40,r5),r0
878 mov.l r0,@(40,r4)
880 mov.l @(36,r5),r0
881 mov.l r0,@(36,r4)
883 mov.l @(32,r5),r0
884 mov.l r0,@(32,r4)
886 mov.l @(28,r5),r0
887 mov.l r0,@(28,r4)
889 mov.l @(24,r5),r0
890 mov.l r0,@(24,r4)
892 mov.l @(20,r5),r0
893 mov.l r0,@(20,r4)
895 mov.l @(16,r5),r0
896 mov.l r0,@(16,r4)
898 mov.l @(12,r5),r0
899 mov.l r0,@(12,r4)
901 mov.l @(8,r5),r0
902 mov.l r0,@(8,r4)
904 mov.l @(4,r5),r0
905 mov.l r0,@(4,r4)
907 mov.l @(0,r5),r0
908 mov.l r0,@(0,r4)
910 add #-16,r6 ! one 16-longword group consumed
911 cmp/pl r6 ! T = (r6 > 0)
912 bf done ! nothing left for a full pass: finish the tail
914 add #64,r5
915 bra GLOBAL(movmem)
916 add #64,r4 ! delay slot
! Close the function: emit the end label and .size.  This line used to be
! a second FUNC(), which repeated .type and never emitted .size.
918 ENDFUNC(GLOBAL(movmem))
919 #endif
921 #ifdef L_movmem_i4
!
! GLOBAL(movmem_i4_even), GLOBAL(movmem_i4_odd), GLOBAL(movmemSI12_i4)
!
! Longword block copies that read the source with post-increment loads
! and interleave the loads with the stores.
!
! Entry:
!   r4: Destination address
!   r5: Source address
!   r6: Loop counter for the _even / _odd entries (decremented with "dt";
!       not used by movmemSI12_i4)
!
! Destroys:
!   r0, r1, r2 (all entries); r3, r4, r5, r6, T bit (_even / _odd)
!
! NOTE(review): the "rts" lines (embedded numbers 938, 971, 984) were
! absent from this listing and have been restored; confirm against the
! upstream blob.
!
922 .text
923 .global GLOBAL(movmem_i4_even)
924 .global GLOBAL(movmem_i4_odd)
925 .global GLOBAL(movmemSI12_i4)
927 FUNC(GLOBAL(movmem_i4_even))
928 FUNC(GLOBAL(movmem_i4_odd))
929 FUNC(GLOBAL(movmemSI12_i4))
931 ALIAS(movstr_i4_even,movmem_i4_even)
932 ALIAS(movstr_i4_odd,movmem_i4_odd)
933 ALIAS(movstrSI12_i4,movmemSI12_i4)
935 .p2align 5
! Loop exit taken from L_movmem_loop: store the two longwords already
! loaded into r0 / r1 and return.
936 L_movmem_2mod4_end:
937 mov.l r0,@(16,r4)
938 rts
939 mov.l r1,@(20,r4) ! delay slot
941 .p2align 2
943 GLOBAL(movmem_i4_even):
944 mov.l @r5+,r0
945 bra L_movmem_start_even
946 mov.l @r5+,r1 ! delay slot
948 GLOBAL(movmem_i4_odd):
949 mov.l @r5+,r1
950 add #-4,r4 ! bias r4 so the stores below use offsets 4, 8, 12
951 mov.l @r5+,r2
952 mov.l @r5+,r3
953 mov.l r1,@(4,r4)
954 mov.l r2,@(8,r4)
956 L_movmem_loop:
957 mov.l r3,@(12,r4)
958 dt r6 ! r6--, T = (r6 == 0)
959 mov.l @r5+,r0
960 bt/s L_movmem_2mod4_end
961 mov.l @r5+,r1 ! delay slot
962 add #16,r4
963 L_movmem_start_even:
964 mov.l @r5+,r2
965 mov.l @r5+,r3
966 mov.l r0,@r4
967 dt r6 ! r6--, T = (r6 == 0)
968 mov.l r1,@(4,r4)
969 bf/s L_movmem_loop
970 mov.l r2,@(8,r4) ! delay slot
971 rts
972 mov.l r3,@(12,r4) ! delay slot: final store
974 ENDFUNC(GLOBAL(movmem_i4_even))
975 ENDFUNC(GLOBAL(movmem_i4_odd))
977 .p2align 4
! Fixed 12-byte copy: three loads, then three stores.
978 GLOBAL(movmemSI12_i4):
979 mov.l @r5,r0
980 mov.l @(4,r5),r1
981 mov.l @(8,r5),r2
982 mov.l r0,@r4
983 mov.l r1,@(4,r4)
984 rts
985 mov.l r2,@(8,r4) ! delay slot
987 ENDFUNC(GLOBAL(movmemSI12_i4))
988 #endif
990 #ifdef L_mulsi3
!
! GLOBAL(mulsi3)
!
! 32 x 32 -> 32 bit multiply built from 16 x 16 "mulu.w" partial products.
!
! Entry:
!   r4, r5: Operands
!
! Exit:
!   r0: Low 32 bits of the product
!
! Destroys:
!   r1, r2, r3, macl, T bit
!
! NOTE(review): the "rts" before the final add (embedded number 1024) was
! absent from this listing and has been restored; confirm against the
! upstream blob.
!
993 .global GLOBAL(mulsi3)
994 FUNC(GLOBAL(mulsi3))
996 ! r4 = aabb
997 ! r5 = ccdd
998 ! r0 = aabb*ccdd via partial products
1000 ! if aa == 0 and cc = 0
1001 ! r0 = bb*dd
1003 ! else
1004 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
1007 GLOBAL(mulsi3):
1008 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
1009 mov r5,r3 ! r3 = ccdd
1010 swap.w r4,r2 ! r2 = bbaa
1011 xtrct r2,r3 ! r3 = aacc
1012 tst r3,r3 ! msws zero ?
1013 bf hiset
1014 rts ! yes - then we have the answer
1015 sts macl,r0
1017 hiset: sts macl,r0 ! r0 = bb*dd
1018 mulu.w r2,r5 ! brewing macl = aa*dd
1019 sts macl,r1
1020 mulu.w r3,r4 ! brewing macl = cc*bb
1021 sts macl,r2
1022 add r1,r2 ! r2 = aa*dd + cc*bb
1023 shll16 r2 ! scale the cross terms by 65536
1024 rts
1025 add r2,r0 ! delay slot: add the cross terms to bb*dd
! Close the function: emit the end label and .size.  This line used to be
! a second FUNC(), which repeated .type and never emitted .size.
1027 ENDFUNC(GLOBAL(mulsi3))
1028 #endif
1029 #endif /* ! __SH5__ */
1030 #ifdef L_sdivsi3_i4
!
! GLOBAL(sdivsi3_i4)
!
! Signed 32-bit divide done in the FPU: both operands are converted to
! double, divided, and the quotient is truncated back to an integer that
! is left in fpul.
!
! NOTE(review): the "rts" lines (embedded numbers 1044, 1068) were absent
! from this listing and have been restored; confirm against the upstream
! blob.
!
1031 .title "SH DIVIDE"
1032 !! 4 byte integer Divide code for the Renesas SH
1033 #ifdef __SH4__
1034 !! args in r4 and r5, result in fpul, clobber dr0, dr2
1036 .global GLOBAL(sdivsi3_i4)
1037 FUNC(GLOBAL(sdivsi3_i4))
1038 GLOBAL(sdivsi3_i4):
1039 lds r4,fpul
1040 float fpul,dr0 ! dr0 = (double) dividend
1041 lds r5,fpul
1042 float fpul,dr2 ! dr2 = (double) divisor
1043 fdiv dr2,dr0 ! dr0 = dr0 / dr2
1044 rts
1045 ftrc dr0,fpul ! delay slot: truncate the quotient into fpul
1047 ENDFUNC(GLOBAL(sdivsi3_i4))
1048 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
1049 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
1051 #if ! __SH5__ || __SH5__ == 32
1052 #if __SH5__
1053 .mode SHcompact
1054 #endif
1055 .global GLOBAL(sdivsi3_i4)
1056 FUNC(GLOBAL(sdivsi3_i4))
1057 GLOBAL(sdivsi3_i4):
1058 sts.l fpscr,@-r15 ! save the caller's FPSCR on the stack
1059 mov #8,r2
1060 swap.w r2,r2 ! r2 = 0x80000
1061 lds r2,fpscr ! presumably selects double precision (PR bit) - confirm
1062 lds r4,fpul
1063 float fpul,dr0 ! dr0 = (double) dividend
1064 lds r5,fpul
1065 float fpul,dr2 ! dr2 = (double) divisor
1066 fdiv dr2,dr0 ! dr0 = dr0 / dr2
1067 ftrc dr0,fpul ! truncate the quotient into fpul
1068 rts
1069 lds.l @r15+,fpscr ! delay slot: restore the caller's FPSCR
1071 ENDFUNC(GLOBAL(sdivsi3_i4))
1072 #endif /* ! __SH5__ || __SH5__ == 32 */
1073 #endif /* ! __SH4__ */
1074 #endif
1076 #ifdef L_sdivsi3
1077 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1078 sh2e/sh3e code. */
1079 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1081 !! Steve Chamberlain
1082 !! sac@cygnus.com
/* GLOBAL(sdivsi3): signed 32-bit divide, r0 = r4 / r5.
   Three implementations are selected by the preprocessor below: SHmedia
   (with a disabled "#if 0" reference version), SHmedia for
   m5compact-nofpu, and SHcompact using the div0s / div1 step
   instructions.

   NOTE(review): several short lines were absent from this listing and
   have been restored - the "{", "do", "}" and closing comment delimiter
   of the reference C code (embedded numbers 1105, 1121, 1127, 1128) and
   the "rts" of the SHcompact version (1326).  Without the closing
   delimiter the comment ran on and swallowed the "#else" of the "#if 0".
   Confirm against the upstream blob.  */
1086 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1088 .global GLOBAL(sdivsi3)
1089 FUNC(GLOBAL(sdivsi3))
1090 #if __SHMEDIA__
1091 #if __SH5__ == 32
1092 .section .text..SHmedia32,"ax"
1093 #else
1094 .text
1095 #endif
1096 .align 2
1097 #if 0
1098 /* The assembly code that follows is a hand-optimized version of the C
1099 code that follows. Note that the registers that are modified are
1100 exactly those listed as clobbered in the patterns divsi3_i1 and
1101 divsi3_i1_media.
1103 int __sdivsi3 (i, j)
1104 int i, j;
1105 {
1106 register unsigned long long r18 asm ("r18");
1107 register unsigned long long r19 asm ("r19");
1108 register unsigned long long r0 asm ("r0") = 0;
1109 register unsigned long long r1 asm ("r1") = 1;
1110 register int r2 asm ("r2") = i >> 31;
1111 register int r3 asm ("r3") = j >> 31;
1113 r2 = r2 ? r2 : r1;
1114 r3 = r3 ? r3 : r1;
1115 r18 = i * r2;
1116 r19 = j * r3;
1117 r2 *= r3;
1119 r19 <<= 31;
1120 r1 <<= 31;
1121 do
1122 if (r18 >= r19)
1123 r0 |= r1, r18 -= r19;
1124 while (r19 >>= 1, r1 >>= 1);
1126 return r2 * (int)r0;
1127 }
1128 */
1129 GLOBAL(sdivsi3):
1130 pt/l LOCAL(sdivsi3_dontadd), tr2
1131 pt/l LOCAL(sdivsi3_loop), tr1
1132 ptabs/l r18, tr0
1133 movi 0, r0
1134 movi 1, r1
1135 shari.l r4, 31, r2
1136 shari.l r5, 31, r3
1137 cmveq r2, r1, r2
1138 cmveq r3, r1, r3
1139 muls.l r4, r2, r18
1140 muls.l r5, r3, r19
1141 muls.l r2, r3, r2
1142 shlli r19, 31, r19
1143 shlli r1, 31, r1
1144 LOCAL(sdivsi3_loop):
1145 bgtu r19, r18, tr2
1146 or r0, r1, r0
1147 sub r18, r19, r18
1148 LOCAL(sdivsi3_dontadd):
1149 shlri r1, 1, r1
1150 shlri r19, 1, r19
1151 bnei r1, 0, tr1
1152 muls.l r0, r2, r0
1153 add.l r0, r63, r0
1154 blink tr0, r63
1155 #else /* ! 0 */
1156 // inputs: r4,r5
1157 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1158 // result in r0
1159 GLOBAL(sdivsi3):
1160 // can create absolute value without extra latency,
1161 // but dependent on proper sign extension of inputs:
1162 // shari.l r5,31,r2
1163 // xor r5,r2,r20
1164 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1165 shari.l r5,31,r2
1166 ori r2,1,r2
1167 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1168 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1169 shari.l r4,31,r3
1170 nsb r20,r0
1171 shlld r20,r0,r25
1172 shlri r25,48,r25
1173 sub r19,r25,r1
1174 mmulfx.w r1,r1,r2
1175 mshflo.w r1,r63,r1
1176 // If r4 was to be used in-place instead of r21, could use this sequence
1177 // to compute absolute:
1178 // sub r63,r4,r19 // compute absolute value of r4
1179 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1180 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1181 ori r3,1,r3
1182 mmulfx.w r25,r2,r2
1183 sub r19,r0,r0
1184 muls.l r4,r3,r21
1185 msub.w r1,r2,r2
1186 addi r2,-2,r1
1187 mulu.l r21,r1,r19
1188 mmulfx.w r2,r2,r2
1189 shlli r1,15,r1
1190 shlrd r19,r0,r19
1191 mulu.l r19,r20,r3
1192 mmacnfx.wl r25,r2,r1
1193 ptabs r18,tr0
1194 sub r21,r3,r25
1196 mulu.l r25,r1,r2
1197 addi r0,14,r0
1198 xor r4,r5,r18
1199 shlrd r2,r0,r2
1200 mulu.l r2,r20,r3
1201 add r19,r2,r19
1202 shari.l r18,31,r18
1203 sub r25,r3,r25
1205 mulu.l r25,r1,r2
1206 sub r25,r20,r25
1207 add r19,r18,r19
1208 shlrd r2,r0,r2
1209 mulu.l r2,r20,r3
1210 addi r25,1,r25
1211 add r19,r2,r19
1213 cmpgt r25,r3,r25
1214 add.l r19,r25,r0
1215 xor r0,r18,r0
1216 blink tr0,r63
1217 #endif
1218 #elif defined __SHMEDIA__
1219 /* m5compact-nofpu */
1220 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1221 .mode SHmedia
1222 .section .text..SHmedia32,"ax"
1223 .align 2
1224 GLOBAL(sdivsi3):
1225 pt/l LOCAL(sdivsi3_dontsub), tr0
1226 pt/l LOCAL(sdivsi3_loop), tr1
1227 ptabs/l r18,tr2
1228 shari.l r4,31,r18
1229 shari.l r5,31,r19
1230 xor r4,r18,r20
1231 xor r5,r19,r21
1232 sub.l r20,r18,r20
1233 sub.l r21,r19,r21
1234 xor r18,r19,r19
1235 shlli r21,32,r25
1236 addi r25,-1,r21
1237 addz.l r20,r63,r20
1238 LOCAL(sdivsi3_loop):
1239 shlli r20,1,r20
1240 bgeu/u r21,r20,tr0
1241 sub r20,r21,r20
1242 LOCAL(sdivsi3_dontsub):
1243 addi.l r25,-1,r25
1244 bnei r25,-32,tr1
1245 xor r20,r19,r20
1246 sub.l r20,r19,r0
1247 blink tr2,r63
1248 #else /* ! __SHMEDIA__ */
! SHcompact version: 32 div1 steps, one quotient bit per step, with the
! quotient bits rotated into r1 through the T bit.
1249 GLOBAL(sdivsi3):
1250 mov r4,r1 ! r1 = dividend, later the quotient
1251 mov r5,r0 ! r0 = divisor
1253 tst r0,r0
1254 bt div0 ! divisor is zero: return 0
1255 mov #0,r2
1256 div0s r2,r1 ! T = sign of the dividend
1257 subc r3,r3 ! r3 = -T: sign extension of the dividend
1258 subc r2,r1 ! r1 -= T: adjust a negative dividend
1259 div0s r0,r3 ! set up Q, M and T for the signed divide steps
1260 rotcl r1
1261 div1 r0,r3
1262 rotcl r1
1263 div1 r0,r3
1264 rotcl r1
1265 div1 r0,r3
1266 rotcl r1
1267 div1 r0,r3
1268 rotcl r1
1269 div1 r0,r3
1270 rotcl r1
1271 div1 r0,r3
1272 rotcl r1
1273 div1 r0,r3
1274 rotcl r1
1275 div1 r0,r3
1276 rotcl r1
1277 div1 r0,r3
1278 rotcl r1
1279 div1 r0,r3
1280 rotcl r1
1281 div1 r0,r3
1282 rotcl r1
1283 div1 r0,r3
1284 rotcl r1
1285 div1 r0,r3
1286 rotcl r1
1287 div1 r0,r3
1288 rotcl r1
1289 div1 r0,r3
1290 rotcl r1
1291 div1 r0,r3
1292 rotcl r1
1293 div1 r0,r3
1294 rotcl r1
1295 div1 r0,r3
1296 rotcl r1
1297 div1 r0,r3
1298 rotcl r1
1299 div1 r0,r3
1300 rotcl r1
1301 div1 r0,r3
1302 rotcl r1
1303 div1 r0,r3
1304 rotcl r1
1305 div1 r0,r3
1306 rotcl r1
1307 div1 r0,r3
1308 rotcl r1
1309 div1 r0,r3
1310 rotcl r1
1311 div1 r0,r3
1312 rotcl r1
1313 div1 r0,r3
1314 rotcl r1
1315 div1 r0,r3
1316 rotcl r1
1317 div1 r0,r3
1318 rotcl r1
1319 div1 r0,r3
1320 rotcl r1
1321 div1 r0,r3
1322 rotcl r1
1323 div1 r0,r3
1324 rotcl r1
1325 addc r2,r1 ! r1 += T: final quotient correction
1326 rts
1327 mov r1,r0 ! delay slot: result in r0
1330 div0: rts
1331 mov #0,r0
1333 ENDFUNC(GLOBAL(sdivsi3))
1334 #endif /* ! __SHMEDIA__ */
1335 #endif /* ! __SH4__ */
1336 #endif
1337 #ifdef L_udivsi3_i4
!
! GLOBAL(udivsi3_i4)
!
! Unsigned 32-bit divide done in the FPU, result in fpul.
! The SHcompact variants handle a divisor of 0 or 1 separately
! ("trivial": the dividend is returned unchanged).  Otherwise both
! operands have their top bit flipped (xor with 0x80000000), are converted
! as signed integers, and the constant 2147483648.0 stored at L1 is added
! back to recover the unsigned values before the divide.
!
! NOTE(review): the "L1:" labels (embedded numbers 1384, 1453) and the
! "rts" lines (1373, 1377, 1442, 1449) were absent from this listing and
! have been restored; "mova L1,r0" had no target without them.  Confirm
! against the upstream blob.
!
1339 .title "SH DIVIDE"
1340 !! 4 byte integer Divide code for the Renesas SH
1341 #ifdef __SH4__
1342 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1343 !! and t bit
1345 .global GLOBAL(udivsi3_i4)
1346 FUNC(GLOBAL(udivsi3_i4))
1347 GLOBAL(udivsi3_i4):
1348 mov #1,r1
1349 cmp/hi r1,r5 ! T = (divisor > 1), unsigned
1350 bf trivial
1351 rotr r1 ! r1 = 0x80000000
1352 xor r1,r4 ! flip the top bit of the dividend
1353 lds r4,fpul
1354 mova L1,r0
1355 #ifdef FMOVD_WORKS
1356 fmov.d @r0+,dr4 ! dr4 = 2147483648.0
1357 #else
1358 #ifdef __LITTLE_ENDIAN__
1359 fmov.s @r0+,fr5
1360 fmov.s @r0,fr4
1361 #else
1362 fmov.s @r0+,fr4
1363 fmov.s @r0,fr5
1364 #endif
1365 #endif
1366 float fpul,dr0
1367 xor r1,r5 ! flip the top bit of the divisor
1368 lds r5,fpul
1369 float fpul,dr2
1370 fadd dr4,dr0 ! dr0 = unsigned dividend as a double
1371 fadd dr4,dr2 ! dr2 = unsigned divisor as a double
1372 fdiv dr2,dr0
1373 rts
1374 ftrc dr0,fpul ! delay slot: truncate the quotient into fpul
1376 trivial:
1377 rts
1378 lds r4,fpul ! delay slot: result = dividend
1380 .align 2
1381 #ifdef FMOVD_WORKS
1382 .align 3 ! make double below 8 byte aligned.
1383 #endif
1384 L1:
1385 .double 2147483648
1387 ENDFUNC(GLOBAL(udivsi3_i4))
1388 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1389 #if ! __SH5__ || __SH5__ == 32
1390 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1391 .mode SHmedia
1392 .global GLOBAL(udivsi3_i4)
1393 FUNC(GLOBAL(udivsi3_i4))
1394 GLOBAL(udivsi3_i4):
1395 addz.l r4,r63,r20
1396 addz.l r5,r63,r21
1397 fmov.qd r20,dr0
1398 fmov.qd r21,dr32
1399 ptabs r18,tr0
1400 float.qd dr0,dr0
1401 float.qd dr32,dr32
1402 fdiv.d dr0,dr32,dr0
1403 ftrc.dq dr0,dr32
1404 fmov.s fr33,fr32
1405 blink tr0,r63
1407 ENDFUNC(GLOBAL(udivsi3_i4))
1408 #endif /* ! __SH5__ || __SH5__ == 32 */
1409 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1410 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1412 .global GLOBAL(udivsi3_i4)
! FUNC() added so this variant declares its symbol type like the other
! variants; it already closes with ENDFUNC().
FUNC(GLOBAL(udivsi3_i4))
1413 GLOBAL(udivsi3_i4):
1414 mov #1,r1
1415 cmp/hi r1,r5 ! T = (divisor > 1), unsigned
1416 bf trivial
1417 sts.l fpscr,@-r15 ! save the caller's FPSCR on the stack
1418 mova L1,r0
1419 lds.l @r0+,fpscr ! load the FPSCR mode word stored at L1
1420 rotr r1 ! r1 = 0x80000000
1421 xor r1,r4 ! flip the top bit of the dividend
1422 lds r4,fpul
1423 #ifdef FMOVD_WORKS
1424 fmov.d @r0+,dr4 ! dr4 = 2147483648.0
1425 #else
1426 #ifdef __LITTLE_ENDIAN__
1427 fmov.s @r0+,fr5
1428 fmov.s @r0,fr4
1429 #else
1430 fmov.s @r0+,fr4
1431 fmov.s @r0,fr5
1432 #endif
1433 #endif
1434 float fpul,dr0
1435 xor r1,r5 ! flip the top bit of the divisor
1436 lds r5,fpul
1437 float fpul,dr2
1438 fadd dr4,dr0 ! dr0 = unsigned dividend as a double
1439 fadd dr4,dr2 ! dr2 = unsigned divisor as a double
1440 fdiv dr2,dr0
1441 ftrc dr0,fpul ! truncate the quotient into fpul
1442 rts
1443 lds.l @r15+,fpscr ! delay slot: restore the caller's FPSCR
1445 #ifdef FMOVD_WORKS
1446 .align 3 ! make double below 8 byte aligned.
1447 #endif
1448 trivial:
1449 rts
1450 lds r4,fpul ! delay slot: result = dividend
1452 .align 2
1453 L1:
1454 #ifndef FMOVD_WORKS
1455 .long 0x80000
1456 #else
1457 .long 0x180000
1458 #endif
1459 .double 2147483648
1461 ENDFUNC(GLOBAL(udivsi3_i4))
1462 #endif /* ! __SH4__ */
1463 #endif
1465 #ifdef L_udivsi3
1466 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1467 sh2e/sh3e code. */
1468 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1470 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1471 .global GLOBAL(udivsi3)
1472 FUNC(GLOBAL(udivsi3))
1474 #if __SHMEDIA__
1475 #if __SH5__ == 32
1476 .section .text..SHmedia32,"ax"
1477 #else
1478 .text
1479 #endif
1480 .align 2
/* Disabled (#if 0) reference implementation of the SHmedia udivsi3: a
   plain 32-step shift-and-subtract loop, given first as C and then as the
   equivalent assembly.  */
1481 #if 0
1482 /* The assembly code that follows is a hand-optimized version of the C
1483 code that follows. Note that the registers that are modified are
1484 exactly those listed as clobbered in the patterns udivsi3_i1 and
1485 udivsi3_i1_media.
1487 unsigned
1488 __udivsi3 (i, j)
1489 unsigned i, j;
1490 {
1491 register unsigned long long r0 asm ("r0") = 0;
1492 register unsigned long long r18 asm ("r18") = 1;
1493 register unsigned long long r4 asm ("r4") = i;
1494 register unsigned long long r19 asm ("r19") = j;
1496 r19 <<= 31;
1497 r18 <<= 31;
1498 do
1499 if (r4 >= r19)
1500 r0 |= r18, r4 -= r19;
1501 while (r19 >>= 1, r18 >>= 1);
1503 return r0;
1504 }
1505 */
1506 GLOBAL(udivsi3):
1507 pt/l LOCAL(udivsi3_dontadd), tr2
1508 pt/l LOCAL(udivsi3_loop), tr1
1509 ptabs/l r18, tr0
/* r0 = quotient so far, r18 = current quotient bit, r19 = divisor aligned
   to that bit, r4 = remaining dividend (both zero-extended).  */
1510 movi 0, r0
1511 movi 1, r18
1512 addz.l r5, r63, r19
1513 addz.l r4, r63, r4
1514 shlli r19, 31, r19
1515 shlli r18, 31, r18
1516 LOCAL(udivsi3_loop):
/* Skip the subtract when the shifted divisor exceeds the remainder.  */
1517 bgtu r19, r4, tr2
1518 or r0, r18, r0
1519 sub r4, r19, r4
1520 LOCAL(udivsi3_dontadd):
1521 shlri r18, 1, r18
1522 shlri r19, 1, r19
/* Loop until the quotient bit has been shifted out.  */
1523 bnei r18, 0, tr1
1524 blink tr0, r63
1525 #else
/* udivsi3, SHmedia version: unsigned 32-bit divide without a loop.
   NOTE(review): as far as the code can be read, the divisor is normalised
   with nsb/shlld, its top 16 bits seed a reciprocal estimate that the
   mmulfx.w/msub.w/mmacnfx.wl steps refine, and the quotient is then built
   up in r18 from three multiply-by-reciprocal steps (with the remainder
   tracked in r25) plus a final compare-based correction -- confirm against
   the SHmedia instruction reference before relying on the details.  */
1526 GLOBAL(udivsi3):
1527 // inputs: r4,r5
1528 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1529 // result in r0.
/* r22 = zero-extended divisor; r0 = its nsb count, used as the
   normalising shift; r25 = top 16 bits of the normalised divisor.  */
1530 addz.l r5,r63,r22
1531 nsb r22,r0
1532 shlld r22,r0,r25
1533 shlri r25,48,r25
1534 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1535 sub r20,r25,r21
1536 mmulfx.w r21,r21,r19
1537 mshflo.w r21,r63,r21
/* The return address must be moved to tr0 here: r18 is reused as a
   scratch register by the mulu.l below.  */
1538 ptabs r18,tr0
1539 mmulfx.w r25,r19,r19
1540 sub r20,r0,r0
1541 /* bubble */
1542 msub.w r21,r19,r19
1543 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1544 before the msub.w, but we need a different value for
1545 r19 to keep errors under control. */
/* First quotient estimate: r18 = (dividend * reciprocal) >> r0, and
   r25 = dividend - r18 * divisor.  */
1546 mulu.l r4,r21,r18
1547 mmulfx.w r19,r19,r19
1548 shlli r21,15,r21
1549 shlrd r18,r0,r18
1550 mulu.l r18,r22,r20
1551 mmacnfx.wl r25,r19,r21
1552 /* bubble */
1553 sub r4,r20,r25
/* Second step: same computation on the remainder, added into r18.  */
1555 mulu.l r25,r21,r19
1556 addi r0,14,r0
1557 /* bubble */
1558 shlrd r19,r0,r19
1559 mulu.l r19,r22,r20
1560 add r18,r19,r18
1561 /* bubble */
1562 sub.l r25,r20,r25
/* Third step, followed by the final adjustment of the quotient by the
   0/1 result of cmpgt.  */
1564 mulu.l r25,r21,r19
1565 addz.l r25,r63,r25
1566 sub r25,r22,r25
1567 shlrd r19,r0,r19
1568 mulu.l r19,r22,r20
1569 addi r25,1,r25
1570 add r18,r19,r18
1572 cmpgt r25,r20,r25
1573 add.l r18,r25,r0
1574 blink tr0,r63
1575 #endif
1576 #elif defined (__SHMEDIA__)
/* udivsi3 for SH5 compact mode without FPU (__SHMEDIA__ defined but zero):
   a 32-iteration shift-and-subtract loop written in SHmedia code.
   In: r4 = dividend, r5 = divisor.  Out: r0 = quotient.  Returns via r18.  */
1577 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1578 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1579 So use a short shmedia loop. */
1580 // clobbered: r20,r21,r25,tr0,tr1,tr2
1581 .mode SHmedia
1582 .section .text..SHmedia32,"ax"
1583 .align 2
1584 GLOBAL(udivsi3):
1585 pt/l LOCAL(udivsi3_dontsub), tr0
1586 pt/l LOCAL(udivsi3_loop), tr1
1587 ptabs/l r18,tr2
/* r25 = divisor << 32; its low 32 bits (zero here) double as the loop
   counter.  r21 = (divisor << 32) - 1, so subtracting r21 takes the
   divisor off the upper half and adds 1 (the new quotient bit) to the
   lower half in a single instruction.  r20 = zero-extended dividend.  */
1588 shlli r5,32,r25
1589 addi r25,-1,r21
1590 addz.l r4,r63,r20
1591 LOCAL(udivsi3_loop):
1592 shlli r20,1,r20
/* Skip the subtract while r21 >= r20 (unsigned).  */
1593 bgeu/u r21,r20,tr0
1594 sub r20,r21,r20
1595 LOCAL(udivsi3_dontsub):
/* 32-bit decrement of the counter; leave the loop once it reaches -32.  */
1596 addi.l r25,-1,r25
1597 bnei r25,-32,tr1
/* The quotient bits have accumulated in the low 32 bits of r20.  */
1598 add.l r20,r63,r0
1599 blink tr2,r63
1600 #else /* ! defined (__SHMEDIA__) */
/* udivsi3 for SH1..SH4 (SHcompact): unsigned 32-bit divide built from the
   one-bit division step div1.
   In:  r4 = dividend, r5 = divisor.   Out: r0 = quotient.
   pr is saved on the stack around the bsr calls to the helpers below.

   Helpers (each returns with rts and one more div1 in the delay slot):
     LOCAL(div8)  - 8 div1 steps (one step, then falls into div7).
     LOCAL(div7)  - 7 div1 steps.
     LOCAL(divx4) - 4 div1 steps with a "rotcl r0" after each of the
                    first three.  */
1601 LOCAL(div8):
1602 div1 r5,r4
1603 LOCAL(div7):
1604 div1 r5,r4; div1 r5,r4; div1 r5,r4
1605 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1607 LOCAL(divx4):
1608 div1 r5,r4; rotcl r0
1609 div1 r5,r4; rotcl r0
1610 div1 r5,r4; rotcl r0
1611 rts; div1 r5,r4
1613 GLOBAL(udivsi3):
1614 sts.l pr,@-r15
/* T = 1 when the divisor fits in 16 bits (r5 equals its own zero-extended
   low half).  */
1615 extu.w r5,r0
1616 cmp/eq r5,r0
/* Where bf/s exists, div0u sits in its delay slot and so runs on both
   paths; sh1 has only the plain bf, so the large_divisor path repeats
   div0u itself.  */
1617 #ifdef __sh1__
1618 bf LOCAL(large_divisor)
1619 #else
1620 bf/s LOCAL(large_divisor)
1621 #endif
1622 div0u
/* Small divisor: two passes of 16 div1 steps each (div8 + the div1 in the
   delay slot of the second bsr + div7), with the divisor moved into the
   upper half of r5 by the shll16 in the first delay slot.  */
1623 swap.w r4,r0
1624 shlr16 r4
1625 bsr LOCAL(div8)
1626 shll16 r5
1627 bsr LOCAL(div7)
1628 div1 r5,r4
1629 xtrct r4,r0
1630 xtrct r0,r4
1631 bsr LOCAL(div8)
1632 swap.w r4,r4
1633 bsr LOCAL(div7)
1634 div1 r5,r4
1635 lds.l @r15+,pr
1636 xtrct r4,r0
1637 swap.w r0,r0
1638 rotcl r0
/* Return; the delay slot undoes the shll16 so r5 leaves unchanged.  */
1639 rts
1640 shlr16 r5
1642 LOCAL(large_divisor):
1643 #ifdef __sh1__
1644 div0u
1645 #endif
/* Large divisor: split the dividend so that r4 holds its upper half and
   r0 its lower half (in the top 16 bits), then run 16 div1 steps as four
   divx4 calls, each preceded by a rotcl r0 in the bsr delay slot.  */
1646 mov #0,r0
1647 xtrct r4,r0
1648 xtrct r0,r4
1649 bsr LOCAL(divx4)
1650 rotcl r0
1651 bsr LOCAL(divx4)
1652 rotcl r0
1653 bsr LOCAL(divx4)
1654 rotcl r0
1655 bsr LOCAL(divx4)
1656 rotcl r0
1657 lds.l @r15+,pr
/* Return; the final rotcl r0 executes in the delay slot.  */
1658 rts
1659 rotcl r0
1661 ENDFUNC(GLOBAL(udivsi3))
1662 #endif /* ! __SHMEDIA__ */
1663 #endif /* __SH4__ */
1664 #endif /* L_udivsi3 */
1666 #ifdef L_udivdi3
1667 #ifdef __SHMEDIA__
1668 .mode SHmedia
1669 .section .text..SHmedia32,"ax"
1670 .align 2
/* udivdi3: unsigned 64-bit divide for SHmedia.
   In:  r2 = dividend, r3 = divisor (as read below).  Out: r2 = quotient.
   Returns through r18.
   Registers written: r0, r1, r2, r4-r9, r19-r22, r25 and tr0 only.
   Outline, as far as it can be read from the code and its own comments:
   the divisor is normalised with nsb/shlld, a 16-bit reciprocal estimate
   is refined by the mmulfx.w/msub.w steps into the "31 bit unsigned
   reciprocal" in r1, and the quotient is accumulated in r8 from successive
   multiply-by-reciprocal steps.  The branch on r9 = 32 - r22 selects
   LOCAL(large_divisor) when r22 < 32; otherwise the small-divisor path
   with a third divide step is used.  */
1671 .global GLOBAL(udivdi3)
1672 FUNC(GLOBAL(udivdi3))
1673 GLOBAL(udivdi3):
/* r22 = nsb of (divisor >> 1), used as normalising shift count;
   r6 = normalised divisor; r5 = its top 15 bits.  */
1674 shlri r3,1,r4
1675 nsb r4,r22
1676 shlld r3,r22,r6
1677 shlri r6,49,r5
1678 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1679 sub r21,r5,r1
1680 mmulfx.w r1,r1,r4
1681 mshflo.w r1,r63,r1
1682 sub r63,r22,r20 // r63 == 64 % 64
1683 mmulfx.w r5,r4,r4
1684 pta LOCAL(large_divisor),tr0
1685 addi r20,32,r9
1686 msub.w r1,r4,r1
1687 madd.w r1,r1,r1
1688 mmulfx.w r1,r1,r4
/* r7 = upper 32 bits of the normalised divisor.  */
1689 shlri r6,32,r7
1690 bgt/u r9,r63,tr0 // large_divisor
1691 mmulfx.w r5,r4,r4
1692 shlri r2,32+14,r19
1693 addi r22,-31,r0
1694 msub.w r1,r4,r1
1696 mulu.l r1,r7,r4
1697 addi r1,-3,r5
1698 mulu.l r5,r19,r5
1699 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1700 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1701 the case may be, %0000000000000000 000.11111111111, still */
1702 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1703 mulu.l r5,r3,r8
1704 mshalds.l r1,r21,r1
1705 shari r4,26,r4
1706 shlld r8,r0,r8
1707 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1708 sub r2,r8,r2
1709 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1711 shlri r2,22,r21
1712 mulu.l r21,r1,r21
1713 shlld r5,r0,r8
1714 addi r20,30-22,r0
1715 shlrd r21,r0,r21
1716 mulu.l r21,r3,r5
1717 add r8,r21,r8
1718 mcmpgt.l r21,r63,r21 // See Note 1
1719 addi r20,30,r0
1720 mshfhi.l r63,r21,r21
1721 sub r2,r5,r2
1722 andc r2,r21,r2
1724 /* small divisor: need a third divide step */
1725 mulu.l r2,r1,r7
1726 ptabs r18,tr0
1727 addi r2,1,r2
1728 shlrd r7,r0,r7
1729 mulu.l r7,r3,r5
1730 add r8,r7,r8
1731 sub r2,r3,r2
/* Final correction: add the 0/1 result of the compare to the quotient
   accumulated in r8.  */
1732 cmpgt r2,r5,r5
1733 add r8,r5,r2
1734 /* could test r3 here to check for divide by zero. */
1735 blink tr0,r63
1737 LOCAL(large_divisor):
1738 mmulfx.w r5,r4,r4
/* r25 = dividend >> r9; r8 = its upper 32 bits.  */
1739 shlrd r2,r9,r25
1740 shlri r25,32,r8
1741 msub.w r1,r4,r1
1743 mulu.l r1,r7,r4
1744 addi r1,-3,r5
1745 mulu.l r5,r8,r5
1746 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1747 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1748 the case may be, %0000000000000000 000.11111111111, still */
1749 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1750 shlri r5,14-1,r8
1751 mulu.l r8,r7,r5
1752 mshalds.l r1,r21,r1
1753 shari r4,26,r4
1754 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1755 sub r25,r5,r25
1756 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1758 shlri r25,22,r21
1759 mulu.l r21,r1,r21
1760 pta LOCAL(no_lo_adj),tr0
1761 addi r22,32,r0
1762 shlri r21,40,r21
1763 mulu.l r21,r7,r5
1764 add r8,r21,r8
1765 shlld r2,r0,r2
1766 sub r25,r5,r25
/* When the rest in r25 is still at least r7, bump the quotient once more
   and reduce the rest.  */
1767 bgtu/u r7,r25,tr0 // no_lo_adj
1768 addi r8,1,r8
1769 sub r25,r7,r25
1770 LOCAL(no_lo_adj):
1771 mextr4 r2,r25,r2
1773 /* large_divisor: only needs a few adjustments. */
1774 mulu.l r8,r6,r5
1775 ptabs r18,tr0
1776 /* bubble */
/* Quotient = r8 minus the 0/1 result of the unsigned compare.  */
1777 cmpgtu r5,r2,r5
1778 sub r8,r5,r2
1779 blink tr0,r63
1780 ENDFUNC(GLOBAL(udivdi3))
1781 /* Note 1: To shift the result of the second divide stage so that the result
1782 always fits into 32 bits, yet we still reduce the rest sufficiently
1783 would require a lot of instructions to do the shifts just right. Using
1784 the full 64 bit shift result to multiply with the divisor would require
1785 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1786 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1787 know that the rest after taking this partial result into account will
1788 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1789 upper 32 bits of the partial result are nonzero. */
1790 #endif /* __SHMEDIA__ */
1791 #endif /* L_udivdi3 */
1793 #ifdef L_divdi3
1794 #ifdef __SHMEDIA__
1795 .mode SHmedia
1796 .section .text..SHmedia32,"ax"
1797 .align 2
/* divdi3: signed 64-bit divide for SHmedia, implemented on top of udivdi3.
   In:  r2 = dividend, r3 = divisor.   Out: r2 = quotient.
   Both operands are replaced by their absolute values.  When their signs
   agree the function branches straight to udivdi3, which then returns to
   our caller through the untouched r18; otherwise udivdi3 is called and
   its result negated.  */
1798 .global GLOBAL(divdi3)
1799 FUNC(GLOBAL(divdi3))
1800 GLOBAL(divdi3):
1801 pta GLOBAL(udivdi3),tr0
/* r22 / r23 = all-ones when r2 / r3 is negative, zero otherwise.  */
1802 shari r2,63,r22
1803 shari r3,63,r23
/* (x ^ mask) - mask yields |x|.  */
1804 xor r2,r22,r2
1805 xor r3,r23,r3
1806 sub r2,r22,r2
1807 sub r3,r23,r3
/* Equal sign masks: the quotient is non-negative, so tail-call udivdi3.  */
1808 beq/u r22,r23,tr0
/* Signs differ: keep our return address in tr1 (udivdi3 writes only tr0),
   call udivdi3 with r18 as link register, then negate the quotient.  */
1809 ptabs r18,tr1
1810 blink tr0,r18
1811 sub r63,r2,r2
1812 blink tr1,r63
1813 ENDFUNC(GLOBAL(divdi3))
1814 #endif /* __SHMEDIA__ */
1815 #endif /* L_divdi3 */
1817 #ifdef L_umoddi3
1818 #ifdef __SHMEDIA__
1819 .mode SHmedia
1820 .section .text..SHmedia32,"ax"
1821 .align 2
/* umoddi3: unsigned 64-bit remainder for SHmedia.
   In:  r2 = dividend, r3 = divisor (as read below).  Out: r2 = remainder.
   Returns through r18.
   Registers written: r0, r1, r2, r4-r9, r19-r22, r25 and tr0 only.
   The reciprocal computation and the divide steps follow the same
   instruction sequence as udivdi3; the difference is that the running
   rest, not the quotient, is kept and corrected at the end.  In the
   large-divisor path the rest is finally shifted right by the normalising
   count r22.  */
1822 .global GLOBAL(umoddi3)
1823 FUNC(GLOBAL(umoddi3))
1824 GLOBAL(umoddi3):
/* r22 = nsb of (divisor >> 1), used as normalising shift count;
   r6 = normalised divisor; r5 = its top 15 bits.  */
1825 shlri r3,1,r4
1826 nsb r4,r22
1827 shlld r3,r22,r6
1828 shlri r6,49,r5
1829 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1830 sub r21,r5,r1
1831 mmulfx.w r1,r1,r4
1832 mshflo.w r1,r63,r1
1833 sub r63,r22,r20 // r63 == 64 % 64
1834 mmulfx.w r5,r4,r4
1835 pta LOCAL(large_divisor),tr0
1836 addi r20,32,r9
1837 msub.w r1,r4,r1
1838 madd.w r1,r1,r1
1839 mmulfx.w r1,r1,r4
/* r7 = upper 32 bits of the normalised divisor.  */
1840 shlri r6,32,r7
/* r9 = 32 - r22; a positive value selects the large-divisor path.  */
1841 bgt/u r9,r63,tr0 // large_divisor
1842 mmulfx.w r5,r4,r4
1843 shlri r2,32+14,r19
1844 addi r22,-31,r0
1845 msub.w r1,r4,r1
1847 mulu.l r1,r7,r4
1848 addi r1,-3,r5
1849 mulu.l r5,r19,r5
1850 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1851 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1852 the case may be, %0000000000000000 000.11111111111, still */
1853 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1854 mulu.l r5,r3,r5
1855 mshalds.l r1,r21,r1
1856 shari r4,26,r4
1857 shlld r5,r0,r5
1858 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1859 sub r2,r5,r2
1860 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1862 shlri r2,22,r21
1863 mulu.l r21,r1,r21
1864 addi r20,30-22,r0
1865 /* bubble */ /* could test r3 here to check for divide by zero. */
1866 shlrd r21,r0,r21
1867 mulu.l r21,r3,r5
1868 mcmpgt.l r21,r63,r21 // See Note 1
1869 addi r20,30,r0
1870 mshfhi.l r63,r21,r21
1871 sub r2,r5,r2
1872 andc r2,r21,r2
1874 /* small divisor: need a third divide step */
1875 mulu.l r2,r1,r7
1876 ptabs r18,tr0
1877 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1878 shlrd r7,r0,r7
1879 mulu.l r7,r3,r5
1880 /* bubble */
/* Pick between rest (r2) and rest - r3 (r8) with the compare result,
   then subtract the last partial product.  */
1881 addi r8,1,r7
1882 cmpgt r7,r5,r7
1883 cmvne r7,r8,r2
1884 sub r2,r5,r2
1885 blink tr0,r63
1887 LOCAL(large_divisor):
1888 mmulfx.w r5,r4,r4
/* r25 = dividend >> r9; r8 = its upper 32 bits.  */
1889 shlrd r2,r9,r25
1890 shlri r25,32,r8
1891 msub.w r1,r4,r1
1893 mulu.l r1,r7,r4
1894 addi r1,-3,r5
1895 mulu.l r5,r8,r5
1896 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1897 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1898 the case may be, %0000000000000000 000.11111111111, still */
1899 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1900 shlri r5,14-1,r8
1901 mulu.l r8,r7,r5
1902 mshalds.l r1,r21,r1
1903 shari r4,26,r4
1904 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1905 sub r25,r5,r25
1906 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1908 shlri r25,22,r21
1909 mulu.l r21,r1,r21
1910 pta LOCAL(no_lo_adj),tr0
1911 addi r22,32,r0
1912 shlri r21,40,r21
1913 mulu.l r21,r7,r5
1914 add r8,r21,r8
1915 shlld r2,r0,r2
1916 sub r25,r5,r25
/* When the rest in r25 is still at least r7, take one more r7 off it.  */
1917 bgtu/u r7,r25,tr0 // no_lo_adj
1918 addi r8,1,r8
1919 sub r25,r7,r25
1920 LOCAL(no_lo_adj):
1921 mextr4 r2,r25,r2
1923 /* large_divisor: only needs a few adjustments. */
1924 mulu.l r8,r6,r5
1925 ptabs r18,tr0
/* If the product r5 exceeds the rest r2, use rest + normalised divisor
   (r7) instead before subtracting; then undo the normalising shift.  */
1926 add r2,r6,r7
1927 cmpgtu r5,r2,r8
1928 cmvne r8,r7,r2
1929 sub r2,r5,r2
1930 shlrd r2,r22,r2
1931 blink tr0,r63
1932 ENDFUNC(GLOBAL(umoddi3))
1933 /* Note 1: To shift the result of the second divide stage so that the result
1934 always fits into 32 bits, yet we still reduce the rest sufficiently
1935 would require a lot of instructions to do the shifts just right. Using
1936 the full 64 bit shift result to multiply with the divisor would require
1937 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1938 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1939 know that the rest after taking this partial result into account will
1940 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1941 upper 32 bits of the partial result are nonzero. */
1942 #endif /* __SHMEDIA__ */
1943 #endif /* L_umoddi3 */
1945 #ifdef L_moddi3
1946 #ifdef __SHMEDIA__
1947 .mode SHmedia
1948 .section .text..SHmedia32,"ax"
1949 .align 2
/* moddi3: signed 64-bit remainder for SHmedia, implemented on top of
   umoddi3.
   In:  r2 = dividend, r3 = divisor.   Out: r2 = remainder.
   Both operands are replaced by their absolute values.  Only the sign of
   the dividend decides the sign of the result: a non-negative dividend
   branches straight to umoddi3 (which returns to our caller through the
   untouched r18); otherwise umoddi3 is called and its result negated.  */
1950 .global GLOBAL(moddi3)
1951 FUNC(GLOBAL(moddi3))
1952 GLOBAL(moddi3):
1953 pta GLOBAL(umoddi3),tr0
/* r22 / r23 = all-ones when r2 / r3 is negative, zero otherwise.  */
1954 shari r2,63,r22
1955 shari r3,63,r23
/* (x ^ mask) - mask yields |x|.  */
1956 xor r2,r22,r2
1957 xor r3,r23,r3
1958 sub r2,r22,r2
1959 sub r3,r23,r3
/* Dividend sign mask is zero: tail-call umoddi3.  */
1960 beq/u r22,r63,tr0
/* Negative dividend: keep our return address in tr1 (umoddi3 writes only
   tr0), call umoddi3 with r18 as link register, then negate the result.  */
1961 ptabs r18,tr1
1962 blink tr0,r18
1963 sub r63,r2,r2
1964 blink tr1,r63
1965 ENDFUNC(GLOBAL(moddi3))
1966 #endif /* __SHMEDIA__ */
1967 #endif /* L_moddi3 */
1969 #ifdef L_set_fpscr
1970 #if !defined (__SH2A_NOFPU__)
1971 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1972 #ifdef __SH5__
1973 .mode SHcompact
1974 #endif
/* set_fpscr: load FPSCR from r4 and record two variants of that value in
   the two-word array fpscr_values.
   In: r4 = new FPSCR value.  Clobbers r0-r3 (r12 is saved and restored
   in the PIC case).
   The variants differ only in bits 19 and 20 of the value, which the
   SH-4 FPSCR layout names PR and SZ:
     - one has bit 19 set (and bit 20 set too when FMOVD_WORKS),
     - the other has both bits clear.
   On SH4 / SH2A-double the first goes to fpscr_values[1] and the second
   to fpscr_values[0]; on the other configurations the slots are swapped.  */
1975 .global GLOBAL(set_fpscr)
1976 FUNC(GLOBAL(set_fpscr))
1977 GLOBAL(set_fpscr):
1978 lds r4,fpscr
/* r1 = address of fpscr_values, fetched through the GOT when PIC.  */
1979 #ifdef __PIC__
1980 mov.l r12,@-r15
1981 mova LOCAL(set_fpscr_L0),r0
1982 mov.l LOCAL(set_fpscr_L0),r12
1983 add r0,r12
1984 mov.l LOCAL(set_fpscr_L1),r0
1985 mov.l @(r0,r12),r1
1986 mov.l @r15+,r12
1987 #else
1988 mov.l LOCAL(set_fpscr_L1),r1
1989 #endif
/* Swap the halfwords so that bits 16..23 land in the low byte of r0,
   where the 8-bit immediates of or/xor can reach them: #8 is bit 19 and
   #16 is bit 20 of the real value.  */
1990 swap.w r4,r0
1991 or #24,r0
1992 #ifndef FMOVD_WORKS
1993 xor #16,r0
1994 #endif
1995 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1996 swap.w r0,r3
1997 mov.l r3,@(4,r1)
1998 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1999 swap.w r0,r2
2000 mov.l r2,@r1
2001 #endif
/* Clear whichever of bits 19/20 are still set to form the second variant.  */
2002 #ifndef FMOVD_WORKS
2003 xor #8,r0
2004 #else
2005 xor #24,r0
2006 #endif
/* Return in either branch; the final store executes in the rts delay
   slot.  */
2007 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2008 swap.w r0,r2
2009 rts
2010 mov.l r2,@r1
2011 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2012 swap.w r0,r3
2013 rts
2014 mov.l r3,@(4,r1)
2015 #endif
/* Literal pool.  */
2016 .align 2
2017 #ifdef __PIC__
2018 LOCAL(set_fpscr_L0):
2019 .long _GLOBAL_OFFSET_TABLE_
2020 LOCAL(set_fpscr_L1):
2021 .long GLOBAL(fpscr_values@GOT)
2022 #else
2023 LOCAL(set_fpscr_L1):
2024 .long GLOBAL(fpscr_values)
2025 #endif
2027 ENDFUNC(GLOBAL(set_fpscr))
2028 #ifndef NO_FPSCR_VALUES
2029 #ifdef __ELF__
2030 .comm GLOBAL(fpscr_values),8,4
2031 #else
2032 .comm GLOBAL(fpscr_values),8
2033 #endif /* ELF */
2034 #endif /* NO_FPSCR_VALUES */
2035 #endif /* SH2E / SH3E / SH4 */
2036 #endif /* __SH2A_NOFPU__ */
2037 #endif /* L_set_fpscr */
2038 #ifdef L_ic_invalidate
2039 #if __SH5__ == 32
2040 .mode SHmedia
2041 .section .text..SHmedia32,"ax"
2042 .align 2
/* init_trampoline (SH5, __SH5__ == 32): fill in a 16-byte block at the
   address in r0 and then fall through into ic_invalidate for it.
   Layout written below: bytes 0..7 = a fixed 64-bit constant,
   bytes 8..11 = r2, bytes 12..15 = r3.
   The constant is assembled so that, for either endianness, memory holds
   the 16-bit words 0xd002, 0xd101, 0x402b, 0x0009 in that order.
   NOTE(review): these look like the SHcompact opcodes
   "mov.l @(8,pc),r0; mov.l @(4,pc),r1; jmp @r0; nop", which would make r3
   the target address and r2 the value passed in r1 -- confirm against the
   SH opcode map and the trampoline description in sh.h.  */
2043 .global GLOBAL(init_trampoline)
2044 FUNC(GLOBAL(init_trampoline))
2045 GLOBAL(init_trampoline):
2046 st.l r0,8,r2
2047 #ifdef __LITTLE_ENDIAN__
2048 movi 9,r20
2049 shori 0x402b,r20
2050 shori 0xd101,r20
2051 shori 0xd002,r20
2052 #else
2053 movi 0xffffffffffffd002,r20
2054 shori 0xd101,r20
2055 shori 0x402b,r20
2056 shori 9,r20
2057 #endif
2058 st.q r0,0,r20
2059 st.l r0,12,r3
/* ic_invalidate (SH5): write back the data-cache block holding address r0
   (ocbwb), wait for completion (synco), invalidate the matching
   instruction-cache block (icbi), resynchronise instruction fetch (synci)
   and return through r18.  */
2060 .global GLOBAL(ic_invalidate)
2061 FUNC(GLOBAL(ic_invalidate))
2062 GLOBAL(ic_invalidate):
2063 ocbwb r0,0
2064 synco
2065 icbi r0, 0
2066 ptabs r18, tr0
2067 synci
2068 blink tr0, r63
2070 ENDFUNC(GLOBAL(ic_invalidate))
2071 ENDFUNC(GLOBAL(init_trampoline))
2072 #elif defined(__SH4A__)
/* ic_invalidate (SH4A): make code just written at the address in r4
   visible to instruction fetch.  The data-cache block is written back
   (ocbwb), the write is waited for (synco), and the instruction-cache
   block is invalidated (icbi).  */
2073 .global GLOBAL(ic_invalidate)
2074 FUNC(GLOBAL(ic_invalidate))
2075 GLOBAL(ic_invalidate):
2076 ocbwb @r4
2077 synco
/* Return; the icbi executes in the rts delay slot.  */
2078 rts
2079 icbi @r4
2080 ENDFUNC(GLOBAL(ic_invalidate))
2081 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
2082 /* This assumes a direct-mapped cache, which is the case for
2083 the first SH4, but not for the second version of SH4, that
2084 uses a 2-way set-associative cache, nor SH4a, that is 4-way.
2085 SH4a fortunately offers an instruction to invalidate the
2086 instruction cache, and we use it above, but SH4 doesn't.
2087 However, since the libraries don't contain any nested
2088 functions (the only case in which GCC would emit this pattern)
2089 and we actually emit the ic_invalidate_line_i pattern for
2090 cache invalidation on all SH4 multilibs (even 4-nofpu, that
2091 isn't even covered here), and pre-SH4 cores don't have
2092 caches, it seems like this code is pointless, unless it's
2093 meant for backward binary compatibility or for userland-only
2094 cache invalidation for say sh4-*-linux-gnu. Such a feature
2095 should probably be moved into a system call, such that the
2096 kernel could do whatever it takes to invalidate a cache line
2097 on the core it's actually running on. I.e., this hideous :-)
2098 piece of code should go away at some point. */
/* ic_invalidate (SH4): displace the instruction-cache line that maps the
   address in r4 by executing code that occupies the same cache line index.
   After writing back the data-cache block, the routine jumps into a
   cache-line-aligned table of 256 blocks of 32 bytes; each block is an
   "rts" followed by 15 "nop"s, so the selected block simply returns to
   the caller.  As the comment preceding this function says, this relies
   on a direct-mapped instruction cache.  */
2100 .global GLOBAL(ic_invalidate)
2101 FUNC(GLOBAL(ic_invalidate))
2102 GLOBAL(ic_invalidate):
2103 ocbwb @r4
/* r0 = address of the table (label 0), r1 = 0x1fe0, the mask selecting
   address bits 5..12, i.e. one of 256 lines of 32 bytes.  */
2104 mova 0f,r0
2105 mov.w 1f,r1
2106 /* Compute how many cache lines 0f is away from r4. */
2107 sub r0,r4
2108 and r1,r4
2109 /* Prepare to branch to 0f plus the cache-line offset. */
2110 add # 0f - 1f,r4
/* braf is relative to the address of label 1 (the braf itself plus 4),
   which is why "0f - 1f" was added to the offset.  */
2111 braf r4
2112 nop
2113 1:
2114 .short 0x1fe0
2115 .p2align 5
2116 /* This must be aligned to the beginning of a cache line. */
2117 0:
2118 .rept 256 /* There are 256 cache lines of 32 bytes. */
2119 rts
2120 .rept 15
2121 nop
2122 .endr
2123 .endr
2125 ENDFUNC(GLOBAL(ic_invalidate))
2126 #endif /* SH4 */
2127 #endif /* L_ic_invalidate */
2129 #if defined (__SH5__) && __SH5__ == 32
2130 #ifdef L_shcompact_call_trampoline
/* Dispatch table of GCC_shcompact_call_trampoline: 33 signed 16-bit
   offsets, each relative to LOCAL(ct_main_label).
   The trampoline indexes it with "nsb" of the zero-extended 32-bit cookie
   in r1, using (table - 31 * 2) as base, so entry 0 is selected when bit
   31 is the highest bit set, entry 1 for bit 30, and so on down to entry
   31 (ct_ret_wide) for bit 0; the last entry (ct_call_func) is reached
   when the cookie is zero.  */
2131 .section .rodata
2132 .align 1
2133 LOCAL(ct_main_table):
/* Three entries (fp, ld, pop) for each of r2..r5.  */
2134 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2135 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2136 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2137 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2138 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2139 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2140 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2141 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2142 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2143 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2144 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2145 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
/* Four entries (fph, fpl, ld, pop) for each of r6..r9.  */
2146 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2147 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2148 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2149 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2150 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2151 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2152 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2153 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2154 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2155 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2156 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2157 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2158 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2159 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2160 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2161 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
/* Bits 3..1: pop-sequence count (a lone bit 1 pops just r9);
   bit 0: 64-bit return value; no bits left: call the function.  */
2162 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2163 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2164 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2165 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2166 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2167 .mode SHmedia
2168 .section .text..SHmedia32, "ax"
2169 .align 2
2171 /* This function loads 64-bit general-purpose registers from the
2172 stack, from a memory address contained in them or from an FP
2173 register, according to a cookie passed in r1. Its execution
2174 time is linear on the number of registers that actually have
2175 to be copied. See sh.h for details on the actual bit pattern.
2177 The function to be called is passed in r0. If a 32-bit return
2178 value is expected, the actual function will be tail-called,
2179 otherwise the return address will be stored in r10 (that the
2180 caller should expect to be clobbered) and the return value
2181 will be expanded into r2/r3 upon return. */
/* Entry point and dispatch loop.
   In: r0 = function to call, r1 = cookie describing which registers
   still have to be loaded.
   tr0 = target function, tr1 = LOCAL(ct_loop); r0 is then reused as the
   biased base address of ct_main_table.  Every handler clears its own
   cookie bits and branches back to ct_loop through tr1.  */
2183 .global GLOBAL(GCC_shcompact_call_trampoline)
2184 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2185 GLOBAL(GCC_shcompact_call_trampoline):
2186 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2187 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2188 pt/l LOCAL(ct_loop), tr1
/* Zero-extend the cookie so that nsb only sees its 32 low bits.  */
2189 addz.l r1, r63, r1
2190 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2191 LOCAL(ct_loop):
/* r28 = nsb of the cookie (31 when bit 31 is set, larger for lower bits);
   r29 = byte offset of the 16-bit table entry; r30 = handler offset
   relative to ct_main_label.  */
2192 nsb r1, r28
2193 shlli r28, 1, r29
2194 ldx.w r0, r29, r30
2195 LOCAL(ct_main_label):
2196 ptrel/l r30, tr2
2197 blink tr2, r63
/* Handlers that copy a general register from a double-precision FP
   register.  Each one clears its register's field in the cookie (r1) and
   returns to ct_loop through tr1.
   Where several source registers are possible, the handler either tests
   the field directly, or turns two cookie bits into an index * 8 and jumps
   into a small table of "fmov.dq / blink" pairs (8 bytes per pair).  */
2198 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2199 /* It must be dr0, so just do it. */
2200 fmov.dq dr0, r2
/* Clear cookie bits 29..31.  */
2201 movi 7, r30
2202 shlli r30, 29, r31
2203 andc r1, r31, r1
2204 blink tr1, r63
2205 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2206 /* It is either dr0 or dr2. */
/* r32 = the field (bits 26..28) read before it is cleared; the value 4
   keeps dr0, anything else takes dr2.  */
2207 movi 7, r30
2208 shlri r1, 26, r32
2209 shlli r30, 26, r31
2210 andc r1, r31, r1
2211 fmov.dq dr0, r3
2212 beqi/l r32, 4, tr1
2213 fmov.dq dr2, r3
2214 blink tr1, r63
2215 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
/* r33 = (cookie bits 23..24) * 8 = byte offset into ct_r4_fp_copy.  */
2216 shlri r1, 23 - 3, r34
2217 andi r34, 3 << 3, r33
2218 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2219 LOCAL(ct_r4_fp_base):
2220 ptrel/l r32, tr2
2221 movi 7, r30
2222 shlli r30, 23, r31
2223 andc r1, r31, r1
2224 blink tr2, r63
2225 LOCAL(ct_r4_fp_copy):
2226 fmov.dq dr0, r4
2227 blink tr1, r63
2228 fmov.dq dr2, r4
2229 blink tr1, r63
2230 fmov.dq dr4, r4
2231 blink tr1, r63
2232 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
/* r33 = (cookie bits 20..21) * 8 = byte offset into ct_r5_fp_copy.  */
2233 shlri r1, 20 - 3, r34
2234 andi r34, 3 << 3, r33
2235 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2236 LOCAL(ct_r5_fp_base):
2237 ptrel/l r32, tr2
2238 movi 7, r30
2239 shlli r30, 20, r31
2240 andc r1, r31, r1
2241 blink tr2, r63
2242 LOCAL(ct_r5_fp_copy):
2243 fmov.dq dr0, r5
2244 blink tr1, r63
2245 fmov.dq dr2, r5
2246 blink tr1, r63
2247 fmov.dq dr4, r5
2248 blink tr1, r63
2249 fmov.dq dr6, r5
2250 blink tr1, r63
2251 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2252 /* It must be dr8. */
2253 fmov.dq dr8, r6
/* Clear cookie bits 16..19.  */
2254 movi 15, r30
2255 shlli r30, 16, r31
2256 andc r1, r31, r1
2257 blink tr1, r63
2258 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
/* r33 = (cookie bits 16..17) * 8 = byte offset into ct_r6_fp_copy.  */
2259 shlri r1, 16 - 3, r34
2260 andi r34, 3 << 3, r33
2261 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2262 LOCAL(ct_r6_fp_base):
2263 ptrel/l r32, tr2
2264 movi 7, r30
2265 shlli r30, 16, r31
2266 andc r1, r31, r1
2267 blink tr2, r63
2268 LOCAL(ct_r6_fp_copy):
2269 fmov.dq dr0, r6
2270 blink tr1, r63
2271 fmov.dq dr2, r6
2272 blink tr1, r63
2273 fmov.dq dr4, r6
2274 blink tr1, r63
2275 fmov.dq dr6, r6
2276 blink tr1, r63
2277 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2278 /* It is either dr8 or dr10. */
/* r32 = cookie >> 12 read before bits 12..15 are cleared; the value 8
   keeps dr8, anything else takes dr10.  */
2279 movi 15 << 12, r31
2280 shlri r1, 12, r32
2281 andc r1, r31, r1
2282 fmov.dq dr8, r7
2283 beqi/l r32, 8, tr1
2284 fmov.dq dr10, r7
2285 blink tr1, r63
2286 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
/* r33 = (cookie bits 12..13) * 8 = byte offset into ct_r7_fp_copy.  */
2287 shlri r1, 12 - 3, r34
2288 andi r34, 3 << 3, r33
2289 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2290 LOCAL(ct_r7_fp_base):
2291 ptrel/l r32, tr2
2292 movi 7 << 12, r31
2293 andc r1, r31, r1
2294 blink tr2, r63
2295 LOCAL(ct_r7_fp_copy):
2296 fmov.dq dr0, r7
2297 blink tr1, r63
2298 fmov.dq dr2, r7
2299 blink tr1, r63
2300 fmov.dq dr4, r7
2301 blink tr1, r63
2302 fmov.dq dr6, r7
2303 blink tr1, r63
2304 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2305 /* It is either dr8 or dr10. */
/* Bit 8 of the cookie, sampled before bits 8..11 are cleared, selects
   dr10 when set.  */
2306 movi 15 << 8, r31
2307 andi r1, 1 << 8, r32
2308 andc r1, r31, r1
2309 fmov.dq dr8, r8
2310 beq/l r32, r63, tr1
2311 fmov.dq dr10, r8
2312 blink tr1, r63
2313 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
/* r33 = (cookie bits 8..9) * 8 = byte offset into ct_r8_fp_copy.  */
2314 shlri r1, 8 - 3, r34
2315 andi r34, 3 << 3, r33
2316 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2317 LOCAL(ct_r8_fp_base):
2318 ptrel/l r32, tr2
2319 movi 7 << 8, r31
2320 andc r1, r31, r1
2321 blink tr2, r63
2322 LOCAL(ct_r8_fp_copy):
2323 fmov.dq dr0, r8
2324 blink tr1, r63
2325 fmov.dq dr2, r8
2326 blink tr1, r63
2327 fmov.dq dr4, r8
2328 blink tr1, r63
2329 fmov.dq dr6, r8
2330 blink tr1, r63
2331 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2332 /* It is either dr8 or dr10. */
/* Bit 4 of the cookie, sampled before bits 4..7 are cleared, selects
   dr10 when set.  */
2333 movi 15 << 4, r31
2334 andi r1, 1 << 4, r32
2335 andc r1, r31, r1
2336 fmov.dq dr8, r9
2337 beq/l r32, r63, tr1
2338 fmov.dq dr10, r9
2339 blink tr1, r63
2340 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
/* r33 = (cookie bits 4..5) * 8 = byte offset into ct_r9_fp_copy.  */
2341 shlri r1, 4 - 3, r34
2342 andi r34, 3 << 3, r33
2343 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2344 LOCAL(ct_r9_fp_base):
2345 ptrel/l r32, tr2
2346 movi 7 << 4, r31
2347 andc r1, r31, r1
2348 blink tr2, r63
2349 LOCAL(ct_r9_fp_copy):
2350 fmov.dq dr0, r9
2351 blink tr1, r63
2352 fmov.dq dr2, r9
2353 blink tr1, r63
2354 fmov.dq dr4, r9
2355 blink tr1, r63
2356 fmov.dq dr6, r9
2357 blink tr1, r63
/* Handlers that load a general register from the memory address it
   currently holds.  Each ct_rN_ld handler looks at the two low bits of
   register N's cookie field and clears them:
     - both bits set: branch to ct_rN_load, which loads rN from [rN] and
       goes back to ct_loop;
     - otherwise: set r(N+1) = rN + 8, load rN from [rN], and fall through
       into the handler of the next register, so that a run of registers
       is filled from consecutive 8-byte slots.
   The chain ends at ct_r9_ld, which loads r9 and continues at
   ct_check_tramp.  */
2358 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2359 pt/l LOCAL(ct_r2_load), tr2
2360 movi 3, r30
2361 shlli r30, 29, r31
2362 and r1, r31, r32
2363 andc r1, r31, r1
2364 beq/l r31, r32, tr2
2365 addi.l r2, 8, r3
2366 ldx.q r2, r63, r2
2367 /* Fall through. */
2368 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2369 pt/l LOCAL(ct_r3_load), tr2
2370 movi 3, r30
2371 shlli r30, 26, r31
2372 and r1, r31, r32
2373 andc r1, r31, r1
2374 beq/l r31, r32, tr2
2375 addi.l r3, 8, r4
2376 ldx.q r3, r63, r3
2377 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2378 pt/l LOCAL(ct_r4_load), tr2
2379 movi 3, r30
2380 shlli r30, 23, r31
2381 and r1, r31, r32
2382 andc r1, r31, r1
2383 beq/l r31, r32, tr2
2384 addi.l r4, 8, r5
2385 ldx.q r4, r63, r4
2386 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2387 pt/l LOCAL(ct_r5_load), tr2
2388 movi 3, r30
2389 shlli r30, 20, r31
2390 and r1, r31, r32
2391 andc r1, r31, r1
2392 beq/l r31, r32, tr2
2393 addi.l r5, 8, r6
2394 ldx.q r5, r63, r5
2395 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2396 pt/l LOCAL(ct_r6_load), tr2
2397 movi 3 << 16, r31
2398 and r1, r31, r32
2399 andc r1, r31, r1
2400 beq/l r31, r32, tr2
2401 addi.l r6, 8, r7
2402 ldx.q r6, r63, r6
2403 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2404 pt/l LOCAL(ct_r7_load), tr2
2405 movi 3 << 12, r31
2406 and r1, r31, r32
2407 andc r1, r31, r1
2408 beq/l r31, r32, tr2
2409 addi.l r7, 8, r8
2410 ldx.q r7, r63, r7
2411 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2412 pt/l LOCAL(ct_r8_load), tr2
2413 movi 3 << 8, r31
2414 and r1, r31, r32
2415 andc r1, r31, r1
2416 beq/l r31, r32, tr2
2417 addi.l r8, 8, r9
2418 ldx.q r8, r63, r8
2419 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
/* Last register of the chain: load it and go on to the trampoline check
   without clearing any cookie bits here.  */
2420 pt/l LOCAL(ct_check_tramp), tr2
2421 ldx.q r9, r63, r9
2422 blink tr2, r63
/* Single-register loads: rN = [rN], then back to ct_loop.  */
2423 LOCAL(ct_r2_load):
2424 ldx.q r2, r63, r2
2425 blink tr1, r63
2426 LOCAL(ct_r3_load):
2427 ldx.q r3, r63, r3
2428 blink tr1, r63
2429 LOCAL(ct_r4_load):
2430 ldx.q r4, r63, r4
2431 blink tr1, r63
2432 LOCAL(ct_r5_load):
2433 ldx.q r5, r63, r5
2434 blink tr1, r63
2435 LOCAL(ct_r6_load):
2436 ldx.q r6, r63, r6
2437 blink tr1, r63
2438 LOCAL(ct_r7_load):
2439 ldx.q r7, r63, r7
2440 blink tr1, r63
2441 LOCAL(ct_r8_load):
2442 ldx.q r8, r63, r8
2443 blink tr1, r63
/* Handlers that pop one general register from the stack: load 8 bytes
   from [r15], advance r15 by 8, clear the register's "pop" bit in the
   cookie (r1) and return to ct_loop through tr1.  */
2444 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2445 movi 1, r30
2446 ldx.q r15, r63, r2
2447 shlli r30, 29, r31
2448 addi.l r15, 8, r15
2449 andc r1, r31, r1
2450 blink tr1, r63
2451 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2452 movi 1, r30
2453 ldx.q r15, r63, r3
2454 shlli r30, 26, r31
2455 addi.l r15, 8, r15
2456 andc r1, r31, r1
2457 blink tr1, r63
2458 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2459 movi 1, r30
2460 ldx.q r15, r63, r4
2461 shlli r30, 23, r31
2462 addi.l r15, 8, r15
2463 andc r1, r31, r1
2464 blink tr1, r63
2465 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2466 movi 1, r30
2467 ldx.q r15, r63, r5
2468 shlli r30, 20, r31
2469 addi.l r15, 8, r15
2470 andc r1, r31, r1
2471 blink tr1, r63
2472 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2473 movi 1, r30
2474 ldx.q r15, r63, r6
2475 shlli r30, 16, r31
2476 addi.l r15, 8, r15
2477 andc r1, r31, r1
2478 blink tr1, r63
/* For r7 and r8 the mask is built directly by movi.  */
2479 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2480 ldx.q r15, r63, r7
2481 movi 1 << 12, r31
2482 addi.l r15, 8, r15
2483 andc r1, r31, r1
2484 blink tr1, r63
2485 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2486 ldx.q r15, r63, r8
2487 movi 1 << 8, r31
2488 addi.l r15, 8, r15
2489 andc r1, r31, r1
2490 blink tr1, r63
/* Final stages of the call trampoline: popping a run of registers ending
   with r9, then either tail-calling the target function or calling it and
   unpacking a 64-bit return value.  */
2491 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
/* r30 = (count in cookie bits 1..3) * 2; r31 = count * 8, the size in
   bytes of `count' two-instruction pops.  Jump that far before
   ct_end_of_pop_seq so exactly the last `count' pops are executed.  */
2492 andi r1, 7 << 1, r30
2493 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2494 shlli r30, 2, r31
2495 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2496 sub.l r32, r31, r33
2497 ptabs/l r33, tr2
2498 blink tr2, r63
2499 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2500 ldx.q r15, r63, r3
2501 addi.l r15, 8, r15
2502 ldx.q r15, r63, r4
2503 addi.l r15, 8, r15
2504 ldx.q r15, r63, r5
2505 addi.l r15, 8, r15
2506 ldx.q r15, r63, r6
2507 addi.l r15, 8, r15
2508 ldx.q r15, r63, r7
2509 addi.l r15, 8, r15
2510 ldx.q r15, r63, r8
2511 addi.l r15, 8, r15
2512 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2513 ldx.q r15, r63, r9
2514 addi.l r15, 8, r15
2515 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2516 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
/* Only bit 0 of the cookie matters from here on: when set, the callee
   returns a 64-bit value that has to be unpacked.  */
2517 pt/u LOCAL(ct_ret_wide), tr2
2518 andi r1, 1, r1
2519 bne/u r1, r63, tr2
2520 LOCAL(ct_call_func): /* Just branch to the function. */
2521 blink tr0, r63
2522 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2523 64-bit return value. */
/* Keep the caller's return address in r10 across the call.  */
2524 add.l r18, r63, r10
2525 blink tr0, r18
2526 ptabs r10, tr0
/* Split the 64-bit result in r2 into two sign-extended 32-bit halves in
   r2 and r3; which register gets the upper half depends on endianness.  */
2527 #if __LITTLE_ENDIAN__
2528 shari r2, 32, r3
2529 add.l r2, r63, r2
2530 #else
2531 add.l r2, r63, r3
2532 shari r2, 32, r2
2533 #endif
2534 blink tr0, r63
2536 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2537 #endif /* L_shcompact_call_trampoline */
2539 #ifdef L_shcompact_return_trampoline
2540 /* This function does the converse of the code in `ret_wide'
2541 above. It is tail-called by SHcompact functions returning
2542 64-bit non-floating-point values, to pack the 32-bit values in
2543 r2 and r3 into r2. */
/* In:   r2, r3 = the two 32-bit halves of the value; which register
         holds the low half depends on __LITTLE_ENDIAN__ (see below).
         r18 = address to return to.
   Out:  r2 = (high half << 32) | zero-extended low half.
   Also modifies r3 and tr0.  */
2545 .mode SHmedia
2546 .section .text..SHmedia32, "ax"
2547 .align 2
2548 .global GLOBAL(GCC_shcompact_return_trampoline)
2549 FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2550 GLOBAL(GCC_shcompact_return_trampoline):
2551 ptabs/l r18, tr0 /* Prepare the return branch early. */
2552 #if __LITTLE_ENDIAN__
2553 addz.l r2, r63, r2 /* r2 holds the low half: zero-extend it. */
2554 shlli r3, 32, r3 /* r3 holds the high half: move it to bits 32-63. */
2555 #else
2556 addz.l r3, r63, r3 /* r3 holds the low half: zero-extend it. */
2557 shlli r2, 32, r2 /* r2 holds the high half: move it to bits 32-63. */
2558 #endif
2559 or r3, r2, r2 /* Combine both halves into r2. */
2560 blink tr0, r63 /* Return; the link value goes to r63 and is discarded. */
2562 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2563 #endif /* L_shcompact_return_trampoline */
2565 #ifdef L_shcompact_incoming_args
2566 .section .rodata
2567 .align 1
/* Dispatch table used by the loop at ia_loop below.  Every entry is a
   16-bit offset, relative to ia_main_label, of the handler to branch
   to.  The table is indexed by the result of `nsb' on the cookie; the
   code biases the table base by -31 * 2 bytes, so an nsb result of 31
   selects the first entry.  */
2568 LOCAL(ia_main_table):
2569 .word 1 /* Invalid, just loop */
2570 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2571 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2572 .word 1 /* Invalid, just loop */
2573 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2574 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2575 .word 1 /* Invalid, just loop */
2576 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2577 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2578 .word 1 /* Invalid, just loop */
2579 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2580 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
2581 .word 1 /* Invalid, just loop */
2582 .word 1 /* Invalid, just loop */
2583 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2584 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2585 .word 1 /* Invalid, just loop */
2586 .word 1 /* Invalid, just loop */
2587 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2588 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2589 .word 1 /* Invalid, just loop */
2590 .word 1 /* Invalid, just loop */
2591 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2592 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2593 .word 1 /* Invalid, just loop */
2594 .word 1 /* Invalid, just loop */
2595 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2596 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2597 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2598 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2599 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2600 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2601 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2602 .mode SHmedia
2603 .section .text..SHmedia32, "ax"
2604 .align 2
2606 /* This function stores 64-bit general-purpose registers back in
2607 the stack, and loads the address in which each register
2608 was stored into itself. The lower 32 bits of r17 hold the address
2609 to begin storing, and the upper 32 bits of r17 hold the cookie.
2610 Its execution time is linear on the
2611 number of registers that actually have to be copied, and it is
2612 optimized for structures larger than 64 bits, as opposed to
2613 individual `long long' arguments. See sh.h for details on the
2614 actual bit pattern. */
/* Register usage, as visible in the code below:
   r0       remaining cookie bits; each handler clears the bits it
            has dealt with before going back to ia_loop.
   r17      address of the next 8-byte slot to store into.
   r43      biased address of ia_main_table.
   tr0      return address (taken from r18).
   tr1      ia_loop.
   tr2      computed branch targets.
   r36-r41  scratch.  */
2616 .global GLOBAL(GCC_shcompact_incoming_args)
2617 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2618 GLOBAL(GCC_shcompact_incoming_args):
2619 ptabs/l r18, tr0 /* Prepare to return. */
2620 shlri r17, 32, r0 /* Load the cookie. */
2621 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2622 pt/l LOCAL(ia_loop), tr1
2623 add.l r17, r63, r17 /* Keep only the low 32 bits: the address to begin storing. */
2624 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2625 LOCAL(ia_loop):
2626 nsb r0, r36 /* Index derived from the leading bits of the cookie. */
2627 shlli r36, 1, r37 /* Scale by 2: table entries are 16 bits wide. */
2628 ldx.w r43, r37, r38 /* r38 = handler offset from ia_main_label. */
2629 LOCAL(ia_main_label):
2630 ptrel/l r38, tr2 /* The table offsets are relative to this instruction. */
2631 blink tr2, r63 /* Dispatch to the selected handler. */
/* The ia_rN_ld handlers below all follow the same pattern: build a
   two-bit mask, extract and clear that field of the cookie, store rN
   in the current slot, replace rN with the slot's address and advance
   r17.  If both bits of the field were set the handler goes back to
   ia_loop; otherwise it falls through into the next register's
   handler.  */
2632 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2633 movi 3, r38
2634 shlli r38, 29, r39 /* r39 = 3 << 29, the mask for this field. */
2635 and r0, r39, r40 /* r40 = this field of the cookie. */
2636 andc r0, r39, r0 /* Clear the field in the cookie. */
2637 stx.q r17, r63, r2 /* Store r2 in the current slot... */
2638 add.l r17, r63, r2 /* ...and replace r2 with the slot's address. */
2639 addi.l r17, 8, r17 /* Advance to the next slot. */
2640 beq/u r39, r40, tr1 /* Field == mask: back to ia_loop, else fall through. */
2641 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2642 movi 3, r38
2643 shlli r38, 26, r39 /* r39 = 3 << 26. */
2644 and r0, r39, r40
2645 andc r0, r39, r0
2646 stx.q r17, r63, r3
2647 add.l r17, r63, r3
2648 addi.l r17, 8, r17
2649 beq/u r39, r40, tr1
2650 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2651 movi 3, r38
2652 shlli r38, 23, r39 /* r39 = 3 << 23. */
2653 and r0, r39, r40
2654 andc r0, r39, r0
2655 stx.q r17, r63, r4
2656 add.l r17, r63, r4
2657 addi.l r17, 8, r17
2658 beq/u r39, r40, tr1
2659 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2660 movi 3, r38
2661 shlli r38, 20, r39 /* r39 = 3 << 20. */
2662 and r0, r39, r40
2663 andc r0, r39, r0
2664 stx.q r17, r63, r5
2665 add.l r17, r63, r5
2666 addi.l r17, 8, r17
2667 beq/u r39, r40, tr1
2668 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2669 movi 3, r38
2670 shlli r38, 16, r39 /* r39 = 3 << 16. */
2671 and r0, r39, r40
2672 andc r0, r39, r0
2673 stx.q r17, r63, r6
2674 add.l r17, r63, r6
2675 addi.l r17, 8, r17
2676 beq/u r39, r40, tr1
2677 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2678 movi 3 << 12, r39 /* Mask loaded directly, without a separate shift. */
2679 and r0, r39, r40
2680 andc r0, r39, r0
2681 stx.q r17, r63, r7
2682 add.l r17, r63, r7
2683 addi.l r17, 8, r17
2684 beq/u r39, r40, tr1
2685 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2686 movi 3 << 8, r39 /* Mask loaded directly, without a separate shift. */
2687 and r0, r39, r40
2688 andc r0, r39, r0
2689 stx.q r17, r63, r8
2690 add.l r17, r63, r8
2691 addi.l r17, 8, r17
2692 beq/u r39, r40, tr1
2693 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2694 stx.q r17, r63, r9
2695 add.l r17, r63, r9
2696 blink tr0, r63 /* r9 is the last register handled: return to the caller. */
/* The ia_rN_push handlers below clear one cookie bit, store rN in the
   current slot without modifying rN, advance r17 and go back to
   ia_loop.  */
2697 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2698 movi 1, r38
2699 shlli r38, 29, r39 /* r39 = 1 << 29, this handler's cookie bit. */
2700 andc r0, r39, r0 /* Clear it in the cookie. */
2701 stx.q r17, r63, r2
2702 addi.l r17, 8, r17
2703 blink tr1, r63 /* Back to ia_loop. */
2704 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2705 movi 1, r38
2706 shlli r38, 26, r39 /* r39 = 1 << 26. */
2707 andc r0, r39, r0
2708 stx.q r17, r63, r3
2709 addi.l r17, 8, r17
2710 blink tr1, r63
2711 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2712 movi 1, r38
2713 shlli r38, 23, r39 /* r39 = 1 << 23. */
2714 andc r0, r39, r0
2715 stx.q r17, r63, r4
2716 addi.l r17, 8, r17
2717 blink tr1, r63
2718 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2719 movi 1, r38
2720 shlli r38, 20, r39 /* r39 = 1 << 20. */
2721 andc r0, r39, r0
2722 stx.q r17, r63, r5
2723 addi.l r17, 8, r17
2724 blink tr1, r63
2725 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2726 movi 1, r38
2727 shlli r38, 16, r39 /* r39 = 1 << 16. */
2728 andc r0, r39, r0
2729 stx.q r17, r63, r6
2730 addi.l r17, 8, r17
2731 blink tr1, r63
2732 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2733 movi 1 << 12, r39
2734 andc r0, r39, r0
2735 stx.q r17, r63, r7
2736 addi.l r17, 8, r17
2737 blink tr1, r63
2738 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2739 movi 1 << 8, r39
2740 andc r0, r39, r0
2741 stx.q r17, r63, r8
2742 addi.l r17, 8, r17
2743 blink tr1, r63
/* ia_push_seq enters the straight-line store sequence below part-way
   through.  With N = cookie bits 1-3, it branches to
   ia_end_of_push_seq - 8 * N.  Every register in the sequence takes
   two instructions (for r9 these are the store and the returning
   blink), so this presumably runs exactly the last N stores, assuming
   4-byte SHmedia instructions -- TODO confirm.  */
2744 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2745 andi r0, 7 << 1, r38 /* r38 = 2 * N. */
2746 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2747 shlli r38, 2, r39 /* r39 = 8 * N. */
2748 shori LOCAL(ia_end_of_push_seq) & 65535, r40
2749 sub.l r40, r39, r41 /* r41 = ia_end_of_push_seq - 8 * N. */
2750 ptabs/l r41, tr2
2751 blink tr2, r63
2752 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
/* NOTE(review): this label is not referenced in the code visible here,
   and its name looks like a typo for ia_start_of_push_seq -- confirm
   before renaming.  */
2753 stx.q r17, r63, r3
2754 addi.l r17, 8, r17
2755 stx.q r17, r63, r4
2756 addi.l r17, 8, r17
2757 stx.q r17, r63, r5
2758 addi.l r17, 8, r17
2759 stx.q r17, r63, r6
2760 addi.l r17, 8, r17
2761 stx.q r17, r63, r7
2762 addi.l r17, 8, r17
2763 stx.q r17, r63, r8
2764 addi.l r17, 8, r17
2765 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2766 stx.q r17, r63, r9
2767 LOCAL(ia_return): /* Return. */
2768 blink tr0, r63
2769 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2770 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2771 #endif /* L_shcompact_incoming_args */
2772 #endif
2773 #if __SH5__
2774 #ifdef L_nested_trampoline
/* GCC_nested_trampoline: code that locates itself at run time, loads
   a branch target from offset 24 from its own location and a second
   value into r1 from offset 32 (__SH5__ == 64) or 28 (__SH5__ == 32),
   then branches to the target.
   NOTE(review): the two loaded values are presumably the nested
   function's address and its static chain, stored after a copy of
   this code by the compiler's trampoline setup -- confirm against the
   SH back end.  */
2775 #if __SH5__ == 32
2776 .section .text..SHmedia32,"ax"
2777 #else
2778 .text
2779 #endif
2780 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2781 .global GLOBAL(GCC_nested_trampoline)
2782 FUNC(GLOBAL(GCC_nested_trampoline))
2783 GLOBAL(GCC_nested_trampoline):
2784 .mode SHmedia
2785 ptrel/u r63, tr0 /* PC-relative target with a zero offset (r63). */
2786 gettr tr0, r0 /* r0 = that target, used as the base for the loads below. */
2787 #if __SH5__ == 64
2788 ld.q r0, 24, r1 /* r1 = 64-bit branch target stored at offset 24. */
2789 #else
2790 ld.l r0, 24, r1 /* r1 = 32-bit branch target stored at offset 24. */
2791 #endif
2792 ptabs/l r1, tr1
2793 #if __SH5__ == 64
2794 ld.q r0, 32, r1 /* r1 = second value, at offset 32. */
2795 #else
2796 ld.l r0, 28, r1 /* r1 = second value, at offset 28. */
2797 #endif
2798 blink tr1, r63 /* Jump to the target; r1 is live on entry to it. */
2800 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2801 #endif /* L_nested_trampoline */
2802 #endif /* __SH5__ */
2803 #if __SH5__ == 32
2804 #ifdef L_push_pop_shmedia_regs
/* Save/restore helpers for a fixed set of SHmedia registers.

   Stack layout built by the push routines, as offsets from the final
   r15 in 8-byte slots:
     0-7    r28-r35
     8-23   r44-r59
     24-26  tr5, tr6, tr7 (moved through r60, r61, r62)
     27-40  dr36, dr38, ... dr62 (GCC_push_shmedia_regs only)

   r60-r62 are overwritten with tr5-tr7 before anything is stored, so
   their own previous contents are not saved.  All routines return
   through r18 and use tr0; the pop routines also use r0, and
   GCC_pop_shmedia_regs uses tr1.  */
2805 .section .text..SHmedia32,"ax"
2806 .mode SHmedia
2807 .align 2
2808 #ifndef __SH4_NOFPU__
2809 .global GLOBAL(GCC_push_shmedia_regs)
2810 FUNC(GLOBAL(GCC_push_shmedia_regs))
2811 GLOBAL(GCC_push_shmedia_regs):
2812 addi.l r15, -14*8, r15 /* Allocate 14 slots for the FP registers. */
2813 fst.d r15, 13*8, dr62
2814 fst.d r15, 12*8, dr60
2815 fst.d r15, 11*8, dr58
2816 fst.d r15, 10*8, dr56
2817 fst.d r15, 9*8, dr54
2818 fst.d r15, 8*8, dr52
2819 fst.d r15, 7*8, dr50
2820 fst.d r15, 6*8, dr48
2821 fst.d r15, 5*8, dr46
2822 fst.d r15, 4*8, dr44
2823 fst.d r15, 3*8, dr42
2824 fst.d r15, 2*8, dr40
2825 fst.d r15, 1*8, dr38
2826 fst.d r15, 0*8, dr36
/* Fall through into GCC_push_shmedia_regs_nofpu to save the rest.  */
2827 #endif
2828 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2829 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2830 GLOBAL(GCC_push_shmedia_regs_nofpu):
2831 ptabs/l r18, tr0 /* Prepare to return. */
2832 addi.l r15, -27*8, r15 /* Allocate 27 slots. */
2833 gettr tr7, r62 /* Target registers are stored via r60-r62. */
2834 gettr tr6, r61
2835 gettr tr5, r60
2836 st.q r15, 26*8, r62 /* tr7 */
2837 st.q r15, 25*8, r61 /* tr6 */
2838 st.q r15, 24*8, r60 /* tr5 */
2839 st.q r15, 23*8, r59
2840 st.q r15, 22*8, r58
2841 st.q r15, 21*8, r57
2842 st.q r15, 20*8, r56
2843 st.q r15, 19*8, r55
2844 st.q r15, 18*8, r54
2845 st.q r15, 17*8, r53
2846 st.q r15, 16*8, r52
2847 st.q r15, 15*8, r51
2848 st.q r15, 14*8, r50
2849 st.q r15, 13*8, r49
2850 st.q r15, 12*8, r48
2851 st.q r15, 11*8, r47
2852 st.q r15, 10*8, r46
2853 st.q r15, 9*8, r45
2854 st.q r15, 8*8, r44
2855 st.q r15, 7*8, r35
2856 st.q r15, 6*8, r34
2857 st.q r15, 5*8, r33
2858 st.q r15, 4*8, r32
2859 st.q r15, 3*8, r31
2860 st.q r15, 2*8, r30
2861 st.q r15, 1*8, r29
2862 st.q r15, 0*8, r28
2863 blink tr0, r63 /* Return. */
2865 #ifndef __SH4_NOFPU__
2866 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2867 #endif
2868 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2869 #ifndef __SH4_NOFPU__
2870 .global GLOBAL(GCC_pop_shmedia_regs)
2871 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2872 GLOBAL(GCC_pop_shmedia_regs):
2873 pt .L0, tr1 /* Shared restore code in the nofpu routine. */
2874 movi 41*8, r0 /* r0 = bytes to release: 27 + 14 slots. */
2875 fld.d r15, 40*8, dr62
2876 fld.d r15, 39*8, dr60
2877 fld.d r15, 38*8, dr58
2878 fld.d r15, 37*8, dr56
2879 fld.d r15, 36*8, dr54
2880 fld.d r15, 35*8, dr52
2881 fld.d r15, 34*8, dr50
2882 fld.d r15, 33*8, dr48
2883 fld.d r15, 32*8, dr46
2884 fld.d r15, 31*8, dr44
2885 fld.d r15, 30*8, dr42
2886 fld.d r15, 29*8, dr40
2887 fld.d r15, 28*8, dr38
2888 fld.d r15, 27*8, dr36
2889 blink tr1, r63 /* Continue at .L0, skipping the nofpu size load. */
2890 #endif
2891 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2892 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2893 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2894 movi 27*8, r0 /* r0 = bytes to release: 27 slots. */
/* NOTE(review): .L0 is written as a raw label rather than through the
   LOCAL() macro -- confirm this is intended.  */
2895 .L0:
2896 ptabs r18, tr0 /* Prepare to return. */
2897 ld.q r15, 26*8, r62
2898 ld.q r15, 25*8, r61
2899 ld.q r15, 24*8, r60
2900 ptabs r62, tr7 /* Restore tr7, tr6 and tr5 from their slots. */
2901 ptabs r61, tr6
2902 ptabs r60, tr5
2903 ld.q r15, 23*8, r59
2904 ld.q r15, 22*8, r58
2905 ld.q r15, 21*8, r57
2906 ld.q r15, 20*8, r56
2907 ld.q r15, 19*8, r55
2908 ld.q r15, 18*8, r54
2909 ld.q r15, 17*8, r53
2910 ld.q r15, 16*8, r52
2911 ld.q r15, 15*8, r51
2912 ld.q r15, 14*8, r50
2913 ld.q r15, 13*8, r49
2914 ld.q r15, 12*8, r48
2915 ld.q r15, 11*8, r47
2916 ld.q r15, 10*8, r46
2917 ld.q r15, 9*8, r45
2918 ld.q r15, 8*8, r44
2919 ld.q r15, 7*8, r35
2920 ld.q r15, 6*8, r34
2921 ld.q r15, 5*8, r33
2922 ld.q r15, 4*8, r32
2923 ld.q r15, 3*8, r31
2924 ld.q r15, 2*8, r30
2925 ld.q r15, 1*8, r29
2926 ld.q r15, 0*8, r28
2927 add.l r15, r0, r15 /* Release the save area (size chosen by the entry point). */
2928 blink tr0, r63 /* Return. */
2930 #ifndef __SH4_NOFPU__
2931 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
2932 #endif
2933 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2934 #endif /* L_push_pop_shmedia_regs */
2935 #endif /* __SH5__ == 32 */