* ChangeLog: Follow spelling conventions.
[official-gcc.git] / gcc / config / sh / lib1funcs.asm
blob68b2ca30529e95ca9e2d9c8f5e2dc072f40ae247
1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
2 Free Software Foundation, Inc.
4 This file is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
7 later version.
9 In addition to the permissions in the GNU General Public License, the
10 Free Software Foundation gives you unlimited permission to link the
11 compiled version of this file into combinations with other programs,
12 and to distribute those combinations without any restriction coming
13 from the use of this file. (The General Public License restrictions
14 do apply in other respects; for example, they cover modification of
15 the file, and distribution when not linked into a combine
16 executable.)
18 This file is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; see the file COPYING. If not, write to
25 the Free Software Foundation, 59 Temple Place - Suite 330,
26 Boston, MA 02111-1307, USA. */
28 !! libgcc routines for the Hitachi / SuperH SH CPUs.
29 !! Contributed by Steve Chamberlain.
30 !! sac@cygnus.com
32 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
33 !! recoded in assembly by Toshiyasu Morita
34 !! tm@netcom.com
36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
38 amylaar@cygnus.com */
/* LOCAL(X) forms a file-local label name from X: ".L_" + X when __ELF__ is
   defined, "L_" + X otherwise. */
40 #ifdef __ELF__
41 #define LOCAL(X) .L_##X
42 #else
43 #define LOCAL(X) L_##X
44 #endif
/* GLOBAL(X) forms an exported symbol name by pasting __USER_LABEL_PREFIX__,
   two underscores and X.  The CONCAT / GLOBAL0 indirection lets the
   preprocessor expand the prefix macro before the token paste happens. */
46 #define CONCAT(A,B) A##B
47 #define GLOBAL0(U,X) CONCAT(U,__##X)
48 #define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
/* FMOVD_WORKS is defined only when __SH5__ is defined and neither
   __SH4_NOFPU__ nor __LITTLE_ENDIAN__ is.  The udivsi3_i4 code in this file
   tests it to choose one fmov.d load over a pair of fmov.s loads. */
50 #if defined __SH5__ && ! defined __SH4_NOFPU__ && ! defined (__LITTLE_ENDIAN__)
51 #define FMOVD_WORKS
52 #endif
54 #if ! __SH5__
55 #ifdef L_ashiftrt
/* ashiftrt_r4_N entry points, N = 0..32: arithmetic right shift of r4, in
   place, by the count that is fixed by the entry point chosen.
   The entries are laid out as chains of single-bit "shar r4" steps.  The
   24-bit and 16-bit cases use shlr16 / shlr8 followed by a sign extension
   (exts.b / exts.w) instead of many single steps.
   NOTE(review): the only return visible in this block is the rts in the
   _32/_31 entry.  The embedded line numbers skip 112, 131, 163 and 167-168,
   which is where a return would be expected ahead of the last instruction
   of the _24, _16 and _1 entries and inside the _0 entry.  As the text
   stands every other entry runs on to the end of the block; confirm against
   the upstream file before relying on this copy. */
56 .global GLOBAL(ashiftrt_r4_0)
57 .global GLOBAL(ashiftrt_r4_1)
58 .global GLOBAL(ashiftrt_r4_2)
59 .global GLOBAL(ashiftrt_r4_3)
60 .global GLOBAL(ashiftrt_r4_4)
61 .global GLOBAL(ashiftrt_r4_5)
62 .global GLOBAL(ashiftrt_r4_6)
63 .global GLOBAL(ashiftrt_r4_7)
64 .global GLOBAL(ashiftrt_r4_8)
65 .global GLOBAL(ashiftrt_r4_9)
66 .global GLOBAL(ashiftrt_r4_10)
67 .global GLOBAL(ashiftrt_r4_11)
68 .global GLOBAL(ashiftrt_r4_12)
69 .global GLOBAL(ashiftrt_r4_13)
70 .global GLOBAL(ashiftrt_r4_14)
71 .global GLOBAL(ashiftrt_r4_15)
72 .global GLOBAL(ashiftrt_r4_16)
73 .global GLOBAL(ashiftrt_r4_17)
74 .global GLOBAL(ashiftrt_r4_18)
75 .global GLOBAL(ashiftrt_r4_19)
76 .global GLOBAL(ashiftrt_r4_20)
77 .global GLOBAL(ashiftrt_r4_21)
78 .global GLOBAL(ashiftrt_r4_22)
79 .global GLOBAL(ashiftrt_r4_23)
80 .global GLOBAL(ashiftrt_r4_24)
81 .global GLOBAL(ashiftrt_r4_25)
82 .global GLOBAL(ashiftrt_r4_26)
83 .global GLOBAL(ashiftrt_r4_27)
84 .global GLOBAL(ashiftrt_r4_28)
85 .global GLOBAL(ashiftrt_r4_29)
86 .global GLOBAL(ashiftrt_r4_30)
87 .global GLOBAL(ashiftrt_r4_31)
88 .global GLOBAL(ashiftrt_r4_32)
90 .align 1
/* Shift by 31 or 32: rotcl moves the sign bit of r4 into T, and the subc in
   the rts delay slot leaves r4 = 0 - T, i.e. 0 or -1. */
91 GLOBAL(ashiftrt_r4_32):
92 GLOBAL(ashiftrt_r4_31):
93 rotcl r4
94 rts
95 subc r4,r4
/* Counts 30..25: one shar per extra bit, then fall into the 24-bit case. */
97 GLOBAL(ashiftrt_r4_30):
98 shar r4
99 GLOBAL(ashiftrt_r4_29):
100 shar r4
101 GLOBAL(ashiftrt_r4_28):
102 shar r4
103 GLOBAL(ashiftrt_r4_27):
104 shar r4
105 GLOBAL(ashiftrt_r4_26):
106 shar r4
107 GLOBAL(ashiftrt_r4_25):
108 shar r4
/* 24-bit case: logical shift right by 16 + 8, then sign-extend the byte that
   is left in the low bits. */
109 GLOBAL(ashiftrt_r4_24):
110 shlr16 r4
111 shlr8 r4
113 exts.b r4,r4
/* Counts 23..17: one shar per extra bit, then fall into the 16-bit case. */
115 GLOBAL(ashiftrt_r4_23):
116 shar r4
117 GLOBAL(ashiftrt_r4_22):
118 shar r4
119 GLOBAL(ashiftrt_r4_21):
120 shar r4
121 GLOBAL(ashiftrt_r4_20):
122 shar r4
123 GLOBAL(ashiftrt_r4_19):
124 shar r4
125 GLOBAL(ashiftrt_r4_18):
126 shar r4
127 GLOBAL(ashiftrt_r4_17):
128 shar r4
/* 16-bit case: logical shift right by 16, then sign-extend the low word. */
129 GLOBAL(ashiftrt_r4_16):
130 shlr16 r4
132 exts.w r4,r4
/* Counts 15..1: a plain chain of single-bit arithmetic shifts. */
134 GLOBAL(ashiftrt_r4_15):
135 shar r4
136 GLOBAL(ashiftrt_r4_14):
137 shar r4
138 GLOBAL(ashiftrt_r4_13):
139 shar r4
140 GLOBAL(ashiftrt_r4_12):
141 shar r4
142 GLOBAL(ashiftrt_r4_11):
143 shar r4
144 GLOBAL(ashiftrt_r4_10):
145 shar r4
146 GLOBAL(ashiftrt_r4_9):
147 shar r4
148 GLOBAL(ashiftrt_r4_8):
149 shar r4
150 GLOBAL(ashiftrt_r4_7):
151 shar r4
152 GLOBAL(ashiftrt_r4_6):
153 shar r4
154 GLOBAL(ashiftrt_r4_5):
155 shar r4
156 GLOBAL(ashiftrt_r4_4):
157 shar r4
158 GLOBAL(ashiftrt_r4_3):
159 shar r4
160 GLOBAL(ashiftrt_r4_2):
161 shar r4
162 GLOBAL(ashiftrt_r4_1):
164 shar r4
/* Count 0: no instruction follows this label in this copy. */
166 GLOBAL(ashiftrt_r4_0):
169 #endif
171 #ifdef L_ashiftrt_n
/* ashrsi3: arithmetic right shift of r4 by the variable count in r5, result
   in r0.  The count is masked to 0..31 and used to index a byte table of
   offsets; control then jumps into a chain of shift instructions that works
   on r0 (loaded from r4 in the branch delay slot).
   Although the header below lists nothing as destroyed, the code overwrites
   r5 (first with the masked count, then with the table byte).
   NOTE(review): no rts is visible in this block.  The embedded line numbers
   skip 242, 260, 279, 311 and 315-317, each at a point where a return would
   be expected ahead of the final instruction of a group; confirm against
   the upstream file. */
174 ! GLOBAL(ashrsi3)
176 ! Entry:
178 ! r4: Value to shift
179 ! r5: Shifts
181 ! Exit:
183 ! r0: Result
185 ! Destroys:
187 ! (none)
190 .global GLOBAL(ashrsi3)
191 .align 2
192 GLOBAL(ashrsi3):
/* r5 &= 31, then r5 = table[r5] (a byte offset measured from the table). */
193 mov #31,r0
194 and r0,r5
195 mova LOCAL(ashrsi3_table),r0
196 mov.b @(r0,r5),r5
/* With __sh1__ the target is formed as table address + offset and reached
   with jmp; otherwise braf takes the offset directly. */
197 #ifdef __sh1__
198 add r5,r0
199 jmp @r0
200 #else
201 braf r5
202 #endif
/* Delay slot of the jump: copy the value to shift into the result register. */
203 mov r4,r0
205 .align 2
/* One byte per shift count 0..31: distance from the table to its entry. */
206 LOCAL(ashrsi3_table):
207 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
208 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
209 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
210 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
211 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
212 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
213 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
214 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
215 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
216 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
217 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
218 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
219 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
220 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
221 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
222 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
223 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
224 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
225 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
226 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
227 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
228 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
229 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
230 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
231 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
232 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
233 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
234 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
235 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
236 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
237 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
238 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
/* Count 31: sign bit into T via rotcl, then r0 = 0 - T (0 or -1). */
240 LOCAL(ashrsi3_31):
241 rotcl r0
243 subc r0,r0
/* Counts 30..25: single-bit steps leading into the 24-bit case. */
245 LOCAL(ashrsi3_30):
246 shar r0
247 LOCAL(ashrsi3_29):
248 shar r0
249 LOCAL(ashrsi3_28):
250 shar r0
251 LOCAL(ashrsi3_27):
252 shar r0
253 LOCAL(ashrsi3_26):
254 shar r0
255 LOCAL(ashrsi3_25):
256 shar r0
/* 24-bit case: shift right 16 + 8 logically, then sign-extend the byte. */
257 LOCAL(ashrsi3_24):
258 shlr16 r0
259 shlr8 r0
261 exts.b r0,r0
/* Counts 23..17: single-bit steps leading into the 16-bit case. */
263 LOCAL(ashrsi3_23):
264 shar r0
265 LOCAL(ashrsi3_22):
266 shar r0
267 LOCAL(ashrsi3_21):
268 shar r0
269 LOCAL(ashrsi3_20):
270 shar r0
271 LOCAL(ashrsi3_19):
272 shar r0
273 LOCAL(ashrsi3_18):
274 shar r0
275 LOCAL(ashrsi3_17):
276 shar r0
/* 16-bit case: shift right 16 logically, then sign-extend the word. */
277 LOCAL(ashrsi3_16):
278 shlr16 r0
280 exts.w r0,r0
/* Counts 15..1: a plain chain of single-bit arithmetic shifts. */
282 LOCAL(ashrsi3_15):
283 shar r0
284 LOCAL(ashrsi3_14):
285 shar r0
286 LOCAL(ashrsi3_13):
287 shar r0
288 LOCAL(ashrsi3_12):
289 shar r0
290 LOCAL(ashrsi3_11):
291 shar r0
292 LOCAL(ashrsi3_10):
293 shar r0
294 LOCAL(ashrsi3_9):
295 shar r0
296 LOCAL(ashrsi3_8):
297 shar r0
298 LOCAL(ashrsi3_7):
299 shar r0
300 LOCAL(ashrsi3_6):
301 shar r0
302 LOCAL(ashrsi3_5):
303 shar r0
304 LOCAL(ashrsi3_4):
305 shar r0
306 LOCAL(ashrsi3_3):
307 shar r0
308 LOCAL(ashrsi3_2):
309 shar r0
310 LOCAL(ashrsi3_1):
312 shar r0
/* Count 0: no instruction follows this label in this copy. */
314 LOCAL(ashrsi3_0):
318 #endif
320 #ifdef L_ashiftlt
/* ashlsi3: left shift of r4 by the variable count in r5, result in r0.
   The count is masked to 0..31 and used to index a byte table of offsets;
   control then jumps into one of several short chains built from shll2,
   shll, shll8 and shll16 that together add up to the requested count.
   Although the header below lists nothing as destroyed, the code overwrites
   r5 (first with the masked count, then with the table byte).
   NOTE(review): no rts is visible in this block.  The embedded line numbers
   skip exactly one line ahead of the last instruction of every chain (393,
   403, 413, 424, 434, 445, 456, 468) and three lines after the _0 label, so
   as written each chain runs on into the next; confirm against the upstream
   file. */
323 ! GLOBAL(ashlsi3)
325 ! Entry:
327 ! r4: Value to shift
328 ! r5: Shifts
330 ! Exit:
332 ! r0: Result
334 ! Destroys:
336 ! (none)
338 .global GLOBAL(ashlsi3)
339 .align 2
340 GLOBAL(ashlsi3):
/* r5 &= 31, then r5 = table[r5] (a byte offset measured from the table). */
341 mov #31,r0
342 and r0,r5
343 mova LOCAL(ashlsi3_table),r0
344 mov.b @(r0,r5),r5
/* With __sh1__ the target is formed as table address + offset and reached
   with jmp; otherwise braf takes the offset directly. */
345 #ifdef __sh1__
346 add r5,r0
347 jmp @r0
348 #else
349 braf r5
350 #endif
/* Delay slot of the jump: copy the value to shift into the result register. */
351 mov r4,r0
353 .align 2
/* One byte per shift count 0..31: distance from the table to its entry. */
354 LOCAL(ashlsi3_table):
355 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
356 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
357 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
358 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
359 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
360 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
361 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
362 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
363 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
364 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
365 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
366 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
367 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
368 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
369 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
370 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
371 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
372 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
373 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
374 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
375 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
376 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
377 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
378 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
379 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
380 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
381 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
382 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
383 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
384 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
385 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
386 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
/* Even counts 6, 4, 2: two bits per step. */
388 LOCAL(ashlsi3_6):
389 shll2 r0
390 LOCAL(ashlsi3_4):
391 shll2 r0
392 LOCAL(ashlsi3_2):
394 shll2 r0
/* Odd counts 7, 5, 3, 1: two bits per step, then a final single-bit shift. */
396 LOCAL(ashlsi3_7):
397 shll2 r0
398 LOCAL(ashlsi3_5):
399 shll2 r0
400 LOCAL(ashlsi3_3):
401 shll2 r0
402 LOCAL(ashlsi3_1):
404 shll r0
/* Counts 14, 12, 10, 8: two bits per step, then shift by 8. */
406 LOCAL(ashlsi3_14):
407 shll2 r0
408 LOCAL(ashlsi3_12):
409 shll2 r0
410 LOCAL(ashlsi3_10):
411 shll2 r0
412 LOCAL(ashlsi3_8):
414 shll8 r0
/* Counts 15, 13, 11, 9: two bits per step, then shift by 8 + 1. */
416 LOCAL(ashlsi3_15):
417 shll2 r0
418 LOCAL(ashlsi3_13):
419 shll2 r0
420 LOCAL(ashlsi3_11):
421 shll2 r0
422 LOCAL(ashlsi3_9):
423 shll8 r0
425 shll r0
/* Counts 22, 20, 18, 16: two bits per step, then shift by 16. */
427 LOCAL(ashlsi3_22):
428 shll2 r0
429 LOCAL(ashlsi3_20):
430 shll2 r0
431 LOCAL(ashlsi3_18):
432 shll2 r0
433 LOCAL(ashlsi3_16):
435 shll16 r0
/* Counts 23, 21, 19, 17: two bits per step, then shift by 16 + 1. */
437 LOCAL(ashlsi3_23):
438 shll2 r0
439 LOCAL(ashlsi3_21):
440 shll2 r0
441 LOCAL(ashlsi3_19):
442 shll2 r0
443 LOCAL(ashlsi3_17):
444 shll16 r0
446 shll r0
/* Counts 30, 28, 26, 24: two bits per step, then shift by 16 + 8. */
448 LOCAL(ashlsi3_30):
449 shll2 r0
450 LOCAL(ashlsi3_28):
451 shll2 r0
452 LOCAL(ashlsi3_26):
453 shll2 r0
454 LOCAL(ashlsi3_24):
455 shll16 r0
457 shll8 r0
/* Counts 31, 29, 27, 25: two bits per step, then shift by 16 + 8 + 1. */
459 LOCAL(ashlsi3_31):
460 shll2 r0
461 LOCAL(ashlsi3_29):
462 shll2 r0
463 LOCAL(ashlsi3_27):
464 shll2 r0
465 LOCAL(ashlsi3_25):
466 shll16 r0
467 shll8 r0
469 shll r0
/* Count 0: no instruction follows this label in this copy. */
471 LOCAL(ashlsi3_0):
475 #endif
477 #ifdef L_lshiftrt
/* lshrsi3: logical right shift of r4 by the variable count in r5, result in
   r0.  Same structure as the left-shift routine in this file: the count is
   masked to 0..31, a byte table supplies the offset of the entry to jump
   to, and each entry is a short chain of shlr2, shlr, shlr8 and shlr16
   that adds up to the requested count.
   Although the header below lists nothing as destroyed, the code overwrites
   r5 (first with the masked count, then with the table byte).
   NOTE(review): no rts is visible in this block.  The embedded line numbers
   skip exactly one line ahead of the last instruction of every chain (550,
   560, 570, 581, 591, 602, 613, 625) and three lines after the _0 label, so
   as written each chain runs on into the next; confirm against the upstream
   file. */
480 ! GLOBAL(lshrsi3)
482 ! Entry:
484 ! r4: Value to shift
485 ! r5: Shifts
487 ! Exit:
489 ! r0: Result
491 ! Destroys:
493 ! (none)
495 .global GLOBAL(lshrsi3)
496 .align 2
497 GLOBAL(lshrsi3):
/* r5 &= 31, then r5 = table[r5] (a byte offset measured from the table). */
498 mov #31,r0
499 and r0,r5
500 mova LOCAL(lshrsi3_table),r0
501 mov.b @(r0,r5),r5
/* With __sh1__ the target is formed as table address + offset and reached
   with jmp; otherwise braf takes the offset directly. */
502 #ifdef __sh1__
503 add r5,r0
504 jmp @r0
505 #else
506 braf r5
507 #endif
/* Delay slot of the jump: copy the value to shift into the result register. */
508 mov r4,r0
510 .align 2
/* One byte per shift count 0..31: distance from the table to its entry. */
511 LOCAL(lshrsi3_table):
512 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
513 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
514 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
515 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
516 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
517 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
518 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
519 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
520 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
521 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
522 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
523 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
524 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
525 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
526 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
527 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
528 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
529 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
530 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
531 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
532 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
533 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
534 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
535 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
536 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
537 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
538 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
539 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
540 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
541 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
542 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
543 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
/* Even counts 6, 4, 2: two bits per step. */
545 LOCAL(lshrsi3_6):
546 shlr2 r0
547 LOCAL(lshrsi3_4):
548 shlr2 r0
549 LOCAL(lshrsi3_2):
551 shlr2 r0
/* Odd counts 7, 5, 3, 1: two bits per step, then a final single-bit shift. */
553 LOCAL(lshrsi3_7):
554 shlr2 r0
555 LOCAL(lshrsi3_5):
556 shlr2 r0
557 LOCAL(lshrsi3_3):
558 shlr2 r0
559 LOCAL(lshrsi3_1):
561 shlr r0
/* Counts 14, 12, 10, 8: two bits per step, then shift by 8. */
563 LOCAL(lshrsi3_14):
564 shlr2 r0
565 LOCAL(lshrsi3_12):
566 shlr2 r0
567 LOCAL(lshrsi3_10):
568 shlr2 r0
569 LOCAL(lshrsi3_8):
571 shlr8 r0
/* Counts 15, 13, 11, 9: two bits per step, then shift by 8 + 1. */
573 LOCAL(lshrsi3_15):
574 shlr2 r0
575 LOCAL(lshrsi3_13):
576 shlr2 r0
577 LOCAL(lshrsi3_11):
578 shlr2 r0
579 LOCAL(lshrsi3_9):
580 shlr8 r0
582 shlr r0
/* Counts 22, 20, 18, 16: two bits per step, then shift by 16. */
584 LOCAL(lshrsi3_22):
585 shlr2 r0
586 LOCAL(lshrsi3_20):
587 shlr2 r0
588 LOCAL(lshrsi3_18):
589 shlr2 r0
590 LOCAL(lshrsi3_16):
592 shlr16 r0
/* Counts 23, 21, 19, 17: two bits per step, then shift by 16 + 1. */
594 LOCAL(lshrsi3_23):
595 shlr2 r0
596 LOCAL(lshrsi3_21):
597 shlr2 r0
598 LOCAL(lshrsi3_19):
599 shlr2 r0
600 LOCAL(lshrsi3_17):
601 shlr16 r0
603 shlr r0
/* Counts 30, 28, 26, 24: two bits per step, then shift by 16 + 8. */
605 LOCAL(lshrsi3_30):
606 shlr2 r0
607 LOCAL(lshrsi3_28):
608 shlr2 r0
609 LOCAL(lshrsi3_26):
610 shlr2 r0
611 LOCAL(lshrsi3_24):
612 shlr16 r0
614 shlr8 r0
/* Counts 31, 29, 27, 25: two bits per step, then shift by 16 + 8 + 1. */
616 LOCAL(lshrsi3_31):
617 shlr2 r0
618 LOCAL(lshrsi3_29):
619 shlr2 r0
620 LOCAL(lshrsi3_27):
621 shlr2 r0
622 LOCAL(lshrsi3_25):
623 shlr16 r0
624 shlr8 r0
626 shlr r0
/* Count 0: no instruction follows this label in this copy. */
628 LOCAL(lshrsi3_0):
632 #endif
634 #ifdef L_movstr
/* Block copy helpers.  r4 is the destination pointer, r5 the source pointer
   and every transfer is a 32-bit mov.l staged through r0.
     movstrSI<N>  copy N bytes (N = 4..64, multiple of 4) by entering a
                  chain of load/store pairs that works from the highest
                  offset down to offset 0.
     movstr       copy in 64-byte groups, with r6 as the loop counter
                  (decremented by 16 per group), then hand the remainder to
                  "done".
   NOTE(review): no rts is visible in this block, and the embedded line
   numbers skip 712-714 straight after the movstrSI0 label, so as written
   the movstrSI chain runs on into movstr; confirm against the upstream
   file. */
635 .text
636 ! done all the large groups, do the remainder
638 ! jump to movstr+
/* "done": reached from movstr once r6 is no longer positive.  Both pointers
   are advanced past the group just copied, then control jumps to
   movstrSI0 + 4 * r6.  Every movstrSI entry is two instructions, so a
   negative r6 selects how many trailing longwords are still copied.
   (Assumes the usual 16-bit SH instruction encoding.) */
639 done:
640 add #64,r5
641 mova GLOBAL(movstrSI0),r0
642 shll2 r6
643 add r6,r0
644 jmp @r0
/* Delay slot of the jmp. */
645 add #64,r4
646 .align 4
/* Fixed-size entries: each pair copies one longword at the offset given. */
647 .global GLOBAL(movstrSI64)
648 GLOBAL(movstrSI64):
649 mov.l @(60,r5),r0
650 mov.l r0,@(60,r4)
651 .global GLOBAL(movstrSI60)
652 GLOBAL(movstrSI60):
653 mov.l @(56,r5),r0
654 mov.l r0,@(56,r4)
655 .global GLOBAL(movstrSI56)
656 GLOBAL(movstrSI56):
657 mov.l @(52,r5),r0
658 mov.l r0,@(52,r4)
659 .global GLOBAL(movstrSI52)
660 GLOBAL(movstrSI52):
661 mov.l @(48,r5),r0
662 mov.l r0,@(48,r4)
663 .global GLOBAL(movstrSI48)
664 GLOBAL(movstrSI48):
665 mov.l @(44,r5),r0
666 mov.l r0,@(44,r4)
667 .global GLOBAL(movstrSI44)
668 GLOBAL(movstrSI44):
669 mov.l @(40,r5),r0
670 mov.l r0,@(40,r4)
671 .global GLOBAL(movstrSI40)
672 GLOBAL(movstrSI40):
673 mov.l @(36,r5),r0
674 mov.l r0,@(36,r4)
675 .global GLOBAL(movstrSI36)
676 GLOBAL(movstrSI36):
677 mov.l @(32,r5),r0
678 mov.l r0,@(32,r4)
679 .global GLOBAL(movstrSI32)
680 GLOBAL(movstrSI32):
681 mov.l @(28,r5),r0
682 mov.l r0,@(28,r4)
683 .global GLOBAL(movstrSI28)
684 GLOBAL(movstrSI28):
685 mov.l @(24,r5),r0
686 mov.l r0,@(24,r4)
687 .global GLOBAL(movstrSI24)
688 GLOBAL(movstrSI24):
689 mov.l @(20,r5),r0
690 mov.l r0,@(20,r4)
691 .global GLOBAL(movstrSI20)
692 GLOBAL(movstrSI20):
693 mov.l @(16,r5),r0
694 mov.l r0,@(16,r4)
695 .global GLOBAL(movstrSI16)
696 GLOBAL(movstrSI16):
697 mov.l @(12,r5),r0
698 mov.l r0,@(12,r4)
699 .global GLOBAL(movstrSI12)
700 GLOBAL(movstrSI12):
701 mov.l @(8,r5),r0
702 mov.l r0,@(8,r4)
703 .global GLOBAL(movstrSI8)
704 GLOBAL(movstrSI8):
705 mov.l @(4,r5),r0
706 mov.l r0,@(4,r4)
707 .global GLOBAL(movstrSI4)
708 GLOBAL(movstrSI4):
709 mov.l @(0,r5),r0
710 mov.l r0,@(0,r4)
/* End of the chain; also the base address used by the jump in "done". */
711 GLOBAL(movstrSI0):
715 .align 4
717 .global GLOBAL(movstr)
/* movstr: copy one 64-byte group (16 longwords, highest offset first). */
718 GLOBAL(movstr):
719 mov.l @(60,r5),r0
720 mov.l r0,@(60,r4)
722 mov.l @(56,r5),r0
723 mov.l r0,@(56,r4)
725 mov.l @(52,r5),r0
726 mov.l r0,@(52,r4)
728 mov.l @(48,r5),r0
729 mov.l r0,@(48,r4)
731 mov.l @(44,r5),r0
732 mov.l r0,@(44,r4)
734 mov.l @(40,r5),r0
735 mov.l r0,@(40,r4)
737 mov.l @(36,r5),r0
738 mov.l r0,@(36,r4)
740 mov.l @(32,r5),r0
741 mov.l r0,@(32,r4)
743 mov.l @(28,r5),r0
744 mov.l r0,@(28,r4)
746 mov.l @(24,r5),r0
747 mov.l r0,@(24,r4)
749 mov.l @(20,r5),r0
750 mov.l r0,@(20,r4)
752 mov.l @(16,r5),r0
753 mov.l r0,@(16,r4)
755 mov.l @(12,r5),r0
756 mov.l r0,@(12,r4)
758 mov.l @(8,r5),r0
759 mov.l r0,@(8,r4)
761 mov.l @(4,r5),r0
762 mov.l r0,@(4,r4)
764 mov.l @(0,r5),r0
765 mov.l r0,@(0,r4)
/* r6 -= 16; cmp/pl sets T when r6 is still greater than zero, so "bf done"
   leaves the loop once r6 <= 0. */
767 add #-16,r6
768 cmp/pl r6
769 bf done
/* Another full group remains: advance both pointers by 64 and repeat (the
   second add sits in the delay slot of bra). */
771 add #64,r5
772 bra GLOBAL(movstr)
773 add #64,r4
774 #endif
776 #ifdef L_movstr_i4
/* Longword block copy with post-increment loads.  r4 is the destination,
   r5 the source (advanced by the @r5+ loads) and r6 a counter that is
   decremented with dt.  r0-r3 hold the data in flight.
     movstr_i4_even / movstr_i4_odd  two ways into one software-pipelined
                                     loop that moves four longwords per
                                     round trip;
     movstrSI12_i4                   straight-line copy of three longwords.
   NOTE(review): no rts is visible in this block.  The embedded line numbers
   skip 785, 813 and 828, each just ahead of the final store of a path, so
   as written L_movstr_2mod4_end runs into movstr_i4_odd and the loop exit
   runs into movstr_i4_even; confirm against the upstream file.  The exact
   meaning of the r6 count (which multiple of longwords it encodes) is not
   stated anywhere in this section. */
777 .text
778 .global GLOBAL(movstr_i4_even)
779 .global GLOBAL(movstr_i4_odd)
780 .global GLOBAL(movstrSI12_i4)
782 .p2align 5
/* Loop exit taken from L_movstr_loop: store the last two loaded longwords. */
783 L_movstr_2mod4_end:
784 mov.l r0,@(16,r4)
786 mov.l r1,@(20,r4)
788 .p2align 2
/* Odd entry: preload three longwords and bias r4 by -4 so the first stores
   land at offsets 4, 8 and 12 from the adjusted pointer. */
790 GLOBAL(movstr_i4_odd):
791 mov.l @r5+,r1
792 add #-4,r4
793 mov.l @r5+,r2
794 mov.l @r5+,r3
795 mov.l r1,@(4,r4)
796 mov.l r2,@(8,r4)
/* Loop body, first half: finish the previous group and start loading the
   next.  dt decrements r6 and sets T when it reaches zero; bt/s then leaves
   through L_movstr_2mod4_end with the second load in its delay slot. */
798 L_movstr_loop:
799 mov.l r3,@(12,r4)
800 dt r6
801 mov.l @r5+,r0
802 bt/s L_movstr_2mod4_end
803 mov.l @r5+,r1
804 add #16,r4
/* Loop body, second half (also the target of the even entry): r0 and r1 are
   already loaded.  bf/s loops back while r6 is non-zero, with the store of
   r2 in its delay slot. */
805 L_movstr_start_even:
806 mov.l @r5+,r2
807 mov.l @r5+,r3
808 mov.l r0,@r4
809 dt r6
810 mov.l r1,@(4,r4)
811 bf/s L_movstr_loop
812 mov.l r2,@(8,r4)
/* Fall-through exit of the loop: store the last loaded longword. */
814 mov.l r3,@(12,r4)
/* Even entry: preload two longwords and join the loop at its second half
   (the second load sits in the delay slot of bra). */
816 GLOBAL(movstr_i4_even):
817 mov.l @r5+,r0
818 bra L_movstr_start_even
819 mov.l @r5+,r1
821 .p2align 4
/* Copy exactly three longwords from r5 to r4 without changing either
   pointer; r0-r2 are used as temporaries. */
822 GLOBAL(movstrSI12_i4):
823 mov.l @r5,r0
824 mov.l @(4,r5),r1
825 mov.l @(8,r5),r2
826 mov.l r0,@r4
827 mov.l r1,@(4,r4)
829 mov.l r2,@(8,r4)
830 #endif
832 #ifdef L_mulsi3
835 .global GLOBAL(mulsi3)
837 ! r4 = aabb
838 ! r5 = ccdd
839 ! r0 = aabb*ccdd via partial products
841 ! if aa == 0 and cc == 0
842 ! r0 = bb*dd
844 ! else
845 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
/* mulsi3: 32 x 32 -> 32 bit multiply built from 16 x 16 mulu.w products.
   In:  r4, r5 = operands.   Out: r0 = low 32 bits of r4 * r5.
   Scratch: r1, r2, r3 and macl.
   The low halves are always multiplied.  When both high halves are zero
   that product is the answer; otherwise the two cross products are summed,
   shifted up by 16 and added in.
   NOTE(review): the embedded line numbers skip 865, between the shll16 and
   the final add, where a return would be expected for the "hiset" path;
   confirm against the upstream file. */
848 GLOBAL(mulsi3):
849 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
850 mov r5,r3 ! r3 = ccdd
851 swap.w r4,r2 ! r2 = bbaa
852 xtrct r2,r3 ! r3 = aacc
853 tst r3,r3 ! msws zero ?
854 bf hiset
855 rts ! yes - then we have the answer
/* Delay slot of the rts: fetch the 16 x 16 product as the result. */
856 sts macl,r0
/* At least one high half is non-zero: add (aa*dd + cc*bb) << 16. */
858 hiset: sts macl,r0 ! r0 = bb*dd
859 mulu.w r2,r5 ! brewing macl = aa*dd
860 sts macl,r1
861 mulu.w r3,r4 ! brewing macl = cc*bb
862 sts macl,r2
863 add r1,r2
864 shll16 r2
866 add r2,r0
869 #endif
870 #endif /* ! __SH5__ */
871 #ifdef L_sdivsi3_i4
/* sdivsi3_i4: signed 32-bit division done in the FPU.  Both operands are
   converted to double, divided, and the quotient is truncated back to an
   integer that is left in fpul (not in a general register).
   Two variants are selected by the preprocessor; the second one saves
   fpscr on the stack, installs its own value for the duration of the
   computation and restores it afterwards.
   NOTE(review): no rts is visible in either variant.  The embedded line
   numbers skip 884 and 906, each just ahead of the final instruction;
   confirm against the upstream file. */
872 .title "SH DIVIDE"
873 !! 4 byte integer Divide code for the Hitachi SH
874 #ifdef __SH4__
875 !! args in r4 and r5, result in fpul, clobber dr0, dr2
877 .global GLOBAL(sdivsi3_i4)
878 GLOBAL(sdivsi3_i4):
/* dr0 = (double) r4, dr2 = (double) r5, dr0 = dr0 / dr2, fpul = trunc(dr0). */
879 lds r4,fpul
880 float fpul,dr0
881 lds r5,fpul
882 float fpul,dr2
883 fdiv dr2,dr0
885 ftrc dr0,fpul
887 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
888 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
890 #if ! __SH5__ || __SH5__ == 32
891 #if __SH5__
892 .mode SHcompact
893 #endif
894 .global GLOBAL(sdivsi3_i4)
895 GLOBAL(sdivsi3_i4):
/* Save the caller's fpscr, then load fpscr with 0x00080000 (8 with its
   16-bit halves swapped).  Presumably this selects double-precision
   operation for the dr-register instructions below; confirm against the
   FPSCR description in the SH-4 manual. */
896 sts.l fpscr,@-r15
897 mov #8,r2
898 swap.w r2,r2
899 lds r2,fpscr
/* Same convert / divide / truncate sequence as the __SH4__ variant. */
900 lds r4,fpul
901 float fpul,dr0
902 lds r5,fpul
903 float fpul,dr2
904 fdiv dr2,dr0
905 ftrc dr0,fpul
/* Restore the caller's fpscr. */
907 lds.l @r15+,fpscr
909 #endif /* ! __SH5__ || __SH5__ == 32 */
910 #endif /* ! __SH4__ */
911 #endif
913 #ifdef L_sdivsi3
/* sdivsi3: signed 32-bit integer division without the FPU.
   Three implementations are selected by the preprocessor:
     __SHMEDIA__ non-zero    SHmedia code (a disabled "#if 0" loop version
                             plus the live version after its "#else");
     __SHMEDIA__ defined     m5compact-nofpu shift-and-subtract loop;
     otherwise               SH compact code built on div0s / div1.
   Operands arrive in r4 (dividend) and r5 (divisor); the quotient is
   returned in r0.
   NOTE(review): the text between "#if 0" and the following "#else" has
   been left exactly as found.  The comment that opens there has no closing
   delimiter in this copy, and the C sketch inside it has lost its brace and
   "do" lines; confirm against the upstream file. */
914 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
915 sh3e code. */
916 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
918 !! Steve Chamberlain
919 !! sac@cygnus.com
923 !! args in r4 and r5, result in r0 clobber r1,r2,r3
925 .global GLOBAL(sdivsi3)
926 #if __SHMEDIA__
927 #if __SH5__ == 32
928 .section .text..SHmedia32,"ax"
929 #else
930 .text
931 #endif
932 .align 2
933 #if 0
934 /* The assembly code that follows is a hand-optimized version of the C
935 code that follows. Note that the registers that are modified are
936 exactly those listed as clobbered in the patterns divsi3_i1 and
937 divsi3_i1_media.
939 int __sdivsi3 (i, j)
940 int i, j;
942 register unsigned long long r18 asm ("r18");
943 register unsigned long long r19 asm ("r19");
944 register unsigned long long r0 asm ("r0") = 0;
945 register unsigned long long r1 asm ("r1") = 1;
946 register int r2 asm ("r2") = i >> 31;
947 register int r3 asm ("r3") = j >> 31;
949 r2 = r2 ? r2 : r1;
950 r3 = r3 ? r3 : r1;
951 r18 = i * r2;
952 r19 = j * r3;
953 r2 *= r3;
955 r19 <<= 31;
956 r1 <<= 31;
958 if (r18 >= r19)
959 r0 |= r1, r18 -= r19;
960 while (r19 >>= 1, r1 >>= 1);
962 return r2 * (int)r0;
965 GLOBAL(sdivsi3):
966 pt/l LOCAL(sdivsi3_dontadd), tr2
967 pt/l LOCAL(sdivsi3_loop), tr1
968 ptabs/l r18, tr0
969 movi 0, r0
970 movi 1, r1
971 shari.l r4, 31, r2
972 shari.l r5, 31, r3
973 cmveq r2, r1, r2
974 cmveq r3, r1, r3
975 muls.l r4, r2, r18
976 muls.l r5, r3, r19
977 muls.l r2, r3, r2
978 shlli r19, 31, r19
979 shlli r1, 31, r1
980 LOCAL(sdivsi3_loop):
981 bgtu r19, r18, tr2
982 or r0, r1, r0
983 sub r18, r19, r18
984 LOCAL(sdivsi3_dontadd):
985 shlri r1, 1, r1
986 shlri r19, 1, r19
987 bnei r1, 0, tr1
988 muls.l r0, r2, r0
989 add.l r0, r63, r0
990 blink tr0, r63
991 #else /* ! 0 */
/* Live SHmedia version.  It takes the absolute values of both operands,
   forms the quotient through a sequence of fixed-point multiplies and
   correction steps, and applies the sign at the end.  The return address is
   taken from r18 (ptabs r18,tr0) and the routine returns with blink.
   NOTE(review): the numerical scheme looks like a reciprocal approximation
   refined in several steps; the error analysis behind the constants is not
   visible here, so the instruction order should not be disturbed. */
992 // inputs: r4,r5
993 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
994 // result in r0
995 GLOBAL(sdivsi3):
996 // can create absolute value without extra latency,
997 // but dependent on proper sign extension of inputs:
998 // shari.l r5,31,r2
999 // xor r5,r2,r20
1000 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
/* r2 = r5 >> 31 is 0 or -1; or-ing in 1 makes it +1 or -1, so the multiply
   below yields the absolute value of r5. */
1001 shari.l r5,31,r2
1002 ori r2,1,r2
1003 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1004 movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
1005 shari.l r4,31,r3
1006 nsb r20,r0
1007 shlld r20,r0,r25
1008 shlri r25,48,r25
1009 sub r19,r25,r1
1010 mmulfx.w r1,r1,r2
1011 mshflo.w r1,r63,r1
1012 // If r4 was to be used in-place instead of r21, could use this sequence
1013 // to compute absolute:
1014 // sub r63,r4,r19 // compute absolute value of r4
1015 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1016 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
/* Same +1 / -1 trick for the dividend: r21 = absolute value of r4. */
1017 ori r3,1,r3
1018 mmulfx.w r25,r2,r2
1019 sub r19,r0,r0
1020 muls.l r4,r3,r21
1021 msub.w r1,r2,r2
1022 addi r2,-2,r1
1023 mulu.l r21,r1,r19
1024 mmulfx.w r2,r2,r2
1025 shlli r1,15,r1
1026 shlrd r19,r0,r19
1027 mulu.l r19,r20,r3
1028 mmacnfx.wl r25,r2,r1
1029 ptabs r18,tr0
1030 sub r21,r3,r25
1032 mulu.l r25,r1,r2
1033 addi r0,14,r0
/* r18 = r4 ^ r5, shifted down below to a 0 / -1 mask: the sign of the
   quotient. */
1034 xor r4,r5,r18
1035 shlrd r2,r0,r2
1036 mulu.l r2,r20,r3
1037 add r19,r2,r19
1038 shari.l r18,31,r18
1039 sub r25,r3,r25
1041 mulu.l r25,r1,r2
1042 sub r25,r20,r25
1043 add r19,r18,r19
1044 shlrd r2,r0,r2
1045 mulu.l r2,r20,r3
1046 addi r25,1,r25
1047 add r19,r2,r19
1049 cmpgt r25,r3,r25
1050 add.l r19,r25,r0
/* Apply the sign mask to the accumulated quotient and return. */
1051 xor r0,r18,r0
1052 blink tr0,r63
1053 #endif
1054 #elif defined __SHMEDIA__
1055 /* m5compact-nofpu */
1056 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1057 .mode SHmedia
1058 .section .text..SHmedia32,"ax"
1059 .align 2
/* Compact loop version.  Branch targets and the return address (from r18)
   are set up first. */
1060 GLOBAL(sdivsi3):
1061 pt/l LOCAL(sdivsi3_dontsub), tr0
1062 pt/l LOCAL(sdivsi3_loop), tr1
1063 ptabs/l r18,tr2
/* r18 / r19 = sign masks (0 or -1) of dividend / divisor; (x ^ mask) - mask
   gives the absolute values in r20 / r21; r19 then becomes the sign mask of
   the quotient. */
1064 shari.l r4,31,r18
1065 shari.l r5,31,r19
1066 xor r4,r18,r20
1067 xor r5,r19,r21
1068 sub.l r20,r18,r20
1069 sub.l r21,r19,r21
1070 xor r18,r19,r19
/* r25 = divisor << 32 (its low word doubles as the loop counter),
   r21 = r25 - 1, r20 = zero-extended dividend. */
1071 shlli r21,32,r25
1072 addi r25,-1,r21
1073 addz.l r20,r63,r20
/* Shift-and-subtract loop: r20 is doubled each round and r21 is subtracted
   whenever r20 exceeds it.  Because r21 is (divisor << 32) - 1, each
   subtraction also adds 1 into the low half, which appears to be how the
   quotient bits accumulate there.  The counter runs from 0 down to -32. */
1074 LOCAL(sdivsi3_loop):
1075 shlli r20,1,r20
1076 bgeu/u r21,r20,tr0
1077 sub r20,r21,r20
1078 LOCAL(sdivsi3_dontsub):
1079 addi.l r25,-1,r25
1080 bnei r25,-32,tr1
/* Apply the quotient sign: (q ^ mask) - mask, keeping the low 32 bits. */
1081 xor r20,r19,r20
1082 sub.l r20,r19,r0
1083 blink tr2,r63
1084 #else /* ! __SHMEDIA__ */
/* SH compact version using the hardware divide-step instructions.
   r1 = dividend, r0 = divisor, r3 = sign extension of the dividend,
   r2 = 0.  A zero divisor branches to div0, which returns 0.
   NOTE(review): the embedded line numbers skip 1162, between the final addc
   and "mov r1,r0", where a return would be expected; as written the main
   path runs on into div0 and the quotient in r0 is overwritten with 0.
   Confirm against the upstream file. */
1085 GLOBAL(sdivsi3):
1086 mov r4,r1
1087 mov r5,r0
1089 tst r0,r0
1090 bt div0
1091 mov #0,r2
/* div0s against r2 = 0 leaves the dividend's sign in T; the two subc then
   give r3 = 0 - T (0 or -1) and r1 = r1 - T. */
1092 div0s r2,r1
1093 subc r3,r3
1094 subc r2,r1
/* Initialise the divide-step state for divisor r0 and the high word r3,
   then run 32 div1 / rotcl steps, one quotient bit each, shifting the
   result into r1. */
1095 div0s r0,r3
1096 rotcl r1
1097 div1 r0,r3
1098 rotcl r1
1099 div1 r0,r3
1100 rotcl r1
1101 div1 r0,r3
1102 rotcl r1
1103 div1 r0,r3
1104 rotcl r1
1105 div1 r0,r3
1106 rotcl r1
1107 div1 r0,r3
1108 rotcl r1
1109 div1 r0,r3
1110 rotcl r1
1111 div1 r0,r3
1112 rotcl r1
1113 div1 r0,r3
1114 rotcl r1
1115 div1 r0,r3
1116 rotcl r1
1117 div1 r0,r3
1118 rotcl r1
1119 div1 r0,r3
1120 rotcl r1
1121 div1 r0,r3
1122 rotcl r1
1123 div1 r0,r3
1124 rotcl r1
1125 div1 r0,r3
1126 rotcl r1
1127 div1 r0,r3
1128 rotcl r1
1129 div1 r0,r3
1130 rotcl r1
1131 div1 r0,r3
1132 rotcl r1
1133 div1 r0,r3
1134 rotcl r1
1135 div1 r0,r3
1136 rotcl r1
1137 div1 r0,r3
1138 rotcl r1
1139 div1 r0,r3
1140 rotcl r1
1141 div1 r0,r3
1142 rotcl r1
1143 div1 r0,r3
1144 rotcl r1
1145 div1 r0,r3
1146 rotcl r1
1147 div1 r0,r3
1148 rotcl r1
1149 div1 r0,r3
1150 rotcl r1
1151 div1 r0,r3
1152 rotcl r1
1153 div1 r0,r3
1154 rotcl r1
1155 div1 r0,r3
1156 rotcl r1
1157 div1 r0,r3
1158 rotcl r1
1159 div1 r0,r3
1160 rotcl r1
/* Final correction: r1 = r1 + T (r2 is still 0), then move it to r0. */
1161 addc r2,r1
1163 mov r1,r0
/* Division by zero: return 0 (the mov is in the rts delay slot). */
1166 div0: rts
1167 mov #0,r0
1169 #endif /* ! __SHMEDIA__ */
1170 #endif /* ! __SH4__ */
1171 #endif
1172 #ifdef L_udivsi3_i4
/* udivsi3_i4: unsigned 32-bit division done in the FPU; the quotient is
   left in fpul.  Three variants are selected by the preprocessor.
   The two SH compact variants use the same trick.  The int-to-double
   conversion is signed, so both operands first have their top bit flipped
   (xor with 0x80000000).  After conversion the constant 2147483648.0 is
   added back to each, giving the unsigned values as doubles.  A divisor of
   0 or 1 takes the "trivial" path, which just copies the dividend to fpul.
   NOTE(review): both compact variants execute "mova L1,r0", but no L1
   label is visible in this copy; the embedded line numbers skip 1217 and
   1282, immediately ahead of the constant data.  No rts is visible either
   (numbers 1206, 1210, 1271 and 1278 are skipped where one would be
   expected).  Confirm against the upstream file. */
1174 .title "SH DIVIDE"
1175 !! 4 byte integer Divide code for the Hitachi SH
1176 #ifdef __SH4__
1177 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1179 .global GLOBAL(udivsi3_i4)
1180 GLOBAL(udivsi3_i4):
/* cmp/hi sets T when r5 > 1 (unsigned); otherwise take the trivial path. */
1181 mov #1,r1
1182 cmp/hi r1,r5
1183 bf trivial
/* rotr turns r1 = 1 into 0x80000000; xor flips the top bit of the dividend. */
1184 rotr r1
1185 xor r1,r4
1186 lds r4,fpul
/* Load the 2147483648.0 constant into dr4, either with one 64-bit load or
   as two 32-bit halves whose order depends on the endianness. */
1187 mova L1,r0
1188 #ifdef FMOVD_WORKS
1189 fmov.d @r0+,dr4
1190 #else
1191 #ifdef __LITTLE_ENDIAN__
1192 fmov.s @r0+,fr5
1193 fmov.s @r0,fr4
1194 #else
1195 fmov.s @r0+,fr4
1196 fmov.s @r0,fr5
1197 #endif
1198 #endif
/* Convert both biased operands, undo the bias in floating point, divide and
   truncate the quotient into fpul. */
1199 float fpul,dr0
1200 xor r1,r5
1201 lds r5,fpul
1202 float fpul,dr2
1203 fadd dr4,dr0
1204 fadd dr4,dr2
1205 fdiv dr2,dr0
1207 ftrc dr0,fpul
/* Divisor is 0 or 1: the result is the dividend itself. */
1209 trivial:
1211 lds r4,fpul
1213 .align 2
1214 #ifdef FMOVD_WORKS
1215 .align 3 ! make double below 8 byte aligned.
1216 #endif
/* The bias constant, 2 to the power 31, as a double. */
1218 .double 2147483648
1220 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1221 #if ! __SH5__ || __SH5__ == 32
1222 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
1223 .mode SHmedia
1224 .global GLOBAL(udivsi3_i4)
/* SHmedia variant: zero-extend both operands to 64 bits, move them into FP
   registers, convert to double, divide, truncate back to a 64-bit integer
   and copy fr33 into fr32.  The header above says the result is in fpul;
   presumably fr32 is what the SHcompact caller sees as fpul (TODO confirm
   against the SH-5 register mapping).  Returns via the address in r18. */
1225 GLOBAL(udivsi3_i4):
1226 addz.l r4,r63,r20
1227 addz.l r5,r63,r21
1228 fmov.qd r20,dr0
1229 fmov.qd r21,dr32
1230 ptabs r18,tr0
1231 float.qd dr0,dr0
1232 float.qd dr32,dr32
1233 fdiv.d dr0,dr32,dr0
1234 ftrc.dq dr0,dr32
1235 fmov.s fr33,fr32
1236 blink tr0,r63
1237 #endif /* ! __SH5__ || __SH5__ == 32 */
1238 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1239 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
1241 .global GLOBAL(udivsi3_i4)
1242 GLOBAL(udivsi3_i4):
/* cmp/hi sets T when r5 > 1 (unsigned); otherwise take the trivial path. */
1243 mov #1,r1
1244 cmp/hi r1,r5
1245 bf trivial
/* Save the caller's fpscr and load a new value from the first word of the
   constant block (0x80000, or 0x180000 when FMOVD_WORKS is defined). */
1246 sts.l fpscr,@-r15
1247 mova L1,r0
1248 lds.l @r0+,fpscr
/* rotr turns r1 = 1 into 0x80000000; xor flips the top bit of the dividend. */
1249 rotr r1
1250 xor r1,r4
1251 lds r4,fpul
/* r0 now points past the fpscr word, at the double constant. */
1252 #ifdef FMOVD_WORKS
1253 fmov.d @r0+,dr4
1254 #else
1255 #ifdef __LITTLE_ENDIAN__
1256 fmov.s @r0+,fr5
1257 fmov.s @r0,fr4
1258 #else
1259 fmov.s @r0+,fr4
1260 fmov.s @r0,fr5
1261 #endif
1262 #endif
/* Convert both biased operands, undo the bias in floating point, divide and
   truncate the quotient into fpul. */
1263 float fpul,dr0
1264 xor r1,r5
1265 lds r5,fpul
1266 float fpul,dr2
1267 fadd dr4,dr0
1268 fadd dr4,dr2
1269 fdiv dr2,dr0
1270 ftrc dr0,fpul
/* Restore the caller's fpscr. */
1272 lds.l @r15+,fpscr
1274 #ifdef FMOVD_WORKS
1275 .align 3 ! make double below 8 byte aligned.
1276 #endif
/* Divisor is 0 or 1: the result is the dividend itself. */
1277 trivial:
1279 lds r4,fpul
1281 .align 2
/* Constant block read through r0: the fpscr value, then the bias double. */
1283 #ifndef FMOVD_WORKS
1284 .long 0x80000
1285 #else
1286 .long 0x180000
1287 #endif
1288 .double 2147483648
1290 #endif /* ! __SH4__ */
1291 #endif
1293 #ifdef L_udivsi3
/* udivsi3: unsigned 32-bit integer division without the FPU.
   Operands arrive in r4 (dividend) and r5 (divisor); the quotient is
   returned in r0.  Three implementations are selected by the preprocessor:
     __SHMEDIA__ non-zero    SHmedia code (a disabled "#if 0" loop version
                             plus the live version after its "#else");
     __SHMEDIA__ defined     m5compact-nofpu shift-and-subtract loop;
     otherwise               SH compact code built on div0u / div1.
   NOTE(review): the text from "#if 0" down to the first "bubble" remark of
   the live SHmedia version has been left exactly as found.  The comment
   that opens after "#if 0" has no closing delimiter in this copy, and the
   C sketch inside it has lost its brace and "do" lines; confirm against the
   upstream file. */
1294 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1295 sh3e code. */
1296 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1298 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1299 .global GLOBAL(udivsi3)
1301 #if __SHMEDIA__
1302 #if __SH5__ == 32
1303 .section .text..SHmedia32,"ax"
1304 #else
1305 .text
1306 #endif
1307 .align 2
1308 #if 0
1309 /* The assembly code that follows is a hand-optimized version of the C
1310 code that follows. Note that the registers that are modified are
1311 exactly those listed as clobbered in the patterns udivsi3_i1 and
1312 udivsi3_i1_media.
1314 unsigned
1315 __udivsi3 (i, j)
1316 unsigned i, j;
1318 register unsigned long long r0 asm ("r0") = 0;
1319 register unsigned long long r18 asm ("r18") = 1;
1320 register unsigned long long r4 asm ("r4") = i;
1321 register unsigned long long r19 asm ("r19") = j;
1323 r19 <<= 31;
1324 r18 <<= 31;
1326 if (r4 >= r19)
1327 r0 |= r18, r4 -= r19;
1328 while (r19 >>= 1, r18 >>= 1);
1330 return r0;
1333 GLOBAL(udivsi3):
1334 pt/l LOCAL(udivsi3_dontadd), tr2
1335 pt/l LOCAL(udivsi3_loop), tr1
1336 ptabs/l r18, tr0
1337 movi 0, r0
1338 movi 1, r18
1339 addz.l r5, r63, r19
1340 addz.l r4, r63, r4
1341 shlli r19, 31, r19
1342 shlli r18, 31, r18
1343 LOCAL(udivsi3_loop):
1344 bgtu r19, r4, tr2
1345 or r0, r18, r0
1346 sub r4, r19, r4
1347 LOCAL(udivsi3_dontadd):
1348 shlri r18, 1, r18
1349 shlri r19, 1, r19
1350 bnei r18, 0, tr1
1351 blink tr0, r63
1352 #else
1353 GLOBAL(udivsi3):
1354 // inputs: r4,r5
1355 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1356 // result in r0.
1357 addz.l r5,r63,r22
1358 nsb r22,r0
1359 shlld r22,r0,r25
1360 shlri r25,48,r25
1361 movi 0xffffffffffffbb0c,r20 // shift count eqiv 76
1362 sub r20,r25,r21
1363 mmulfx.w r21,r21,r19
1364 mshflo.w r21,r63,r21
1365 ptabs r18,tr0
1366 mmulfx.w r25,r19,r19
1367 sub r20,r0,r0
1368 /* bubble */
1369 msub.w r21,r19,r19
1370 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1371 before the msub.w, but we need a different value for
1372 r19 to keep errors under control. */
1373 mulu.l r4,r21,r18
1374 mmulfx.w r19,r19,r19
1375 shlli r21,15,r21
1376 shlrd r18,r0,r18
1377 mulu.l r18,r22,r20
1378 mmacnfx.wl r25,r19,r21
1379 /* bubble */
1380 sub r4,r20,r25
1382 mulu.l r25,r21,r19
1383 addi r0,14,r0
1384 /* bubble */
1385 shlrd r19,r0,r19
1386 mulu.l r19,r22,r20
1387 add r18,r19,r18
1388 /* bubble */
1389 sub.l r25,r20,r25
1391 mulu.l r25,r21,r19
1392 addz.l r25,r63,r25
1393 sub r25,r22,r25
1394 shlrd r19,r0,r19
1395 mulu.l r19,r22,r20
1396 addi r25,1,r25
1397 add r18,r19,r18
1399 cmpgt r25,r20,r25
/* Final quotient = accumulated estimate + last correction; return through
   the address taken from r18 earlier. */
1400 add.l r18,r25,r0
1401 blink tr0,r63
1402 #endif
1403 #elif defined (__SHMEDIA__)
1404 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1405 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1406 So use a short shmedia loop. */
1407 // clobbered: r20,r21,r25,tr0,tr1,tr2
1408 .mode SHmedia
1409 .section .text..SHmedia32,"ax"
1410 .align 2
/* Compact loop version.  r25 = divisor << 32 (its low word doubles as the
   loop counter), r21 = r25 - 1, r20 = zero-extended dividend. */
1411 GLOBAL(udivsi3):
1412 pt/l LOCAL(udivsi3_dontsub), tr0
1413 pt/l LOCAL(udivsi3_loop), tr1
1414 ptabs/l r18,tr2
1415 shlli r5,32,r25
1416 addi r25,-1,r21
1417 addz.l r4,r63,r20
/* Shift-and-subtract loop: r20 is doubled each round and r21 is subtracted
   whenever r20 exceeds it.  Because r21 is (divisor << 32) - 1, each
   subtraction also adds 1 into the low half, which appears to be how the
   quotient bits accumulate there.  The counter runs from 0 down to -32. */
1418 LOCAL(udivsi3_loop):
1419 shlli r20,1,r20
1420 bgeu/u r21,r20,tr0
1421 sub r20,r21,r20
1422 LOCAL(udivsi3_dontsub):
1423 addi.l r25,-1,r25
1424 bnei r25,-32,tr1
/* The low 32 bits of r20 are the quotient. */
1425 add.l r20,r63,r0
1426 blink tr2,r63
1427 #else /* ! defined (__SHMEDIA__) */
/* SH compact version.
   Helper chains, called with bsr:
     div8 / div7  run 8 / 7 div1 steps (the last one in the rts delay slot);
     divx4        runs 4 div1 steps with a "rotcl r0" after each of the
                  first three.
   NOTE(review): the embedded line numbers skip 1466 and 1485, in both
   cases between "lds.l @r15+,pr" and the instruction that follows it,
   where a return would be expected; as written the 16-bit path runs on
   into large_divisor.  Confirm against the upstream file. */
1428 LOCAL(div8):
1429 div1 r5,r4
1430 LOCAL(div7):
1431 div1 r5,r4; div1 r5,r4; div1 r5,r4
1432 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1434 LOCAL(divx4):
1435 div1 r5,r4; rotcl r0
1436 div1 r5,r4; rotcl r0
1437 div1 r5,r4; rotcl r0
1438 rts; div1 r5,r4
/* Entry point.  pr is saved because the bsr calls below overwrite it.
   extu.w / cmp/eq set T when the divisor fits in 16 bits. */
1440 GLOBAL(udivsi3):
1441 sts.l pr,@-r15
1442 extu.w r5,r0
1443 cmp/eq r5,r0
/* On SH1 a plain bf is used, so the div0u that follows runs only on the
   fall-through path and large_divisor repeats it.  Elsewhere bf/s puts
   div0u in the delay slot, where it runs on both paths. */
1444 #ifdef __sh1__
1445 bf LOCAL(large_divisor)
1446 #else
1447 bf/s LOCAL(large_divisor)
1448 #endif
1449 div0u
/* 16-bit divisor: the divisor is moved to the upper half of r5 and the
   quotient is produced in two passes of 16 div1 steps each (bsr div8, then
   bsr div7 with one more div1 in its delay slot). */
1450 swap.w r4,r0
1451 shlr16 r4
1452 bsr LOCAL(div8)
1453 shll16 r5
1454 bsr LOCAL(div7)
1455 div1 r5,r4
1456 xtrct r4,r0
1457 xtrct r0,r4
1458 bsr LOCAL(div8)
1459 swap.w r4,r4
1460 bsr LOCAL(div7)
1461 div1 r5,r4
/* Restore pr, assemble the two partial results into r0 and shift in the
   last quotient bit; r5 is moved back down by 16. */
1462 lds.l @r15+,pr
1463 xtrct r4,r0
1464 swap.w r0,r0
1465 rotcl r0
1467 shlr16 r5
/* Divisor wider than 16 bits: four calls to divx4 give 16 div1 steps, with
   the quotient bits rotated into r0.  Presumably 16 steps are enough
   because the quotient then fits in 16 bits. */
1469 LOCAL(large_divisor):
1470 #ifdef __sh1__
1471 div0u
1472 #endif
1473 mov #0,r0
1474 xtrct r4,r0
1475 xtrct r0,r4
1476 bsr LOCAL(divx4)
1477 rotcl r0
1478 bsr LOCAL(divx4)
1479 rotcl r0
1480 bsr LOCAL(divx4)
1481 rotcl r0
1482 bsr LOCAL(divx4)
1483 rotcl r0
/* Restore pr and shift in the last quotient bit. */
1484 lds.l @r15+,pr
1486 rotcl r0
1488 #endif /* ! __SHMEDIA__ */
1489 #endif /* __SH4__ */
1490 #endif /* L_udivsi3 */
1492 #ifdef L_udivdi3
1493 #ifdef __SHMEDIA__
/* udivdi3 (SHmedia): unsigned 64-bit division.
   In:  r2 = dividend, r3 = divisor (only read), r18 = return address.
   Out: r2 = quotient.
   Writes r0, r1, r2, r4-r9, r19-r22, r25 and tr0.
   Outline, following the inline comments: normalize the divisor, build a
   31-bit fixed-point reciprocal in r1 with the multimedia multiply
   instructions, then run multiply / subtract divide steps.  Divisors that
   fit in 32 bits take the fall-through path (three steps); wider ones
   branch to LOCAL(large_divisor).  */
1494 .mode SHmedia
1495 .section .text..SHmedia32,"ax"
1496 .align 2
1497 .global GLOBAL(udivdi3)
1498 GLOBAL(udivdi3):
/* r22 = normalization shift for the divisor; r6 = divisor shifted left by
   r22; r5 = bits 49 and up of r6, the seed for the reciprocal.  */
1499 shlri r3,1,r4
1500 nsb r4,r22
1501 shlld r3,r22,r6
1502 shlri r6,49,r5
1503 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1504 sub r21,r5,r1
1505 mmulfx.w r1,r1,r4
1506 mshflo.w r1,r63,r1
1507 sub r63,r22,r20 // r63 == 64 % 64
1508 mmulfx.w r5,r4,r4
1509 pta LOCAL(large_divisor),tr0
/* r9 = 32 - r22; it is positive exactly when the normalization shift is
   below 32, i.e. the divisor occupies more than 32 bits.  */
1510 addi r20,32,r9
1511 msub.w r1,r4,r1
1512 madd.w r1,r1,r1
1513 mmulfx.w r1,r1,r4
/* r7 = upper 32 bits of the normalized divisor.  */
1514 shlri r6,32,r7
1515 bgt/u r9,r63,tr0 // large_divisor
1516 mmulfx.w r5,r4,r4
1517 shlri r2,32+14,r19
1518 addi r22,-31,r0
1519 msub.w r1,r4,r1
1521 mulu.l r1,r7,r4
1522 addi r1,-3,r5
/* First divide step: r5 = partial quotient taken from the top dividend
   bits (r19) and the preliminary reciprocal.  */
1523 mulu.l r5,r19,r5
1524 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1525 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1526 the case may be, %0000000000000000 000.11111111111, still */
1527 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1528 mulu.l r5,r3,r8
1529 mshalds.l r1,r21,r1
1530 shari r4,26,r4
1531 shlld r8,r0,r8
1532 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
/* Remove (partial quotient * divisor), shifted into place, from the rest.  */
1533 sub r2,r8,r2
1534 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1536 shlri r2,22,r21
1537 mulu.l r21,r1,r21
/* r8 now starts accumulating the quotient: first partial quotient
   shifted into position.  */
1538 shlld r5,r0,r8
1539 addi r20,30-22,r0
1540 shlrd r21,r0,r21
1541 mulu.l r21,r3,r5
1542 add r8,r21,r8
1543 mcmpgt.l r21,r63,r21 // See Note 1
1544 addi r20,30,r0
1545 mshfhi.l r63,r21,r21
1546 sub r2,r5,r2
1547 andc r2,r21,r2
1549 /* small divisor: need a third divide step */
1550 mulu.l r2,r1,r7
1551 ptabs r18,tr0
1552 addi r2,1,r2
1553 shlrd r7,r0,r7
1554 mulu.l r7,r3,r5
1555 add r8,r7,r8
/* Final correction: r5 = 1 when (rest + 1 - divisor) > (last partial
   quotient * divisor), i.e. the estimate was one too small.  */
1556 sub r2,r3,r2
1557 cmpgt r2,r5,r5
1558 add r8,r5,r2
1559 /* could test r3 here to check for divide by zero. */
1560 blink tr0,r63
/* Divisor wider than 32 bits: the dividend is first shifted right by r9
   into r25 and divided by the upper divisor half r7.  */
1562 LOCAL(large_divisor):
1563 mmulfx.w r5,r4,r4
1564 shlrd r2,r9,r25
1565 shlri r25,32,r8
1566 msub.w r1,r4,r1
1568 mulu.l r1,r7,r4
1569 addi r1,-3,r5
1570 mulu.l r5,r8,r5
1571 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1572 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1573 the case may be, %0000000000000000 000.11111111111, still */
1574 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1575 shlri r5,14-1,r8
1576 mulu.l r8,r7,r5
1577 mshalds.l r1,r21,r1
1578 shari r4,26,r4
1579 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1580 sub r25,r5,r25
1581 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1583 shlri r25,22,r21
1584 mulu.l r21,r1,r21
1585 pta LOCAL(no_lo_adj),tr0
1586 addi r22,32,r0
1587 shlri r21,40,r21
1588 mulu.l r21,r7,r5
1589 add r8,r21,r8
1590 shlld r2,r0,r2
1591 sub r25,r5,r25
/* If the rest is still >= r7, bump the quotient once and reduce the rest.  */
1592 bgtu/u r7,r25,tr0 // no_lo_adj
1593 addi r8,1,r8
1594 sub r25,r7,r25
1595 LOCAL(no_lo_adj):
1596 mextr4 r2,r25,r2
1598 /* large_divisor: only needs a few adjustments. */
1599 mulu.l r8,r6,r5
1600 ptabs r18,tr0
1601 /* bubble */
/* Subtract 1 from the quotient when quotient * low divisor word (r5)
   exceeds the combined rest in r2.  */
1602 cmpgtu r5,r2,r5
1603 sub r8,r5,r2
1604 blink tr0,r63
1605 /* Note 1: To shift the result of the second divide stage so that the result
1606 always fits into 32 bits, yet we still reduce the rest sufficiently
1607 would require a lot of instructions to do the shifts just right. Using
1608 the full 64 bit shift result to multiply with the divisor would require
1609 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1610 Fortunately, if the upper 32 bits of the shift result are non-zero, we
1611 know that the rest after taking this partial result into account will
1612 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1613 upper 32 bits of the partial result are non-zero. */
1614 #endif /* __SHMEDIA__ */
1615 #endif /* L_udivdi3 */
1617 #ifdef L_divdi3
1618 #ifdef __SHMEDIA__
/* divdi3 (SHmedia): signed 64-bit division built on udivdi3.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = quotient.
   Both operands are replaced by their absolute values; the unsigned
   result is negated when the operand signs differ.  */
1619 .mode SHmedia
1620 .section .text..SHmedia32,"ax"
1621 .align 2
1622 .global GLOBAL(divdi3)
1623 GLOBAL(divdi3):
1624 pta GLOBAL(udivdi3),tr0
/* r22 / r23 = all-ones if the operand is negative, else zero.  The
   xor + sub pair below then negates exactly the negative operands.  */
1625 shari r2,63,r22
1626 shari r3,63,r23
1627 xor r2,r22,r2
1628 xor r3,r23,r3
1629 sub r2,r22,r2
1630 sub r3,r23,r3
/* Same sign: branch straight to udivdi3 with r18 untouched, so it
   returns directly to our caller.  */
1631 beq/u r22,r23,tr0
/* Different signs: keep our return target in tr1, call udivdi3 with the
   link in r18, then negate the result.  The visible udivdi3 code only
   uses tr0, which is what keeps tr1 valid across the call.  */
1632 ptabs r18,tr1
1633 blink tr0,r18
1634 sub r63,r2,r2
1635 blink tr1,r63
1636 #endif /* __SHMEDIA__ */
1637 #endif /* L_divdi3 */
1639 #ifdef L_umoddi3
1640 #ifdef __SHMEDIA__
/* umoddi3 (SHmedia): unsigned 64-bit remainder.
   In:  r2 = dividend, r3 = divisor (only read), r18 = return address.
   Out: r2 = remainder.
   Writes r0, r1, r2, r4-r9, r19-r22, r25 and tr0.
   The instruction sequence parallels udivdi3 (normalize the divisor,
   build a 31-bit reciprocal in r1, run multiply / subtract steps) but
   tracks only the rest instead of accumulating a quotient on the
   small-divisor path.  */
1641 .mode SHmedia
1642 .section .text..SHmedia32,"ax"
1643 .align 2
1644 .global GLOBAL(umoddi3)
1645 GLOBAL(umoddi3):
/* r22 = normalization shift; r6 = normalized divisor; r5 = its top bits
   used as the reciprocal seed.  */
1646 shlri r3,1,r4
1647 nsb r4,r22
1648 shlld r3,r22,r6
1649 shlri r6,49,r5
1650 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1651 sub r21,r5,r1
1652 mmulfx.w r1,r1,r4
1653 mshflo.w r1,r63,r1
1654 sub r63,r22,r20 // r63 == 64 % 64
1655 mmulfx.w r5,r4,r4
1656 pta LOCAL(large_divisor),tr0
/* r9 = 32 - r22: positive exactly when the divisor needs more than
   32 bits.  */
1657 addi r20,32,r9
1658 msub.w r1,r4,r1
1659 madd.w r1,r1,r1
1660 mmulfx.w r1,r1,r4
1661 shlri r6,32,r7
1662 bgt/u r9,r63,tr0 // large_divisor
1663 mmulfx.w r5,r4,r4
1664 shlri r2,32+14,r19
1665 addi r22,-31,r0
1666 msub.w r1,r4,r1
1668 mulu.l r1,r7,r4
1669 addi r1,-3,r5
1670 mulu.l r5,r19,r5
1671 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1672 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1673 the case may be, %0000000000000000 000.11111111111, still */
1674 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1675 mulu.l r5,r3,r5
1676 mshalds.l r1,r21,r1
1677 shari r4,26,r4
1678 shlld r5,r0,r5
1679 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
/* First step: remove (partial quotient * divisor), shifted into place.  */
1680 sub r2,r5,r2
1681 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1683 shlri r2,22,r21
1684 mulu.l r21,r1,r21
1685 addi r20,30-22,r0
1686 /* bubble */ /* could test r3 here to check for divide by zero. */
1687 shlrd r21,r0,r21
1688 mulu.l r21,r3,r5
1689 mcmpgt.l r21,r63,r21 // See Note 1
1690 addi r20,30,r0
1691 mshfhi.l r63,r21,r21
1692 sub r2,r5,r2
1693 andc r2,r21,r2
1695 /* small divisor: need a third divide step */
1696 mulu.l r2,r1,r7
1697 ptabs r18,tr0
1698 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1699 shlrd r7,r0,r7
1700 mulu.l r7,r3,r5
1701 /* bubble */
/* If (rest - r3 + 1) > r5 the quotient estimate was one too small: cmvne
   then replaces the rest with rest - r3 before r5 is subtracted.  */
1702 addi r8,1,r7
1703 cmpgt r7,r5,r7
1704 cmvne r7,r8,r2
1705 sub r2,r5,r2
1706 blink tr0,r63
/* Divisor wider than 32 bits: work on the dividend shifted right by r9
   (kept in r25) against the upper divisor half r7.  */
1708 LOCAL(large_divisor):
1709 mmulfx.w r5,r4,r4
1710 shlrd r2,r9,r25
1711 shlri r25,32,r8
1712 msub.w r1,r4,r1
1714 mulu.l r1,r7,r4
1715 addi r1,-3,r5
1716 mulu.l r5,r8,r5
1717 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1718 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1719 the case may be, %0000000000000000 000.11111111111, still */
1720 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1721 shlri r5,14-1,r8
1722 mulu.l r8,r7,r5
1723 mshalds.l r1,r21,r1
1724 shari r4,26,r4
1725 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1726 sub r25,r5,r25
1727 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1729 shlri r25,22,r21
1730 mulu.l r21,r1,r21
1731 pta LOCAL(no_lo_adj),tr0
1732 addi r22,32,r0
1733 shlri r21,40,r21
1734 mulu.l r21,r7,r5
1735 add r8,r21,r8
1736 shlld r2,r0,r2
1737 sub r25,r5,r25
1738 bgtu/u r7,r25,tr0 // no_lo_adj
1739 addi r8,1,r8
1740 sub r25,r7,r25
1741 LOCAL(no_lo_adj):
1742 mextr4 r2,r25,r2
1744 /* large_divisor: only needs a few adjustments. */
1745 mulu.l r8,r6,r5
1746 ptabs r18,tr0
/* r7 = rest + normalized divisor; it is selected when r5 > rest
   (unsigned), so that the subtraction of r5 below does not underflow.  */
1747 add r2,r6,r7
1748 cmpgtu r5,r2,r8
1749 cmvne r8,r7,r2
1750 sub r2,r5,r2
/* Undo the normalization shift to obtain the remainder.  */
1751 shlrd r2,r22,r2
1752 blink tr0,r63
1753 /* Note 1: To shift the result of the second divide stage so that the result
1754 always fits into 32 bits, yet we still reduce the rest sufficiently
1755 would require a lot of instructions to do the shifts just right. Using
1756 the full 64 bit shift result to multiply with the divisor would require
1757 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1758 Fortunately, if the upper 32 bits of the shift result are non-zero, we
1759 know that the rest after taking this partial result into account will
1760 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1761 upper 32 bits of the partial result are non-zero. */
1762 #endif /* __SHMEDIA__ */
1763 #endif /* L_umoddi3 */
1765 #ifdef L_moddi3
1766 #ifdef __SHMEDIA__
/* moddi3 (SHmedia): signed 64-bit remainder built on umoddi3.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = remainder; it is negated only when the dividend was
   negative, so the result takes the dividend's sign.  */
1767 .mode SHmedia
1768 .section .text..SHmedia32,"ax"
1769 .align 2
1770 .global GLOBAL(moddi3)
1771 GLOBAL(moddi3):
1772 pta GLOBAL(umoddi3),tr0
/* r22 / r23 = all-ones for a negative operand, else zero; xor + sub
   turn both operands into their absolute values.  */
1773 shari r2,63,r22
1774 shari r3,63,r23
1775 xor r2,r22,r2
1776 xor r3,r23,r3
1777 sub r2,r22,r2
1778 sub r3,r23,r3
/* Non-negative dividend (r22 == 0): branch straight to umoddi3, which
   then returns directly to our caller.  */
1779 beq/u r22,r63,tr0
/* Negative dividend: keep the return target in tr1, call umoddi3 with
   the link in r18, and negate the result.  */
1780 ptabs r18,tr1
1781 blink tr0,r18
1782 sub r63,r2,r2
1783 blink tr1,r63
1784 #endif /* __SHMEDIA__ */
1785 #endif /* L_moddi3 */
1787 #ifdef L_set_fpscr
1788 #if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1789 #ifdef __SH5__
1790 .mode SHcompact
1791 #endif
/* set_fpscr: load FPSCR from r4 and store two derived values into the
   8-byte common block fpscr_values.
   In:  r4 = new FPSCR value.
   Writes r0, r1, r2, r3 and both words of fpscr_values.
   The derived values differ from r4 only in bits 19 and 20: the code
   swaps the 16-bit halves so that those bits become bits 3 and 4 (#8 and
   #16) of r0, edits them with or/xor, and swaps back before storing.
   (Presumably these are FPSCR mode bits - confirm against the CPU
   manual.)
   NOTE(review): no rts is visible in this listing; presumably a return
   precedes the final mov.l of each variant - confirm against the
   upstream file.  */
1792 .global GLOBAL(set_fpscr)
1793 GLOBAL(set_fpscr):
1794 lds r4,fpscr
/* r1 = address of fpscr_values, taken from the literal at set_fpscr_L1.  */
1795 mov.l LOCAL(set_fpscr_L1),r1
1796 swap.w r4,r0
/* Set bits 3 and 4; without FMOVD_WORKS, bit 4 is then cleared again.  */
1797 or #24,r0
1798 #ifndef FMOVD_WORKS
1799 xor #16,r0
1800 #endif
/* First value: stored at offset 4 for __SH4__, at offset 0 otherwise.  */
1801 #if defined(__SH4__)
1802 swap.w r0,r3
1803 mov.l r3,@(4,r1)
1804 #else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1805 swap.w r0,r2
1806 mov.l r2,@r1
1807 #endif
/* Second value: flip bit 3 (and also bit 4 when FMOVD_WORKS is defined).  */
1808 #ifndef FMOVD_WORKS
1809 xor #8,r0
1810 #else
1811 xor #24,r0
1812 #endif
/* Stored into the other word of fpscr_values.  */
1813 #if defined(__SH4__)
1814 swap.w r0,r2
1816 mov.l r2,@r1
1817 #else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1818 swap.w r0,r3
1820 mov.l r3,@(4,r1)
1821 #endif
1822 .align 2
1823 LOCAL(set_fpscr_L1):
1824 .long GLOBAL(fpscr_values)
/* fpscr_values: 8 bytes of common storage (4-byte aligned on ELF).  */
1825 #ifdef __ELF__
1826 .comm GLOBAL(fpscr_values),8,4
1827 #else
1828 .comm GLOBAL(fpscr_values),8
1829 #endif /* ELF */
1830 #endif /* SH3E / SH4 */
1831 #endif /* L_set_fpscr */
1832 #ifdef L_ic_invalidate
1833 #if __SH5__ == 32
/* init_trampoline / ic_invalidate (SH5, 32-bit ABI, SHmedia code).
   init_trampoline writes 16 bytes at the address in r0: a 64-bit
   constant at offset 0, r2 at offset 8 and r3 at offset 12, and then
   falls through into ic_invalidate.
   ic_invalidate (r0 = address, r18 = return address) issues ocbwb,
   synco, icbi and synci for that address and returns.  */
1834 .mode SHmedia
1835 .section .text..SHmedia32,"ax"
1836 .align 2
1837 .global GLOBAL(init_trampoline)
1838 GLOBAL(init_trampoline):
1839 st.l r0,8,r2
/* Build the 64-bit constant in r20, 16 bits at a time; the order of the
   four halfwords is reversed between the two endiannesses so that the
   stored byte image is the same.  (Presumably these halfwords are the
   trampoline's instruction words - confirm.)  */
1840 #ifdef __LITTLE_ENDIAN__
1841 movi 9,r20
1842 shori 0x402b,r20
1843 shori 0xd101,r20
1844 shori 0xd002,r20
1845 #else
1846 movi 0xffffffffffffd002,r20
1847 shori 0xd101,r20
1848 shori 0x402b,r20
1849 shori 9,r20
1850 #endif
1851 st.q r0,0,r20
1852 st.l r0,12,r3
1853 .global GLOBAL(ic_invalidate)
1854 GLOBAL(ic_invalidate):
/* Write back the data cache block at r0, then invalidate the matching
   instruction cache block, with synchronization after each step.  */
1855 ocbwb r0,0
1856 synco
1857 icbi r0, 0
1858 ptabs r18, tr0
1859 synci
1860 blink tr0, r63
1861 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
/* ic_invalidate (SH4).
   In: r4 = address to invalidate.  Writes r0, r1, r4.
   Writes the data cache block at r4 back with ocbwb, then branches (braf)
   into a cache-line-aligned table generated by the nested .rept blocks;
   the entry is chosen from the distance between r4 and the table, as the
   inline comments describe.
   NOTE(review): the numeric labels 0: and 1: used by mova / mov.w / add,
   and the bodies of the two .rept blocks, are not visible in this
   listing - confirm against the upstream file before relying on the
   details.  */
1862 .global GLOBAL(ic_invalidate)
1863 GLOBAL(ic_invalidate):
1864 ocbwb @r4
1865 mova 0f,r0
1866 mov.w 1f,r1
1867 /* Compute how many cache lines 0f is away from r4. */
1868 sub r0,r4
1869 and r1,r4
1870 /* Prepare to branch to 0f plus the cache-line offset. */
1871 add # 0f - 1f,r4
1872 braf r4
/* 0x1fe0 has bits 5..12 set: presumably the mask loaded by mov.w above,
   selecting one of 256 lines of 32 bytes - confirm.  */
1875 .short 0x1fe0
1876 .p2align 5
1877 /* This must be aligned to the beginning of a cache line. */
1879 .rept 256 /* There are 256 cache lines of 32 bytes. */
1881 .rept 15
1883 .endr
1884 .endr
1885 #endif /* SH4 */
1886 #endif /* L_ic_invalidate */
1888 #if defined (__SH5__) && __SH5__ == 32
1889 #ifdef L_shcompact_call_trampoline
/* ct_main_table: dispatch table for GCC_shcompact_call_trampoline.
   33 16-bit entries, each the offset of a handler relative to
   ct_main_label.  The trampoline indexes it with twice the nsb of the
   remaining cookie, using a base biased by -31 * 2; the handlers for r2
   come first and ct_ret_wide / ct_call_func last.  */
1890 .section .rodata
1891 .align 1
1892 LOCAL(ct_main_table):
1893 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
1894 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
1895 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
1896 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
1897 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
1898 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
1899 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
1900 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
1901 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
1902 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
1903 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
1904 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
/* From r6 on, each register has four entries: fph, fpl, ld, pop.  */
1905 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
1906 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
1907 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
1908 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
1909 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
1910 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
1911 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
1912 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
1913 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
1914 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
1915 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
1916 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
1917 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
1918 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
1919 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
1920 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
/* The remaining low cookie bits select a pop sequence, a single r9 pop,
   the wide-return path, or (last entry) a plain call.  */
1921 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
1922 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
1923 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
1924 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
1925 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
1926 .mode SHmedia
1927 .section .text..SHmedia32, "ax"
1928 .align 2
1930 /* This function loads 64-bit general-purpose registers from the
1931 stack, from a memory address contained in them or from an FP
1932 register, according to a cookie passed in r1. Its execution
1933 time is linear on the number of registers that actually have
1934 to be copied. See sh.h for details on the actual bit pattern.
1936 The function to be called is passed in r0. If a 32-bit return
1937 value is expected, the actual function will be tail-called,
1938 otherwise the return address will be stored in r10 (that the
1939 caller should expect to be clobbered) and the return value
1940 will be expanded into r2/r3 upon return. */
/* Register roles in the code below:
     tr0 = function to call, tr1 = ct_loop, tr2 = scratch branch target,
     r0  = ct_main_table - 31 * 2 (biased table base),
     r1  = remaining cookie bits, zero-extended from 32 bits,
     r28-r34 = scratch.
   Each handler clears its own field in r1 and branches back to ct_loop,
   so the loop ends once the cookie selects ct_call_func or ct_ret_wide.  */
1942 .global GLOBAL(GCC_shcompact_call_trampoline)
1943 GLOBAL(GCC_shcompact_call_trampoline):
1944 ptabs/l r0, tr0 /* Prepare to call the actual function. */
1945 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
1946 pt/l LOCAL(ct_loop), tr1
1947 addz.l r1, r63, r1
1948 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
1949 LOCAL(ct_loop):
/* Table index = 2 * nsb(r1).  The -31 * 2 bias on the base presumably
   accounts for nsb being at least 31 for a zero-extended 32-bit value -
   confirm against the SHmedia manual.  */
1950 nsb r1, r28
1951 shlli r28, 1, r29
1952 ldx.w r0, r29, r30
/* The table entries are offsets from this label, hence ptrel here.  */
1953 LOCAL(ct_main_label):
1954 ptrel/l r30, tr2
1955 blink tr2, r63
/* ct_r2_fp / ct_r3_fp: copy r2 or r3 from an FP register, clear the
   corresponding 3-bit cookie field (7 << 29 resp. 7 << 26) and return to
   ct_loop through tr1.  */
1956 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
1957 /* It must be dr0, so just do it. */
1958 fmov.dq dr0, r2
1959 movi 7, r30
1960 shlli r30, 29, r31
1961 andc r1, r31, r1
1962 blink tr1, r63
1963 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
1964 /* It is either dr0 or dr2. */
1965 movi 7, r30
/* r32 = cookie >> 26, sampled before the field is cleared.  */
1966 shlri r1, 26, r32
1967 shlli r30, 26, r31
1968 andc r1, r31, r1
/* Copy dr0 first; keep it if the field value was 4, otherwise overwrite
   r3 with dr2.  */
1969 fmov.dq dr0, r3
1970 beqi/l r32, 4, tr1
1971 fmov.dq dr2, r3
1972 blink tr1, r63
/* ct_r4_fp / ct_r5_fp: copy r4 or r5 from one of the low FP registers.
   The low two bits of the register's cookie field, scaled by 8, index a
   small jump table whose entries are 8 bytes each (fmov.dq + blink); the
   ptrel target is computed relative to the *_fp_base label.  The 3-bit
   cookie field is cleared before jumping, and every table entry returns
   to ct_loop through tr1.  */
1973 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
1974 shlri r1, 23 - 3, r34
1975 andi r34, 3 << 3, r33
1976 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
1977 LOCAL(ct_r4_fp_base):
1978 ptrel/l r32, tr2
1979 movi 7, r30
1980 shlli r30, 23, r31
1981 andc r1, r31, r1
1982 blink tr2, r63
/* Three entries only: dr0, dr2, dr4.  */
1983 LOCAL(ct_r4_fp_copy):
1984 fmov.dq dr0, r4
1985 blink tr1, r63
1986 fmov.dq dr2, r4
1987 blink tr1, r63
1988 fmov.dq dr4, r4
1989 blink tr1, r63
1990 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
1991 shlri r1, 20 - 3, r34
1992 andi r34, 3 << 3, r33
1993 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
1994 LOCAL(ct_r5_fp_base):
1995 ptrel/l r32, tr2
1996 movi 7, r30
1997 shlli r30, 20, r31
1998 andc r1, r31, r1
1999 blink tr2, r63
/* Four entries: dr0, dr2, dr4, dr6.  */
2000 LOCAL(ct_r5_fp_copy):
2001 fmov.dq dr0, r5
2002 blink tr1, r63
2003 fmov.dq dr2, r5
2004 blink tr1, r63
2005 fmov.dq dr4, r5
2006 blink tr1, r63
2007 fmov.dq dr6, r5
2008 blink tr1, r63
/* ct_r6_fph / ct_r6_fpl: copy r6 from a high (dr8) or low (dr0..dr6) FP
   register, clear the cookie field at bit 16 and return to ct_loop.  */
2009 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2010 /* It must be dr8. */
2011 fmov.dq dr8, r6
/* Clears the whole 4-bit field 15 << 16.  */
2012 movi 15, r30
2013 shlli r30, 16, r31
2014 andc r1, r31, r1
2015 blink tr1, r63
2016 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
/* Low two field bits, scaled by 8, select an 8-byte entry of
   ct_r6_fp_copy relative to ct_r6_fp_base.  */
2017 shlri r1, 16 - 3, r34
2018 andi r34, 3 << 3, r33
2019 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2020 LOCAL(ct_r6_fp_base):
2021 ptrel/l r32, tr2
/* Only 7 << 16 is cleared on this path.  */
2022 movi 7, r30
2023 shlli r30, 16, r31
2024 andc r1, r31, r1
2025 blink tr2, r63
2026 LOCAL(ct_r6_fp_copy):
2027 fmov.dq dr0, r6
2028 blink tr1, r63
2029 fmov.dq dr2, r6
2030 blink tr1, r63
2031 fmov.dq dr4, r6
2032 blink tr1, r63
2033 fmov.dq dr6, r6
2034 blink tr1, r63
/* ct_r7_fph / ct_r7_fpl: copy r7 from a high (dr8 / dr10) or low
   (dr0..dr6) FP register, clear the cookie field at bit 12 and return to
   ct_loop.  */
2035 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2036 /* It is either dr8 or dr10. */
2037 movi 15 << 12, r31
/* r32 = cookie >> 12, sampled before the field is cleared.  */
2038 shlri r1, 12, r32
2039 andc r1, r31, r1
/* Copy dr8 first; keep it if the field value was 8, otherwise overwrite
   r7 with dr10.  */
2040 fmov.dq dr8, r7
2041 beqi/l r32, 8, tr1
2042 fmov.dq dr10, r7
2043 blink tr1, r63
2044 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
/* Low two field bits, scaled by 8, select an 8-byte entry of
   ct_r7_fp_copy relative to ct_r7_fp_base.  */
2045 shlri r1, 12 - 3, r34
2046 andi r34, 3 << 3, r33
2047 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2048 LOCAL(ct_r7_fp_base):
2049 ptrel/l r32, tr2
2050 movi 7 << 12, r31
2051 andc r1, r31, r1
2052 blink tr2, r63
2053 LOCAL(ct_r7_fp_copy):
2054 fmov.dq dr0, r7
2055 blink tr1, r63
2056 fmov.dq dr2, r7
2057 blink tr1, r63
2058 fmov.dq dr4, r7
2059 blink tr1, r63
2060 fmov.dq dr6, r7
2061 blink tr1, r63
/* ct_r8_fph / ct_r8_fpl: copy r8 from a high (dr8 / dr10) or low
   (dr0..dr6) FP register, clear the cookie field at bit 8 and return to
   ct_loop.  */
2062 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2063 /* It is either dr8 or dr10. */
2064 movi 15 << 8, r31
/* r32 = lowest bit of the field, sampled before the field is cleared.  */
2065 andi r1, 1 << 8, r32
2066 andc r1, r31, r1
/* Copy dr8 first; keep it if that bit was clear, otherwise overwrite r8
   with dr10.  */
2067 fmov.dq dr8, r8
2068 beq/l r32, r63, tr1
2069 fmov.dq dr10, r8
2070 blink tr1, r63
2071 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
/* Low two field bits, scaled by 8, select an 8-byte entry of
   ct_r8_fp_copy relative to ct_r8_fp_base.  */
2072 shlri r1, 8 - 3, r34
2073 andi r34, 3 << 3, r33
2074 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2075 LOCAL(ct_r8_fp_base):
2076 ptrel/l r32, tr2
2077 movi 7 << 8, r31
2078 andc r1, r31, r1
2079 blink tr2, r63
2080 LOCAL(ct_r8_fp_copy):
2081 fmov.dq dr0, r8
2082 blink tr1, r63
2083 fmov.dq dr2, r8
2084 blink tr1, r63
2085 fmov.dq dr4, r8
2086 blink tr1, r63
2087 fmov.dq dr6, r8
2088 blink tr1, r63
/* ct_r9_fph / ct_r9_fpl: copy r9 from a high (dr8 / dr10) or low
   (dr0..dr6) FP register, clear the cookie field at bit 4 and return to
   ct_loop.  */
2089 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2090 /* It is either dr8 or dr10. */
2091 movi 15 << 4, r31
/* r32 = lowest bit of the field, sampled before the field is cleared.  */
2092 andi r1, 1 << 4, r32
2093 andc r1, r31, r1
/* Copy dr8 first; keep it if that bit was clear, otherwise overwrite r9
   with dr10.  */
2094 fmov.dq dr8, r9
2095 beq/l r32, r63, tr1
2096 fmov.dq dr10, r9
2097 blink tr1, r63
2098 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
/* Low two field bits, scaled by 8, select an 8-byte entry of
   ct_r9_fp_copy relative to ct_r9_fp_base.  */
2099 shlri r1, 4 - 3, r34
2100 andi r34, 3 << 3, r33
2101 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2102 LOCAL(ct_r9_fp_base):
2103 ptrel/l r32, tr2
2104 movi 7 << 4, r31
2105 andc r1, r31, r1
2106 blink tr2, r63
2107 LOCAL(ct_r9_fp_copy):
2108 fmov.dq dr0, r9
2109 blink tr1, r63
2110 fmov.dq dr2, r9
2111 blink tr1, r63
2112 fmov.dq dr4, r9
2113 blink tr1, r63
2114 fmov.dq dr6, r9
2115 blink tr1, r63
/* ct_rN_ld chain: load rN from the memory address it currently holds.
   Each handler extracts a 2-bit cookie field (mask 3 << shift) and
   clears it in r1.
     - Both bits set: branch to ct_rN_load, which loads rN and returns to
       ct_loop.
     - Otherwise: set r(N+1) = rN + 8, load rN, and fall through to the
       handler for the next register, so consecutive registers are filled
       from consecutive 8-byte slots.
   ct_r9_ld ends the chain by loading r9 and going to ct_check_tramp.  */
2116 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2117 pt/l LOCAL(ct_r2_load), tr2
2118 movi 3, r30
2119 shlli r30, 29, r31
2120 and r1, r31, r32
2121 andc r1, r31, r1
2122 beq/l r31, r32, tr2
/* r3 must be set up before r2 is overwritten by the load.  */
2123 addi.l r2, 8, r3
2124 ldx.q r2, r63, r2
2125 /* Fall through. */
2126 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2127 pt/l LOCAL(ct_r3_load), tr2
2128 movi 3, r30
2129 shlli r30, 26, r31
2130 and r1, r31, r32
2131 andc r1, r31, r1
2132 beq/l r31, r32, tr2
2133 addi.l r3, 8, r4
2134 ldx.q r3, r63, r3
2135 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2136 pt/l LOCAL(ct_r4_load), tr2
2137 movi 3, r30
2138 shlli r30, 23, r31
2139 and r1, r31, r32
2140 andc r1, r31, r1
2141 beq/l r31, r32, tr2
2142 addi.l r4, 8, r5
2143 ldx.q r4, r63, r4
2144 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2145 pt/l LOCAL(ct_r5_load), tr2
2146 movi 3, r30
2147 shlli r30, 20, r31
2148 and r1, r31, r32
2149 andc r1, r31, r1
2150 beq/l r31, r32, tr2
2151 addi.l r5, 8, r6
2152 ldx.q r5, r63, r6
2153 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2154 pt/l LOCAL(ct_r6_load), tr2
2155 movi 3 << 16, r31
2156 and r1, r31, r32
2157 andc r1, r31, r1
2158 beq/l r31, r32, tr2
2159 addi.l r6, 8, r7
2160 ldx.q r6, r63, r6
2161 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2162 pt/l LOCAL(ct_r7_load), tr2
2163 movi 3 << 12, r31
2164 and r1, r31, r32
2165 andc r1, r31, r1
2166 beq/l r31, r32, tr2
2167 addi.l r7, 8, r8
2168 ldx.q r7, r63, r7
2169 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2170 pt/l LOCAL(ct_r8_load), tr2
2171 movi 3 << 8, r31
2172 and r1, r31, r32
2173 andc r1, r31, r1
2174 beq/l r31, r32, tr2
2175 addi.l r8, 8, r9
2176 ldx.q r8, r63, r8
2177 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2178 pt/l LOCAL(ct_check_tramp), tr2
2179 ldx.q r9, r63, r9
2180 blink tr2, r63
/* ct_rN_load: targets of the ct_rN_ld handlers for the case where both
   bits of the cookie field were set.  Each one loads rN from the address
   held in rN and returns to ct_loop through tr1.  */
2181 LOCAL(ct_r2_load):
2182 ldx.q r2, r63, r2
2183 blink tr1, r63
2184 LOCAL(ct_r3_load):
2185 ldx.q r3, r63, r3
2186 blink tr1, r63
2187 LOCAL(ct_r4_load):
2188 ldx.q r4, r63, r4
2189 blink tr1, r63
2190 LOCAL(ct_r5_load):
2191 ldx.q r5, r63, r5
2192 blink tr1, r63
2193 LOCAL(ct_r6_load):
2194 ldx.q r6, r63, r6
2195 blink tr1, r63
2196 LOCAL(ct_r7_load):
2197 ldx.q r7, r63, r7
2198 blink tr1, r63
2199 LOCAL(ct_r8_load):
2200 ldx.q r8, r63, r8
2201 blink tr1, r63
/* ct_rN_pop (r2..r8): pop one 64-bit value from the stack into rN.
   Each handler loads rN from [r15], advances r15 by 8, clears the single
   cookie bit that selected it (1 << shift) and returns to ct_loop.  */
2202 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2203 movi 1, r30
2204 ldx.q r15, r63, r2
2205 shlli r30, 29, r31
2206 addi.l r15, 8, r15
2207 andc r1, r31, r1
2208 blink tr1, r63
2209 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2210 movi 1, r30
2211 ldx.q r15, r63, r3
2212 shlli r30, 26, r31
2213 addi.l r15, 8, r15
2214 andc r1, r31, r1
2215 blink tr1, r63
2216 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2217 movi 1, r30
2218 ldx.q r15, r63, r4
2219 shlli r30, 23, r31
2220 addi.l r15, 8, r15
2221 andc r1, r31, r1
2222 blink tr1, r63
2223 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2224 movi 1, r30
2225 ldx.q r15, r63, r5
2226 shlli r30, 20, r31
2227 addi.l r15, 8, r15
2228 andc r1, r31, r1
2229 blink tr1, r63
2230 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2231 movi 1, r30
2232 ldx.q r15, r63, r6
2233 shlli r30, 16, r31
2234 addi.l r15, 8, r15
2235 andc r1, r31, r1
2236 blink tr1, r63
/* For r7 and r8 the mask is built by a single movi.  */
2237 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2238 ldx.q r15, r63, r7
2239 movi 1 << 12, r31
2240 addi.l r15, 8, r15
2241 andc r1, r31, r1
2242 blink tr1, r63
2243 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2244 ldx.q r15, r63, r8
2245 movi 1 << 8, r31
2246 addi.l r15, 8, r15
2247 andc r1, r31, r1
2248 blink tr1, r63
/* ct_pop_seq: pop several consecutive registers, ending with r9.
   n = (r1 >> 1) & 7 is taken from the cookie; because r30 still holds
   n << 1, the shift by 2 gives n * 8.  Each pop below is two 4-byte
   instructions, so branching to ct_end_of_pop_seq - n * 8 executes
   exactly the last n pops and then continues at ct_check_tramp.  */
2249 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2250 andi r1, 7 << 1, r30
2251 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2252 shlli r30, 2, r31
2253 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2254 sub.l r32, r31, r33
2255 ptabs/l r33, tr2
2256 blink tr2, r63
2257 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2258 ldx.q r15, r63, r3
2259 addi.l r15, 8, r15
2260 ldx.q r15, r63, r4
2261 addi.l r15, 8, r15
2262 ldx.q r15, r63, r5
2263 addi.l r15, 8, r15
2264 ldx.q r15, r63, r6
2265 addi.l r15, 8, r15
2266 ldx.q r15, r63, r7
2267 addi.l r15, 8, r15
2268 ldx.q r15, r63, r8
2269 addi.l r15, 8, r15
/* Also reached directly from ct_main_table; it does not return to
   ct_loop but falls through into ct_check_tramp.  */
2270 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2271 ldx.q r15, r63, r9
2272 addi.l r15, 8, r15
/* Final stage of GCC_shcompact_call_trampoline: decide from bit 0 of the
   cookie whether the callee can simply be branched to (ct_call_func) or
   has to be called so that its 64-bit return value in r2 can be split
   into the r2 / r3 pair (ct_ret_wide).  */
2273 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2274 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2275 pt/u LOCAL(ct_ret_wide), tr2
2276 andi r1, 1, r1
2277 bne/u r1, r63, tr2
2278 LOCAL(ct_call_func): /* Just branch to the function. */
2279 blink tr0, r63
2280 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2281 64-bit return value. */
/* Keep the original return address in r10, then call with the link
   register r18 pointing at the instruction after the blink.  */
2282 add.l r18, r63, r10
2283 blink tr0, r18
2284 ptabs r10, tr0
/* Split r2: one register receives the upper 32 bits (shari by 32), the
   other the 32-bit sign-extended low half (add.l); which is which
   depends on the endianness.  */
2285 #if __LITTLE_ENDIAN__
2286 shari r2, 32, r3
2287 add.l r2, r63, r2
2288 #else
2289 add.l r2, r63, r3
2290 shari r2, 32, r2
2291 #endif
2292 blink tr0, r63
2293 #endif /* L_shcompact_call_trampoline */
2295 #ifdef L_shcompact_return_trampoline
2296 /* This function does the converse of the code in `ret_wide'
2297 above. It is tail-called by SHcompact functions returning
2298 64-bit non-floating-point values, to pack the 32-bit values in
2299 r2 and r3 into r2. */
/* GCC_shcompact_return_trampoline: pack the 32-bit values in r2 and r3
   into the single 64-bit register r2, then return through r18.
   One half is zero-extended (addz.l), the other shifted into the upper
   32 bits (shlli by 32), and the two are OR-ed together; the endianness
   decides which register supplies which half.  */
2301 .mode SHmedia
2302 .section .text..SHmedia32, "ax"
2303 .align 2
2304 .global GLOBAL(GCC_shcompact_return_trampoline)
2305 GLOBAL(GCC_shcompact_return_trampoline):
2306 ptabs/l r18, tr0
2307 #if __LITTLE_ENDIAN__
/* Little endian: r2 is the low half, r3 the high half.  */
2308 addz.l r2, r63, r2
2309 shlli r3, 32, r3
2310 #else
/* Big endian: r3 is the low half, r2 the high half.  */
2311 addz.l r3, r63, r3
2312 shlli r2, 32, r2
2313 #endif
2314 or r3, r2, r2
2315 blink tr0, r63
2316 #endif /* L_shcompact_return_trampoline */
2318 #ifdef L_shcompact_incoming_args
/* ia_main_table: dispatch table for GCC_shcompact_incoming_args.
   33 16-bit entries holding handler offsets relative to ia_main_label,
   indexed by twice the nsb of the remaining cookie with a base biased by
   -31 * 2.  The layout parallels ct_main_table of the call trampoline,
   except that the slots which there select FP-register copies hold the
   value 1 here and are marked invalid.  */
2319 .section .rodata
2320 .align 1
2321 LOCAL(ia_main_table):
2322 .word 1 /* Invalid, just loop */
2323 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
2324 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
2325 .word 1 /* Invalid, just loop */
2326 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
2327 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
2328 .word 1 /* Invalid, just loop */
2329 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
2330 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
2331 .word 1 /* Invalid, just loop */
2332 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
2333 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
/* From r6 on there are two invalid slots per register.  */
2334 .word 1 /* Invalid, just loop */
2335 .word 1 /* Invalid, just loop */
2336 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
2337 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
2338 .word 1 /* Invalid, just loop */
2339 .word 1 /* Invalid, just loop */
2340 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
2341 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
2342 .word 1 /* Invalid, just loop */
2343 .word 1 /* Invalid, just loop */
2344 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
2345 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
2346 .word 1 /* Invalid, just loop */
2347 .word 1 /* Invalid, just loop */
2348 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
2349 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
/* Low cookie bits: push sequence, single r9 push, or plain return.  */
2350 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2351 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
2352 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
2353 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2354 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
2355 .mode SHmedia
2356 .section .text..SHmedia32, "ax"
2357 .align 2
2359 /* This function stores 64-bit general-purpose registers back in
2360 the stack, and loads the address in which each register
2361 was stored into itself. The lower 32 bits of r17 hold the address
2362 to begin storing, and the upper 32 bits of r17 hold the cookie.
2363 Its execution time is linear on the
2364 number of registers that actually have to be copied, and it is
2365 optimized for structures larger than 64 bits, as opposed to
2366 individual `long long' arguments. See sh.h for details on the
2367 actual bit pattern. */
/* Register roles in the code below:
     tr0 = return target (from r18), tr1 = ia_loop, tr2 = scratch target,
     r0  = remaining cookie bits, r17 = current store address,
     r43 = ia_main_table - 31 * 2 (biased table base),
     r36-r41 = scratch.  */
2369 .global GLOBAL(GCC_shcompact_incoming_args)
2370 GLOBAL(GCC_shcompact_incoming_args):
2371 ptabs/l r18, tr0 /* Prepare to return. */
2372 shlri r17, 32, r0 /* Load the cookie. */
2373 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
2374 pt/l LOCAL(ia_loop), tr1
/* Keep only the (sign-extended) low 32 bits of r17: the store address.  */
2375 add.l r17, r63, r17
2376 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
2377 LOCAL(ia_loop):
/* Table index = 2 * nsb(r0); the fetched entry is an offset from
   ia_main_label, hence the ptrel below.  */
2378 nsb r0, r36
2379 shlli r36, 1, r37
2380 ldx.w r43, r37, r38
2381 LOCAL(ia_main_label):
2382 ptrel/l r38, tr2
2383 blink tr2, r63
/* ia_rN_ld chain: store rN at [r17], replace rN by the address it was
   stored at, and advance r17 by 8.  Each handler extracts a 2-bit cookie
   field (mask 3 << shift) and clears it in r0.
     - Both bits set: branch back to ia_loop after this one register.
     - Otherwise: fall through and treat the next register the same way.
   ia_r9_ld ends the chain and returns to the caller through tr0.  */
2384 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2385 movi 3, r38
2386 shlli r38, 29, r39
2387 and r0, r39, r40
2388 andc r0, r39, r0
2389 stx.q r17, r63, r2
2390 add.l r17, r63, r2
2391 addi.l r17, 8, r17
2392 beq/u r39, r40, tr1
2393 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2394 movi 3, r38
2395 shlli r38, 26, r39
2396 and r0, r39, r40
2397 andc r0, r39, r0
2398 stx.q r17, r63, r3
2399 add.l r17, r63, r3
2400 addi.l r17, 8, r17
2401 beq/u r39, r40, tr1
2402 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2403 movi 3, r38
2404 shlli r38, 23, r39
2405 and r0, r39, r40
2406 andc r0, r39, r0
2407 stx.q r17, r63, r4
2408 add.l r17, r63, r4
2409 addi.l r17, 8, r17
2410 beq/u r39, r40, tr1
2411 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2412 movi 3, r38
2413 shlli r38, 20, r39
2414 and r0, r39, r40
2415 andc r0, r39, r0
2416 stx.q r17, r63, r5
2417 add.l r17, r63, r5
2418 addi.l r17, 8, r17
2419 beq/u r39, r40, tr1
2420 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2421 movi 3, r38
2422 shlli r38, 16, r39
2423 and r0, r39, r40
2424 andc r0, r39, r0
2425 stx.q r17, r63, r6
2426 add.l r17, r63, r6
2427 addi.l r17, 8, r17
2428 beq/u r39, r40, tr1
2429 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2430 movi 3 << 12, r39
2431 and r0, r39, r40
2432 andc r0, r39, r0
2433 stx.q r17, r63, r7
2434 add.l r17, r63, r7
2435 addi.l r17, 8, r17
2436 beq/u r39, r40, tr1
2437 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2438 movi 3 << 8, r39
2439 and r0, r39, r40
2440 andc r0, r39, r0
2441 stx.q r17, r63, r8
2442 add.l r17, r63, r8
2443 addi.l r17, 8, r17
2444 beq/u r39, r40, tr1
/* Last register: no cookie test and no r17 update, just return.  */
2445 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2446 stx.q r17, r63, r9
2447 add.l r17, r63, r9
2448 blink tr0, r63
/* ia_rN_push (r2..r8): store rN at [r17], advance r17 by 8, clear the
   single cookie bit that selected the handler (1 << shift) and return to
   ia_loop through tr1.  Unlike the ia_rN_ld handlers, rN itself is left
   unchanged.  */
2449 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2450 movi 1, r38
2451 shlli r38, 29, r39
2452 andc r0, r39, r0
2453 stx.q r17, r63, r2
2454 addi.l r17, 8, r17
2455 blink tr1, r63
2456 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2457 movi 1, r38
2458 shlli r38, 26, r39
2459 andc r0, r39, r0
2460 stx.q r17, r63, r3
2461 addi.l r17, 8, r17
2462 blink tr1, r63
2463 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2464 movi 1, r38
2465 shlli r38, 23, r39
2466 andc r0, r39, r0
2467 stx.q r17, r63, r4
2468 addi.l r17, 8, r17
2469 blink tr1, r63
2470 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2471 movi 1, r38
2472 shlli r38, 20, r39
2473 andc r0, r39, r0
2474 stx.q r17, r63, r5
2475 addi.l r17, 8, r17
2476 blink tr1, r63
2477 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2478 movi 1, r38
2479 shlli r38, 16, r39
2480 andc r0, r39, r0
2481 stx.q r17, r63, r6
2482 addi.l r17, 8, r17
2483 blink tr1, r63
/* For r7 and r8 the mask is built by a single movi.  */
2484 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2485 movi 1 << 12, r39
2486 andc r0, r39, r0
2487 stx.q r17, r63, r7
2488 addi.l r17, 8, r17
2489 blink tr1, r63
2490 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2491 movi 1 << 8, r39
2492 andc r0, r39, r0
2493 stx.q r17, r63, r8
2494 addi.l r17, 8, r17
2495 blink tr1, r63
/* ia_push_seq: store several consecutive registers, ending with r9, and
   return.  n = (r0 >> 1) & 7 comes from the cookie; r38 holds n << 1, so
   the shift by 2 gives n * 8.  Every step before ia_end_of_push_seq is
   8 bytes (the last one being "stx.q r9" + the returning blink), so
   branching to ia_end_of_push_seq - n * 8 executes exactly the last n
   stores.  */
2496 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2497 andi r0, 7 << 1, r38
2498 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
2499 shlli r38, 2, r39
2500 shori LOCAL(ia_end_of_push_seq) & 65535, r40
2501 sub.l r40, r39, r41
2502 ptabs/l r41, tr2
2503 blink tr2, r63
/* NOTE(review): this label is named "stack_of" while its comment says
   "Beginning"; the call trampoline's counterpart is ct_start_of_pop_seq.
   No reference to it is visible in this section - confirm before
   renaming.  */
2504 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
2505 stx.q r17, r63, r3
2506 addi.l r17, 8, r17
2507 stx.q r17, r63, r4
2508 addi.l r17, 8, r17
2509 stx.q r17, r63, r5
2510 addi.l r17, 8, r17
2511 stx.q r17, r63, r6
2512 addi.l r17, 8, r17
2513 stx.q r17, r63, r7
2514 addi.l r17, 8, r17
2515 stx.q r17, r63, r8
2516 addi.l r17, 8, r17
/* Also reached directly from ia_main_table.  */
2517 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2518 stx.q r17, r63, r9
2519 LOCAL(ia_return): /* Return. */
2520 blink tr0, r63
2521 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2522 #endif /* L_shcompact_incoming_args */
2523 #endif
2524 #if __SH5__
2525 #ifdef L_nested_trampoline
/* GCC_nested_trampoline: position-independent trampoline template.
   It obtains an address derived from its own location (ptrel with a zero
   offset, read back with gettr), then loads two values stored at fixed
   offsets from it: the first becomes the branch target, the second is
   left in r1 for the callee.  The slots are 8 bytes wide (offsets 24 and
   32) for __SH5__ == 64 and 4 bytes wide (offsets 24 and 28) otherwise.
   Presumably the two slots hold the nested function's address and its
   static chain, filled in when the template is copied - confirm against
   the trampoline initialization code.  */
2526 #if __SH5__ == 32
2527 .section .text..SHmedia32,"ax"
2528 #else
2529 .text
2530 #endif
2531 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2532 .global GLOBAL(GCC_nested_trampoline)
2533 GLOBAL(GCC_nested_trampoline):
2534 .mode SHmedia
2535 ptrel/u r63, tr0
2536 gettr tr0, r0
/* First slot: branch target.  */
2537 #if __SH5__ == 64
2538 ld.q r0, 24, r1
2539 #else
2540 ld.l r0, 24, r1
2541 #endif
2542 ptabs/l r1, tr1
/* Second slot: value handed over in r1.  */
2543 #if __SH5__ == 64
2544 ld.q r0, 32, r1
2545 #else
2546 ld.l r0, 28, r1
2547 #endif
2548 blink tr1, r63
2549 #endif /* L_nested_trampoline */
2550 #endif /* __SH5__ */
/* Register save/restore helpers for SHmedia, built only when
   __SH5__ == 32.  Four entry points are defined:

     GCC_push_shmedia_regs        save FP registers, then fall through
     GCC_push_shmedia_regs_nofpu  save general and target registers
     GCC_pop_shmedia_regs         restore FP registers, then join .L0
     GCC_pop_shmedia_regs_nofpu   restore general and target registers

   All of them take the return address in r18 and use r15 as the stack
   pointer.  Frame layout relative to r15 after a push (8-byte slots):

     slots  0..7    r28..r35
     slots  8..23   r44..r59
     slots 24..26   tr5, tr6, tr7
     slots 27..40   dr36, dr38, ..., dr62   (FP variant only)

   The push code overwrites r60..r62 with tr5..tr7 before storing them,
   so the incoming values of r60..r62 are not saved; after a pop they
   hold the restored tr5..tr7 values.  The pop code uses r0 and tr1 as
   scratch; tr0 is used for the return in both directions. */
2551 #if __SH5__ == 32
2552 #ifdef L_push_pop_shmedia_regs
2553 .section .text..SHmedia32,"ax"
2554 .mode SHmedia
2555 .align 2
2556 #ifndef __SH4_NOFPU__
2557 .global GLOBAL(GCC_push_shmedia_regs)
2558 GLOBAL(GCC_push_shmedia_regs):
2559 addi.l r15, -14*8, r15 /* reserve 14 slots for the FP registers */
2560 fst.d r15, 13*8, dr62
2561 fst.d r15, 12*8, dr60
2562 fst.d r15, 11*8, dr58
2563 fst.d r15, 10*8, dr56
2564 fst.d r15, 9*8, dr54
2565 fst.d r15, 8*8, dr52
2566 fst.d r15, 7*8, dr50
2567 fst.d r15, 6*8, dr48
2568 fst.d r15, 5*8, dr46
2569 fst.d r15, 4*8, dr44
2570 fst.d r15, 3*8, dr42
2571 fst.d r15, 2*8, dr40
2572 fst.d r15, 1*8, dr38
2573 fst.d r15, 0*8, dr36
/* No branch here: execution falls through into the nofpu entry below. */
2574 #endif
2575 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2576 GLOBAL(GCC_push_shmedia_regs_nofpu):
2577 ptabs/l r18, tr0 /* tr0 = return address passed in r18 */
2578 addi.l r15, -27*8, r15 /* reserve 27 slots for general and target registers */
2579 gettr tr7, r62 /* copy tr5..tr7 into r60..r62 so they can be stored */
2580 gettr tr6, r61
2581 gettr tr5, r60
2582 st.q r15, 26*8, r62 /* slot 26 = tr7 */
2583 st.q r15, 25*8, r61 /* slot 25 = tr6 */
2584 st.q r15, 24*8, r60 /* slot 24 = tr5 */
2585 st.q r15, 23*8, r59
2586 st.q r15, 22*8, r58
2587 st.q r15, 21*8, r57
2588 st.q r15, 20*8, r56
2589 st.q r15, 19*8, r55
2590 st.q r15, 18*8, r54
2591 st.q r15, 17*8, r53
2592 st.q r15, 16*8, r52
2593 st.q r15, 15*8, r51
2594 st.q r15, 14*8, r50
2595 st.q r15, 13*8, r49
2596 st.q r15, 12*8, r48
2597 st.q r15, 11*8, r47
2598 st.q r15, 10*8, r46
2599 st.q r15, 9*8, r45
2600 st.q r15, 8*8, r44
2601 st.q r15, 7*8, r35 /* register numbering jumps from r44 down to r35 here */
2602 st.q r15, 6*8, r34
2603 st.q r15, 5*8, r33
2604 st.q r15, 4*8, r32
2605 st.q r15, 3*8, r31
2606 st.q r15, 2*8, r30
2607 st.q r15, 1*8, r29
2608 st.q r15, 0*8, r28
2609 blink tr0, r63 /* return to the address taken from r18 */
2611 #ifndef __SH4_NOFPU__
2612 .global GLOBAL(GCC_pop_shmedia_regs)
2613 GLOBAL(GCC_pop_shmedia_regs):
2614 pt .L0, tr1 /* tr1 = shared restore code, entered past the movi 27*8 */
2615 movi 41*8, r0 /* r0 = full frame size: 27 general + 14 FP slots */
2616 fld.d r15, 40*8, dr62
2617 fld.d r15, 39*8, dr60
2618 fld.d r15, 38*8, dr58
2619 fld.d r15, 37*8, dr56
2620 fld.d r15, 36*8, dr54
2621 fld.d r15, 35*8, dr52
2622 fld.d r15, 34*8, dr50
2623 fld.d r15, 33*8, dr48
2624 fld.d r15, 32*8, dr46
2625 fld.d r15, 31*8, dr44
2626 fld.d r15, 30*8, dr42
2627 fld.d r15, 29*8, dr40
2628 fld.d r15, 28*8, dr38
2629 fld.d r15, 27*8, dr36
2630 blink tr1, r63 /* continue at .L0 with r0 = 41*8 */
2631 #endif
2632 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2633 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2634 movi 27*8, r0 /* r0 = frame size without the FP area */
2635 .L0:
/* Shared tail: on entry r0 holds the number of bytes to release.
   NOTE(review): the push side uses ptabs/l for r18 -> tr0 while this
   side uses plain ptabs; confirm whether the difference is intended. */
2636 ptabs r18, tr0 /* tr0 = return address passed in r18 */
2637 ld.q r15, 26*8, r62 /* fetch saved tr7, tr6, tr5 ... */
2638 ld.q r15, 25*8, r61
2639 ld.q r15, 24*8, r60
2640 ptabs r62, tr7 /* ... and reinstall them in the target registers */
2641 ptabs r61, tr6
2642 ptabs r60, tr5
2643 ld.q r15, 23*8, r59
2644 ld.q r15, 22*8, r58
2645 ld.q r15, 21*8, r57
2646 ld.q r15, 20*8, r56
2647 ld.q r15, 19*8, r55
2648 ld.q r15, 18*8, r54
2649 ld.q r15, 17*8, r53
2650 ld.q r15, 16*8, r52
2651 ld.q r15, 15*8, r51
2652 ld.q r15, 14*8, r50
2653 ld.q r15, 13*8, r49
2654 ld.q r15, 12*8, r48
2655 ld.q r15, 11*8, r47
2656 ld.q r15, 10*8, r46
2657 ld.q r15, 9*8, r45
2658 ld.q r15, 8*8, r44
2659 ld.q r15, 7*8, r35
2660 ld.q r15, 6*8, r34
2661 ld.q r15, 5*8, r33
2662 ld.q r15, 4*8, r32
2663 ld.q r15, 3*8, r31
2664 ld.q r15, 2*8, r30
2665 ld.q r15, 1*8, r29
2666 ld.q r15, 0*8, r28
2667 add.l r15, r0, r15 /* release the frame: 27*8 or 41*8 bytes */
2668 blink tr0, r63 /* return to the address taken from r18 */
2669 #endif /* L_push_pop_shmedia_regs */
2670 #endif /* __SH5__ == 32 */